2 Commits

Author SHA1 Message Date
92e06cb193 Add release workflow for auto-tagging and Docker image builds
Some checks failed
Release / Auto-tag release (push) Failing after 3s
Release / Build & push cowrie (push) Has been skipped
Release / Build & push docker_api (push) Has been skipped
Release / Build & push elasticsearch (push) Has been skipped
Release / Build & push ftp (push) Has been skipped
Release / Build & push http (push) Has been skipped
Release / Build & push imap (push) Has been skipped
Release / Build & push k8s (push) Has been skipped
Release / Build & push ldap (push) Has been skipped
Release / Build & push llmnr (push) Has been skipped
Release / Build & push mongodb (push) Has been skipped
Release / Build & push mqtt (push) Has been skipped
Release / Build & push mssql (push) Has been skipped
Release / Build & push mysql (push) Has been skipped
Release / Build & push pop3 (push) Has been skipped
Release / Build & push postgres (push) Has been skipped
Release / Build & push rdp (push) Has been skipped
Release / Build & push real_ssh (push) Has been skipped
Release / Build & push redis (push) Has been skipped
Release / Build & push sip (push) Has been skipped
Release / Build & push smb (push) Has been skipped
Release / Build & push smtp (push) Has been skipped
Release / Build & push snmp (push) Has been skipped
Release / Build & push tftp (push) Has been skipped
Release / Build & push vnc (push) Has been skipped
2026-04-04 17:16:53 -03:00
7ad7e1e53b main: remove tests and pytest dependency 2026-04-04 16:28:33 -03:00
633 changed files with 5167 additions and 72636 deletions

View File

@@ -1,12 +0,0 @@
# API Options
DECNET_API_HOST=0.0.0.0
DECNET_API_PORT=8000
DECNET_JWT_SECRET=supersecretkey12345678901234567
DECNET_INGEST_LOG_FILE=/var/log/decnet/decnet.log
# Web Dashboard Options
DECNET_WEB_HOST=0.0.0.0
DECNET_WEB_PORT=8080
DECNET_ADMIN_USER=admin
DECNET_ADMIN_PASSWORD=admin
DECNET_DEVELOPER=False

View File

@@ -1,175 +0,0 @@
name: CI
on:
push:
branches: [dev, testing, "temp/merge-*"]
paths-ignore:
- "**/*.md"
- "docs/**"
jobs:
lint:
name: Lint (ruff)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install ruff
- run: ruff check .
bandit:
name: SAST (bandit)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install bandit
- run: bandit -r decnet/ -ll -x decnet/services/registry.py -x decnet/templates/
pip-audit:
name: Dependency audit (pip-audit)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install pip-audit
- run: pip install -e .[dev]
- run: pip-audit --skip-editable --ignore-vuln CVE-2025-65896
test-standard:
name: Test (Standard)
runs-on: ubuntu-latest
needs: [lint, bandit, pip-audit]
strategy:
matrix:
python-version: ["3.11"]
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- run: pip install -e .[dev]
- run: pytest
test-live:
name: Test (Live)
runs-on: ubuntu-latest
needs: [test-standard]
services:
mysql:
image: mysql:8.0
env:
MYSQL_ROOT_PASSWORD: root
MYSQL_DATABASE: decnet_test
ports:
- 3307:3306
options: >-
--health-cmd="mysqladmin ping -h 127.0.0.1"
--health-interval=10s
--health-timeout=5s
--health-retries=5
strategy:
matrix:
python-version: ["3.11"]
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- run: pip install -e .[dev]
- run: pytest -m live
env:
DECNET_MYSQL_HOST: 127.0.0.1
DECNET_MYSQL_PORT: 3307
DECNET_MYSQL_USER: root
DECNET_MYSQL_PASSWORD: root
DECNET_MYSQL_DATABASE: decnet_test
test-fuzz:
name: Test (Fuzz)
runs-on: ubuntu-latest
needs: [test-live]
strategy:
matrix:
python-version: ["3.11"]
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- run: pip install -e .[dev]
- run: pytest -m fuzz
env:
SCHEMATHESIS_CONFIG: schemathesis.ci.toml
merge-to-testing:
name: Merge dev → testing
runs-on: ubuntu-latest
needs: [test-standard, test-live, test-fuzz]
if: github.ref == 'refs/heads/dev'
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
token: ${{ secrets.DECNET_PR_TOKEN }}
- name: Configure git
run: |
git config user.name "DECNET CI"
git config user.email "ci@decnet.local"
- name: Merge dev into testing
run: |
git fetch origin testing
git checkout testing
git merge origin/dev --no-ff -m "ci: auto-merge dev → testing [skip ci]"
git push origin testing
prepare-merge-to-main:
name: Prepare Merge to Main
runs-on: ubuntu-latest
needs: [test-standard, test-live, test-fuzz]
if: github.ref == 'refs/heads/testing'
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
token: ${{ secrets.DECNET_PR_TOKEN }}
- name: Configure git
run: |
git config user.name "DECNET CI"
git config user.email "ci@decnet.local"
- name: Create temp branch and sync with main
run: |
git fetch origin main
git checkout -b temp/merge-testing-to-main
echo "--- Switched to temp branch, merging main into it ---"
git merge origin/main --no-edit || { echo "CONFLICT: Manual resolution required"; exit 1; }
git push origin temp/merge-testing-to-main --force
finalize-merge-to-main:
name: Finalize Merge to Main
runs-on: ubuntu-latest
needs: [test-standard, test-live, test-fuzz]
if: startsWith(github.ref, 'refs/heads/temp/merge-')
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
token: ${{ secrets.DECNET_PR_TOKEN }}
- name: Configure git
run: |
git config user.name "DECNET CI"
git config user.email "ci@decnet.local"
- name: Merge RC into main
run: |
git fetch origin main
git checkout main
git merge ${{ github.ref }} --no-ff -m "ci: auto-merge testing → main"
git push origin main
echo "--- Cleaning up temp branch ---"
git push origin --delete ${{ github.ref_name }}

View File

@@ -1,57 +0,0 @@
name: PR Gate
on:
pull_request:
branches: [main]
paths-ignore:
- "**/*.md"
- "docs/**"
jobs:
lint:
name: Lint (ruff)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install ruff
- run: ruff check .
test:
name: Test (pytest)
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.11", "3.12"]
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- run: pip install -e .[dev]
- run: pytest tests/ -v --tb=short
bandit:
name: SAST (bandit)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install bandit
- run: bandit -r decnet/ -ll -x decnet/services/registry.py
pip-audit:
name: Dependency audit (pip-audit)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install pip-audit
- run: pip install -e .[dev]
- run: pip-audit --skip-editable

View File

@@ -3,9 +3,6 @@ name: Release
on:
push:
branches: [main]
paths-ignore:
- "**/*.md"
- "docs/**"
env:
REGISTRY: git.resacachile.cl
@@ -22,52 +19,37 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
token: ${{ secrets.DECNET_PR_TOKEN }}
- name: Configure git
run: |
git config user.name "DECNET CI"
git config user.email "ci@decnet.local"
- name: Bump version and Tag
- name: Extract version from pyproject.toml
id: version
run: |
# Calculate next version (v0.x)
LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0")
NEXT_VER=$(python3 -c "
tag = '$LATEST_TAG'.lstrip('v')
parts = tag.split('.')
major = int(parts[0]) if parts[0] else 0
minor = int(parts[1]) if len(parts) > 1 else 0
print(f'{major}.{minor + 1}.0')
")
VERSION=$(python3 -c "import tomllib; f=open('pyproject.toml','rb'); d=tomllib.load(f); print(d['project']['version'])")
echo "version=$VERSION" >> $GITHUB_OUTPUT
echo "Next version: $NEXT_VER (calculated from $LATEST_TAG)"
# Update pyproject.toml
sed -i "s/^version = \".*\"/version = \"$NEXT_VER\"/" pyproject.toml
git add pyproject.toml
git commit -m "chore: auto-release v$NEXT_VER [skip ci]" || echo "No changes to commit"
CHANGELOG=$(git log ${LATEST_TAG}..HEAD --oneline --no-decorate --no-merges)
git tag -a "v$NEXT_VER" -m "Auto-release v$NEXT_VER
Changes since $LATEST_TAG:
$CHANGELOG"
git push origin main --follow-tags
echo "version=$NEXT_VER" >> $GITHUB_OUTPUT
- name: Create tag if not exists
id: tag
run: |
VERSION=${{ steps.version.outputs.version }}
if git rev-parse "v$VERSION" >/dev/null 2>&1; then
echo "Tag v$VERSION already exists, skipping."
echo "created=false" >> $GITHUB_OUTPUT
else
git config user.name "gitea-actions"
git config user.email "actions@git.resacachile.cl"
git tag -a "v$VERSION" -m "Release v$VERSION"
git push origin "v$VERSION"
echo "created=true" >> $GITHUB_OUTPUT
fi
docker:
name: Build, scan & push ${{ matrix.service }}
name: Build & push ${{ matrix.service }}
runs-on: ubuntu-latest
needs: tag
strategy:
fail-fast: false
matrix:
service:
- conpot
- cowrie
- docker_api
- elasticsearch
- ftp
@@ -84,20 +66,16 @@ $CHANGELOG"
- postgres
- rdp
- redis
- real_ssh
- sip
- smb
- smtp
- snmp
- ssh
- telnet
- tftp
- vnc
steps:
- uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to Gitea container registry
uses: docker/login-action@v3
with:
@@ -105,26 +83,7 @@ $CHANGELOG"
username: ${{ secrets.REGISTRY_USER }}
password: ${{ secrets.REGISTRY_TOKEN }}
- name: Build image locally
uses: docker/build-push-action@v5
with:
context: templates/${{ matrix.service }}
load: true
push: false
tags: decnet-${{ matrix.service }}:scan
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Install Trivy
run: |
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin
- name: Scan with Trivy
run: |
trivy image --exit-code 1 --severity CRITICAL --ignore-unfixed decnet-${{ matrix.service }}:scan
- name: Push image
if: success()
- name: Build and push
uses: docker/build-push-action@v5
with:
context: templates/${{ matrix.service }}
@@ -132,4 +91,3 @@ $CHANGELOG"
tags: |
${{ env.REGISTRY }}/${{ env.OWNER }}/decnet-${{ matrix.service }}:latest
${{ env.REGISTRY }}/${{ env.OWNER }}/decnet-${{ matrix.service }}:v${{ needs.tag.outputs.version }}
cache-from: type=gha

21
.gitignore vendored
View File

@@ -1,7 +1,4 @@
.venv/
logs/
.claude/*
CLAUDE.md
__pycache__/
*.pyc
*.pyo
@@ -11,20 +8,4 @@ build/
decnet-compose.yml
decnet-state.json
*.ini
decnet.log*
*.loggy
*.nmap
linterfails.log
webmail
windows1
*.db
*.db-shm
*.db-wal
decnet.*.log
decnet.json
.env*
.env.local
.coverage
.hypothesis/
profiles/*
tests/test_decnet.db*
.env

56
CLAUDE.md Normal file
View File

@@ -0,0 +1,56 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Commands
```bash
# Install (dev)
pip install -e .
# List registered service plugins
decnet services
# Dry-run (generates compose, no containers)
decnet deploy --mode unihost --deckies 3 --randomize-services --dry-run
# Full deploy (requires root for MACVLAN)
sudo decnet deploy --mode unihost --deckies 5 --interface eth0 --randomize-services
sudo decnet deploy --mode unihost --deckies 3 --services ssh,smb --log-target 192.168.1.5:5140
# Status / teardown
decnet status
sudo decnet teardown --all
sudo decnet teardown --id decky-01
```
## Project Overview
DECNET is a honeypot/deception network framework. It deploys fake machines (called **deckies**) with realistic services (RDP, SMB, SSH, FTP, etc.) to lure and profile attackers. All attacker interactions are aggregated to an isolated logging network (ELK stack / SIEM).
## Deployment Models
**UNIHOST** — one real host spins up _n_ deckies via a container orchestrator. Simpler, single-machine deployment.
**SWARM (MULTIHOST)** — _n_ real hosts each running deckies. Orchestrated via Ansible/sshpass or similar tooling.
## Core Technology Choices
- **Containers**: Docker Compose is the starting point but other orchestration frameworks should be evaluated if they serve the project better. `debian:bookworm-slim` is the default base image; mixing in Ubuntu, CentOS, or other distros is encouraged to make the decoy network look heterogeneous.
- **Networking**: Deckies need to appear as real machines on the LAN (own MACs/IPs). MACVLAN and IPVLAN are candidates; the right driver depends on the host environment. WSL has known limitations — bare metal or a VM is preferred for testing.
- **Log pipeline**: Logstash → ELK stack → SIEM (isolated network, not reachable from decoy network)
## Architecture Constraints
- The decoy network must be reachable from the outside (attacker-facing).
- The logging/aggregation network must be isolated from the decoy network.
- A publicly accessible real server acts as the bridge between the two networks.
- Deckies should differ in exposed services and OS fingerprints to appear as a heterogeneous network.
## Development and testing
- For every new feature, pytests must be made.
- Pytest is the main testing framework in use.
- NEVER pass broken code to the user.
- Broken means: not running, not passing 100% tests, etc.
- After tests pass with 100%, always git commit your changes.

View File

@@ -51,7 +51,3 @@ docker network create -d macvlan \
#### Issues
This initial test doesn't seem to be working. Might be that I'm using WSL, so I downloaded a Ubuntu 22.04 Server ISO. I'll try the MACVLAN network on it. Now, if that doesn't work, I don't see how the 802.1q would work, at least on _my network_. Perhaps if I had a switch I could make it work, but currently I don't have one :c
---
# End of Notes

164
README.md
View File

@@ -69,7 +69,7 @@ From the outside a decky looks identical to a real machine: it has its own MAC a
## Installation
```bash
git clone https://git.resacachile.cl/anti/DECNET
git clone <repo-url> DECNET
cd DECNET
pip install -e .
```
@@ -207,26 +207,6 @@ sudo decnet deploy --deckies 4 --archetype windows-workstation
[corp-workstations]
archetype = windows-workstation
amount = 4
[win-fileserver]
services = ftp
nmap_os = windows
os_version = Windows Server 2019
[dbsrv01]
ip = 192.168.1.112
services = mysql, http
nmap_os = linux
[dbsrv01.http]
server_header = Apache/2.4.54 (Debian)
response_code = 200
fake_app = wordpress
[dbsrv01.mysql]
mysql_version = 5.7.38-log
mysql_banner = MySQL Community Server
```
---
@@ -474,7 +454,7 @@ Key/value pairs are passed directly to the service plugin as persona config. Com
| `mongodb` | `mongo_version` |
| `elasticsearch` | `es_version`, `cluster_name` |
| `ldap` | `base_dn`, `domain` |
| `snmp` | `snmp_community`, `sys_descr`, `snmp_archetype` (picks predefined sysDescr for `water_plant`, `hospital`, etc.) |
| `snmp` | `snmp_community`, `sys_descr` |
| `mqtt` | `mqtt_version` |
| `sip` | `sip_server`, `sip_domain` |
| `k8s` | `k8s_version` |
@@ -490,34 +470,6 @@ See [`test-full.ini`](test-full.ini) — covers all 25 services across 10 role-t
---
## Environment Configuration (.env)
DECNET supports loading configuration from `.env.local` and `.env` files located in the project root. This is useful for securing secrets like the JWT key and configuring default ports without passing flags every time.
An example `.env.example` is provided:
```ini
# API Options
DECNET_API_HOST=0.0.0.0
DECNET_API_PORT=8000
DECNET_JWT_SECRET=supersecretkey12345
DECNET_INGEST_LOG_FILE=/var/log/decnet/decnet.log
# Web Dashboard Options
DECNET_WEB_HOST=0.0.0.0
DECNET_WEB_PORT=8080
DECNET_ADMIN_USER=admin
DECNET_ADMIN_PASSWORD=admin
# Database pool tuning (applies to both SQLite and MySQL)
DECNET_DB_POOL_SIZE=20 # base pool connections (default: 20)
DECNET_DB_MAX_OVERFLOW=40 # extra connections under burst (default: 40)
```
Copy `.env.example` to `.env.local` and modify it to suit your environment.
---
## Logging
All attacker interactions are forwarded off the decoy network to an isolated logging sink. The log pipeline lives on a separate internal Docker bridge (`decnet_logs`) that is not reachable from the fake LAN.
@@ -679,115 +631,3 @@ The test suite covers:
| `test_cli_service_pool.py` | CLI service resolution |
Every new feature requires passing tests before merging.
### Stress Testing
A [Locust](https://locust.io)-based stress test suite lives in `tests/stress/`. It hammers every API endpoint with realistic traffic patterns to find throughput ceilings and latency degradation.
```bash
# Run via pytest (starts its own server)
pytest -m stress tests/stress/ -v -x -n0 -s
# Crank it up
STRESS_USERS=2000 STRESS_SPAWN_RATE=200 STRESS_DURATION=120 pytest -m stress tests/stress/ -v -x -n0 -s
# Standalone Locust web UI against a running server
locust -f tests/stress/locustfile.py --host http://localhost:8000
```
| Env var | Default | Description |
|---|---|---|
| `STRESS_USERS` | `500` | Total simulated users |
| `STRESS_SPAWN_RATE` | `50` | Users spawned per second |
| `STRESS_DURATION` | `60` | Test duration in seconds |
| `STRESS_WORKERS` | CPU count (max 4) | Uvicorn workers for the test server |
| `STRESS_MIN_RPS` | `500` | Minimum RPS to pass baseline test |
| `STRESS_MAX_P99_MS` | `200` | Maximum p99 latency (ms) to pass |
| `STRESS_SPIKE_USERS` | `1000` | Users for thundering herd test |
| `STRESS_SUSTAINED_USERS` | `200` | Users for sustained load test |
#### Measured baseline
Reference numbers from recent Locust runs against a MySQL backend
(asyncmy driver). All runs hold zero failures throughout.
**Single worker** (unless noted):
| Metric | 500u, tracing on | 1500u, tracing on | 1500u, tracing **off** | 1500u, tracing off, **pinned to 1 core** | 1500u, tracing off, **12 workers** |
|---|---|---|---|---|---|
| Requests served | 396,672 | 232,648 | 277,214 | 3,532 | 308,024 |
| Failures | 0 | 0 | 0 | 0 | 0 |
| Throughput (current RPS) | ~960 | ~880 | ~990 | ~46 | ~1,585 |
| Average latency | 465 ms | 1,774 ms | 1,489 ms | 21.7 s | 930 ms |
| Median (p50) | 100 ms | 690 ms | 340 ms | 270 ms | 700 ms |
| p95 | 1.9 s | 6.5 s | 5.7 s | 115 s | 2.7 s |
| p99 | 2.9 s | 9.5 s | 8.4 s | 122 s | 4.2 s |
| Max observed | 8.3 s | 24.4 s | 20.9 s | 124.5 s | 16.5 s |
Ramp is 15 users/s for the 500u column, 40 users/s otherwise.
Takeaways:
- **Tracing off**: at 1500 users, flipping `DECNET_TRACING=false`
halves p50 (690 → 340 ms) and pushes RPS from ~880 past the
500-user figure on a single worker.
- **12 workers**: RPS scales ~1.6× over a single worker (~990 →
~1585). Sublinear because the workload is DB-bound — MySQL and the
connection pool become the new ceiling, not Python. p99 drops from
8.4 s to 4.2 s.
- **Connection math**: (`DECNET_DB_POOL_SIZE=20` + `DECNET_DB_MAX_OVERFLOW=40`)
× 12 workers = 720 connections at peak. MySQL's default
`max_connections=151` needs bumping (we used 2000) before running
multi-worker load.
- **Single-core pinning**: ~46 RPS with p95 near two minutes. Interesting
as a "physics floor" datapoint — not a production config.
Top endpoints by volume: `/api/v1/attackers`, `/api/v1/deckies`,
`/api/v1/bounty`, `/api/v1/logs/histogram`, `/api/v1/config`,
`/api/v1/health`, `/api/v1/auth/login`, `/api/v1/logs`.
Notes on tuning:
- **Python 3.14 is currently a no-go for the API server.** Under heavy
concurrent async load the reworked 3.14 GC segfaults inside
`mark_all_reachable` (observed in `_PyGC_Collect` during pending-GC
on 3.14.3). Stick to Python 3.11–3.13 until upstream stabilises.
- Router-level TTL caches on hot count/stats endpoints (`/stats`,
`/logs` count, `/attackers` count, `/bounty`, `/logs/histogram`,
`/deckies`, `/config`) collapse concurrent duplicate work onto a
single DB hit per window — essential to reach this RPS on one worker.
- Turning off request tracing (`DECNET_TRACING=false`) is the next
free headroom: tracing was still on during the run above.
- On SQLite, `DECNET_DB_POOL_PRE_PING=false` skips the per-checkout
`SELECT 1`. On MySQL, keep it `true` — network disconnects are real.
#### System tuning: open file limit
Under heavy load (500+ concurrent users), the server will exhaust the default Linux open file limit (`ulimit -n`), causing `OSError: [Errno 24] Too many open files`. Most distros default to **1024**, which is far too low for stress testing or production use.
**Before running stress tests:**
```bash
# Check current limit
ulimit -n
# Bump for this shell session
ulimit -n 65536
```
**Permanent fix** — add to `/etc/security/limits.conf`:
```
* soft nofile 65536
* hard nofile 65536
```
Or for systemd-managed services, add `LimitNOFILE=65536` to the unit file.
> This applies to production deployments too — any server handling hundreds of concurrent connections needs a raised file descriptor limit.
# AI Disclosure
This project has been made with lots, and I mean lots of help from AIs. While most of the design was made by me, most of the coding was done by AI models.
Nevertheless, this project will be kept under high scrutiny by humans.

View File

@@ -1,64 +0,0 @@
; /etc/decnet/decnet.ini — DECNET host configuration
;
; Copy to /etc/decnet/decnet.ini and edit. Values here seed os.environ at
; CLI startup via setdefault() — real env vars still win, so you can
; override any value on the shell without editing this file.
;
; A missing file is fine; every daemon has sensible defaults. The main
; reason to use this file is to skip typing the same flags on every
; `decnet` invocation and to pin a host's role via `mode`.
[decnet]
; mode = agent | master
; agent — worker host (runs `decnet agent`, `decnet forwarder`, `decnet updater`).
; Master-only commands (api, swarmctl, swarm, deploy, teardown, ...)
; are hidden from `decnet --help` and refuse to run.
; master — central server (runs `decnet api`, `decnet web`, `decnet swarmctl`,
; `decnet listener`). All commands visible.
mode = agent
; disallow-master = true (default when mode=agent)
; Set to false for hybrid dev hosts that legitimately run both roles.
disallow-master = true
; log-directory — root for DECNET's per-component logs. Systemd units set
; DECNET_SYSTEM_LOGS=<log-directory>/decnet.<component>.log so agent, forwarder,
; and engine each get their own file. The forwarder tails decnet.log.
log-directory = /var/log/decnet
; ─── Agent-only settings (read when mode=agent) ───────────────────────────
[agent]
; Where the master's syslog-TLS listener lives. DECNET_SWARM_MASTER_HOST.
master-host = 192.168.1.50
; Master listener port (RFC 5425 default 6514). DECNET_SWARM_SYSLOG_PORT.
swarm-syslog-port = 6514
; Bind address/port for this worker's agent API (mTLS).
agent-port = 8765
; Cert bundle dir — must contain ca.crt, worker.crt, worker.key from enroll.
; DECNET_AGENT_DIR — honored by the forwarder child as well.
agent-dir = /home/anti/.decnet/agent
; Updater cert bundle (required for `decnet updater`).
updater-dir = /home/anti/.decnet/updater
; ─── Master-only settings (read when mode=master) ─────────────────────────
[master]
; Main API (REST for the React dashboard). DECNET_API_HOST / _PORT.
api-host = 0.0.0.0
api-port = 8000
; React dev-server dashboard (`decnet web`). DECNET_WEB_HOST / _PORT.
web-host = 0.0.0.0
web-port = 8080
; Swarm controller (master-internal). DECNET_SWARMCTL_HOST isn't exposed
; under that name today — this block is the forward-compatible spelling.
; swarmctl-host = 127.0.0.1
; swarmctl-port = 8770
; Syslog-over-TLS listener bind address and port. DECNET_LISTENER_HOST and
; DECNET_SWARM_SYSLOG_PORT. The listener is auto-spawned by `decnet swarmctl`.
listener-host = 0.0.0.0
swarm-syslog-port = 6514
; Master CA dir (for enroll / swarm cert issuance).
; ca-dir = /home/anti/.decnet/ca
; JWT secret for the web API. MUST be set; 32+ bytes. Keep out of git.
; jwt-secret = REPLACE_ME_WITH_A_32_BYTE_SECRET

View File

@@ -1,12 +0,0 @@
"""DECNET — honeypot deception-network framework.
This __init__ runs once, on the first `import decnet.*`. It seeds
os.environ from /etc/decnet/decnet.ini (if present) so that later
module-level reads in decnet.env pick up the INI values as if they had
been exported by the shell. Real env vars always win via setdefault().
Kept minimal on purpose — any heavier work belongs in a submodule.
"""
from decnet.config_ini import load_ini_config as _load_ini_config
_load_ini_config()

View File

@@ -1,7 +0,0 @@
"""DECNET worker agent — runs on every SWARM worker host.
Exposes an mTLS-protected FastAPI service the master's SWARM controller
calls to deploy, mutate, and tear down deckies locally. The agent reuses
the existing `decnet.engine.deployer` code path unchanged, so a worker runs
deckies the same way `decnet deploy --mode unihost` does today.
"""

View File

@@ -1,144 +0,0 @@
"""Worker-side FastAPI app.
Protected by mTLS at the ASGI/uvicorn transport layer: uvicorn is started
with ``--ssl-ca-certs`` + ``--ssl-cert-reqs 2`` (CERT_REQUIRED), so any
client that cannot prove a cert signed by the DECNET CA is rejected before
reaching a handler. Once past the TLS handshake, all peers are trusted
equally (the only entity holding a CA-signed cert is the master
controller).
Endpoints mirror the existing unihost CLI verbs:
* ``POST /deploy`` — body: serialized ``DecnetConfig``
* ``POST /teardown`` — body: optional ``{"decky_id": "..."}``
* ``POST /mutate`` — body: ``{"decky_id": "...", "services": [...]}``
* ``GET /status`` — deployment snapshot
* ``GET /health`` — liveness probe; mTLS is still required (the master
pings it with its cert).
"""
from __future__ import annotations
from contextlib import asynccontextmanager
from typing import Optional
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from decnet.agent import executor as _exec
from decnet.agent import heartbeat as _heartbeat
from decnet.config import DecnetConfig
from decnet.logging import get_logger
log = get_logger("agent.app")
@asynccontextmanager
async def _lifespan(app: FastAPI):
    """Run the agent heartbeat for as long as the application is serving.

    The heartbeat is started before control is handed to the application
    and is always stopped (awaited) on the way out, including when the
    serving phase ends with an exception.
    """
    # Best-effort start: per the heartbeat module's contract, when the
    # identity/bundle plumbing is not configured (dev runs, non-enrolled
    # hosts) start() is a silent no-op.
    _heartbeat.start()
    try:
        yield
    finally:
        await _heartbeat.stop()
# The worker's ASGI application. Start-up/shutdown work is delegated to
# ``_lifespan``; the 400/500 entries are the default response descriptions
# attached to the app.
app = FastAPI(
    title="DECNET SWARM Agent",
    version="0.1.0",
    lifespan=_lifespan,
    # No interactive docs or schema endpoint on the worker — this keeps the
    # attack surface narrow.
    docs_url=None,
    redoc_url=None,
    openapi_url=None,
    responses={
        400: {"description": "Malformed request body"},
        500: {"description": "Executor error"},
    },
)
# ------------------------------------------------------------------ schemas
class DeployRequest(BaseModel):
    # Request body for ``POST /deploy``.

    # Required: the configuration this worker should deploy.
    config: DecnetConfig = Field(..., description="Full DecnetConfig to materialise on this worker")
    # Both flags are forwarded unchanged to the executor's deploy().
    dry_run: bool = False
    no_cache: bool = False
class TeardownRequest(BaseModel):
    # Request body for ``POST /teardown``.

    # Optional decky identifier; forwarded as-is to the executor. When it is
    # left as None the executor additionally clears the saved state.
    decky_id: Optional[str] = None
class MutateRequest(BaseModel):
    # Request body for ``POST /mutate`` (the route currently answers 501).

    # Identifier of the decky to mutate.
    decky_id: str
    # Service names for that decky.
    services: list[str]
# ------------------------------------------------------------------ routes
@app.get("/health")
async def health() -> dict[str, str]:
    """Liveness probe: unconditionally answers ``{"status": "ok"}``."""
    payload = {"status": "ok"}
    return payload
@app.get("/status")
async def status() -> dict:
    """Return whatever the executor reports as the current deployment status."""
    snapshot = await _exec.status()
    return snapshot
@app.post(
    "/deploy",
    responses={500: {"description": "Deployer raised an exception materialising the config"}},
)
async def deploy(req: DeployRequest) -> dict:
    """Deploy the supplied configuration on this worker.

    Delegates to the executor. Any exception it raises is logged with its
    traceback and re-raised as an HTTP 500 whose detail is the exception
    text. On success the reply carries the number of deckies in the
    *requested* config.
    """
    cfg = req.config
    try:
        await _exec.deploy(cfg, dry_run=req.dry_run, no_cache=req.no_cache)
    except Exception as err:
        log.exception("agent.deploy failed")
        raise HTTPException(status_code=500, detail=str(err)) from err
    else:
        return {"status": "deployed", "deckies": len(cfg.deckies)}
@app.post(
    "/teardown",
    responses={500: {"description": "Teardown raised an exception"}},
)
async def teardown(req: TeardownRequest) -> dict:
    """Tear down one decky, or everything when no ``decky_id`` is given.

    Delegates to the executor. Failures are logged with their traceback and
    surfaced as an HTTP 500 whose detail is the exception text. The reply
    echoes the ``decky_id`` that was requested (possibly ``None``).
    """
    target = req.decky_id
    try:
        await _exec.teardown(target)
    except Exception as err:
        log.exception("agent.teardown failed")
        raise HTTPException(status_code=500, detail=str(err)) from err
    else:
        return {"status": "torn_down", "decky_id": target}
@app.post(
    "/self-destruct",
    responses={500: {"description": "Reaper could not be scheduled"}},
)
async def self_destruct() -> dict:
    """Stop all DECNET services on this worker and delete the install
    footprint. Called by the master during decommission. Logs under
    /var/log/decnet* are preserved. Fire-and-forget — the response is sent
    once the reaper has been scheduled, before it starts deleting files.

    Responds with FastAPI's default 200 and
    ``{"status": "self_destruct_scheduled"}``; if scheduling fails, the
    error is logged and surfaced as an HTTP 500 whose detail is the
    exception text.
    """
    # NOTE(review): this docstring used to claim a 202 response, but the
    # route sets no status_code, so 200 is what is actually sent. Confirm
    # what the master expects before switching to status_code=202.
    try:
        await _exec.self_destruct()
    except Exception as exc:
        log.exception("agent.self_destruct failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return {"status": "self_destruct_scheduled"}
@app.post(
    "/mutate",
    responses={501: {"description": "Worker-side mutate not yet implemented"}},
)
async def mutate(req: MutateRequest) -> dict:
    """Placeholder for per-decky mutation: always answers HTTP 501."""
    # TODO: implement worker-side mutate. Currently the master performs
    # mutation by re-sending a full /deploy with the updated DecnetConfig;
    # this avoids duplicating mutation logic on the worker for v1. When
    # ready, replace the 501 with a real redeploy-of-a-single-decky path.
    not_implemented = HTTPException(
        status_code=501,
        detail="Per-decky mutate is performed via /deploy with updated services",
    )
    raise not_implemented

View File

@@ -1,223 +0,0 @@
"""Thin adapter between the agent's HTTP endpoints and the existing
``decnet.engine.deployer`` code path.
Kept deliberately small: the agent does not re-implement deployment logic,
it only translates a master RPC into the same function calls the unihost
CLI already uses. Everything runs in a worker thread (the deployer is
blocking) so the FastAPI event loop stays responsive.
"""
from __future__ import annotations
import asyncio
from ipaddress import IPv4Network
from typing import Any
from decnet.engine import deployer as _deployer
from decnet.config import DecnetConfig, load_state, clear_state
from decnet.logging import get_logger
from decnet.network import (
allocate_ips,
detect_interface,
detect_subnet,
get_host_ip,
)
log = get_logger("agent.executor")
def _relocalize(config: DecnetConfig) -> DecnetConfig:
    """Rewrite a master-built config to the worker's local network reality.

    The master populates ``interface``/``subnet``/``gateway`` from its own
    box before dispatching, which blows up the deployer on any worker whose
    NIC name differs (common in heterogeneous fleets — master on ``wlp6s0``,
    worker on ``enp0s3``). We always re-detect locally; if the worker sits
    on a different subnet than the master, decky IPs are re-allocated from
    the worker's subnet so they're actually reachable.

    Args:
        config: The config as received from the master. It is not modified;
            an updated copy is returned.

    Returns:
        A copy of ``config`` with ``interface``, ``subnet`` and ``gateway``
        replaced by locally detected values and, when the master's subnet is
        unset or differs from the local one, with every decky given a
        freshly allocated IP.

    Raises:
        ValueError: If the number of freshly allocated IPs does not match
            the number of deckies.
    """
    local_iface = detect_interface()
    local_subnet, local_gateway = detect_subnet(local_iface)
    local_host_ip = get_host_ip(local_iface)

    updates: dict[str, Any] = {
        "interface": local_iface,
        "subnet": local_subnet,
        "gateway": local_gateway,
    }

    # strict=False tolerates a subnet string that has host bits set.
    master_net = IPv4Network(config.subnet, strict=False) if config.subnet else None
    local_net = IPv4Network(local_subnet, strict=False)
    if master_net is None or master_net != local_net:
        log.info(
            "agent.deploy subnet mismatch master=%s local=%s — re-allocating decky IPs",
            config.subnet, local_subnet,
        )
        fresh_ips = allocate_ips(
            subnet=local_subnet,
            gateway=local_gateway,
            host_ip=local_host_ip,
            count=len(config.deckies),
        )
        # strict=True: a plain zip() would silently drop deckies if fewer
        # IPs came back than were requested; fail loudly instead.
        updates["deckies"] = [
            decky.model_copy(update={"ip": ip})
            for decky, ip in zip(config.deckies, fresh_ips, strict=True)
        ]
    return config.model_copy(update=updates)
async def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False) -> None:
    """Hand ``config`` to the blocking deployer on a worker thread.

    In ``"swarm"`` mode the config is first rewritten by ``_relocalize`` so
    it reflects this host's interface/subnet/gateway. Per the original note,
    the deployer calls save_state() itself once the compose file has been
    materialised, so no state is written here.
    """
    log.info(
        "agent.deploy mode=%s deckies=%d interface=%s (incoming)",
        config.mode, len(config.deckies), config.interface,
    )

    swarm_mode = config.mode == "swarm"
    if swarm_mode:
        config = _relocalize(config)
        log.info(
            "agent.deploy relocalized interface=%s subnet=%s gateway=%s",
            config.interface, config.subnet, config.gateway,
        )

    # NOTE(review): the trailing positional False is passed through to
    # _deployer.deploy unchanged; its meaning is defined by the deployer's
    # signature, which is not visible here — confirm.
    deployer_args = (config, dry_run, no_cache, False)
    await asyncio.to_thread(_deployer.deploy, *deployer_args)
async def teardown(decky_id: str | None = None) -> None:
    """Run the blocking teardown on a worker thread.

    ``decky_id`` is forwarded to the deployer unchanged. When it is ``None``
    the saved state is cleared afterwards as well, also off the event loop.
    """
    log.info("agent.teardown decky_id=%s", decky_id)
    await asyncio.to_thread(_deployer.teardown, decky_id)
    if decky_id is not None:
        # A specific decky was targeted: leave the saved state alone.
        return
    await asyncio.to_thread(clear_state)
def _decky_runtime_states(config: DecnetConfig) -> dict[str, dict[str, Any]]:
    """Map decky_name → {"running": bool, "services": {svc: container_state}}.

    Queried so the master can tell, after a partial-failure deploy, which
    deckies actually came up instead of tainting the whole shard as failed.

    Best-effort: a docker error returns an empty map, not an exception.

    Args:
        config: Deployment whose ``deckies`` (each with ``name`` and
            ``services``) are looked up among the local containers.

    Returns:
        One entry per decky. ``services`` maps each service to the status
        string docker reports for the container named
        ``<decky>-<service with '_' replaced by '-'>``, or ``"absent"`` when
        no such container exists. ``running`` is true only when the decky has
        at least one service and every one of them reports ``"running"``.
    """
    try:
        import docker  # local import — agent-only path

        client = docker.from_env()
        try:
            live = {
                c.name: c.status
                for c in client.containers.list(all=True, ignore_removed=True)
            }
        finally:
            # This function runs on every status/heartbeat poll, so release
            # the client's connections instead of leaving them to the GC.
            try:
                client.close()
            except Exception:  # pragma: no cover — defensive
                # A failed close must not discard a snapshot we already have.
                log.debug("_decky_runtime_states: docker client close failed", exc_info=True)
    except Exception:  # pragma: no cover — defensive
        log.exception("_decky_runtime_states: docker query failed")
        return {}

    out: dict[str, dict[str, Any]] = {}
    for d in config.deckies:
        svc_states = {
            svc: live.get(f"{d.name}-{svc.replace('_', '-')}", "absent")
            for svc in d.services
        }
        out[d.name] = {
            # ``bool(svc_states)`` keeps a decky with no services from
            # counting as running (``all()`` of nothing is True).
            "running": bool(svc_states) and all(s == "running" for s in svc_states.values()),
            "services": svc_states,
        }
    return out
# Bash source of the self-destruct reaper. ``self_destruct()`` writes this
# text to a temp file under /tmp and launches it with /bin/bash. It is a raw
# string so the shell text is stored verbatim; the script removes itself as
# its final step (``rm -f "$0"``).
_REAPER_SCRIPT = r"""#!/bin/bash
# DECNET agent self-destruct reaper.
# Runs detached from the agent process so it survives the agent's death.
# Waits briefly for the HTTP response to drain, then stops services,
# wipes install paths, and preserves logs.
set +e
sleep 3
# Stop decky containers started by the local deployer (best-effort).
if command -v docker >/dev/null 2>&1; then
docker ps -q --filter "label=com.docker.compose.project=decnet" | xargs -r docker stop
docker ps -aq --filter "label=com.docker.compose.project=decnet" | xargs -r docker rm -f
docker network rm decnet_lan 2>/dev/null
fi
# Stop+disable every systemd unit the installer may have dropped.
for unit in decnet-agent decnet-engine decnet-collector decnet-forwarder decnet-prober decnet-sniffer decnet-updater; do
systemctl stop "$unit" 2>/dev/null
systemctl disable "$unit" 2>/dev/null
done
# Nuke install paths. Logs under /var/log/decnet* are intentionally
# preserved — the operator typically wants them for forensic review.
rm -rf /opt/decnet* /var/lib/decnet/* /usr/local/bin/decnet* /etc/decnet
rm -f /etc/systemd/system/decnet-*.service /etc/systemd/system/decnet-*.timer
systemctl daemon-reload 2>/dev/null
rm -f "$0"
"""
async def self_destruct() -> None:
    """Tear down deckies, then launch a detached reaper that wipes the install.

    Returns right after spawning the reaper so the HTTP response can drain
    before files start disappearing from under the agent (the reaper script
    itself begins with a short sleep).
    """
    import os
    import shutil
    import subprocess  # nosec B404
    import tempfile

    # Best-effort teardown through the deployer first. The reaper also stops
    # the containers, but the deployer path gives the host-macvlan/ipvlan
    # helper a chance to clean up routes. Failure is logged, never fatal.
    try:
        await asyncio.to_thread(_deployer.teardown, None)
        await asyncio.to_thread(clear_state)
    except Exception:
        log.exception("self_destruct: pre-reap teardown failed — reaper will force-stop containers")

    # The script lives under /tmp so it survives `rm -rf /opt/decnet*`.
    handle, script_path = tempfile.mkstemp(prefix="decnet-reaper-", suffix=".sh", dir="/tmp")  # nosec B108 — reaper must outlive /opt/decnet removal
    try:
        os.write(handle, _REAPER_SCRIPT.encode())
    finally:
        os.close(handle)
    os.chmod(script_path, 0o700)  # nosec B103 — root-owned reaper, needs exec

    # The reaper MUST run outside decnet-agent.service's cgroup — otherwise
    # `systemctl stop decnet-agent` SIGTERMs the whole cgroup (reaper
    # included) before rm -rf completes. A fresh POSIX session
    # (`start_new_session=True`) does NOT escape the systemd cgroup, so
    # `systemd-run` is preferred when present: it hands the command to
    # systemd as a transient unit detached from the caller's service.
    # NOTE(review): no `--scope` flag is passed, so per systemd-run's
    # documented default this is a transient *service* unit, not a scope.
    # Without systemd-run (non-systemd host / container) we fall back to a
    # bare Popen of the script.
    reaper_cmd = ["/bin/bash", script_path]
    systemd_run = shutil.which("systemd-run")
    if systemd_run:
        launcher = [
            systemd_run,
            "--collect",
            "--unit", f"decnet-reaper-{os.getpid()}",
            "--description", "DECNET agent self-destruct reaper",
        ]
        via = "systemd-run"
    else:
        launcher = []
        via = "popen"

    subprocess.Popen(  # nosec B603
        launcher + reaper_cmd,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        close_fds=True,
        start_new_session=True,
    )
    log.warning(
        "self_destruct: reaper spawned path=%s via=%s — agent will die in ~3s",
        script_path, via,
    )
async def status() -> dict[str, Any]:
    """Report the agent's persisted deployment plus live container state.

    Returns:
        ``{"deployed": False, "deckies": []}`` when no state is saved;
        otherwise a dict with the mode, compose path, each decky's
        ``model_dump()`` and the per-decky runtime map.
    """
    saved = await asyncio.to_thread(load_state)
    if saved is None:
        return {"deployed": False, "deckies": []}

    config, compose_path = saved
    # The docker query blocks, so it is run on a worker thread.
    runtime = await asyncio.to_thread(_decky_runtime_states, config)
    decky_dumps = [decky.model_dump() for decky in config.deckies]
    return {
        "deployed": True,
        "mode": config.mode,
        "compose_path": str(compose_path),
        "deckies": decky_dumps,
        "runtime": runtime,
    }

View File

@@ -1,134 +0,0 @@
"""Agent → master liveness heartbeat loop.
Every ``INTERVAL_S`` seconds the worker posts ``executor.status()`` to
``POST <master>/swarm/heartbeat`` over mTLS. The master pins the
presented client cert's SHA-256 against the ``SwarmHost`` row for the
claimed ``host_uuid``; a match refreshes ``last_heartbeat`` + each
``DeckyShard``'s snapshot + runtime state.
Identity comes from ``/etc/decnet/decnet.ini`` (seeded by the enroll
bundle) — specifically ``DECNET_HOST_UUID`` and ``DECNET_MASTER_HOST``.
The worker's existing ``~/.decnet/agent/`` bundle (or
``/etc/decnet/agent/``) provides the mTLS client cert.
Started/stopped via the agent FastAPI app's lifespan. If identity
plumbing is missing (pre-enrollment dev runs) the loop logs at DEBUG and
declines to start — callers don't have to guard it.
"""
from __future__ import annotations
import asyncio
import pathlib
from typing import Optional
import httpx
from decnet.agent import executor as _exec
from decnet.logging import get_logger
from decnet.swarm import pki
from decnet.swarm.log_forwarder import build_worker_ssl_context
# Module logger for the heartbeat loop.
log = get_logger("agent.heartbeat")
# Seconds slept between heartbeat ticks (see ``_loop``).
INTERVAL_S = 30.0
# Per-phase HTTP timeouts for the heartbeat client, in seconds.
_TIMEOUT = httpx.Timeout(connect=5.0, read=10.0, write=5.0, pool=5.0)
# Handle of the running heartbeat task; None until ``start()`` creates one
# and again after ``stop()``.
_task: Optional[asyncio.Task] = None
def _resolve_agent_dir() -> pathlib.Path:
    """Locate the worker's cert-bundle directory.

    Resolution order: the ``DECNET_AGENT_DIR`` environment variable when set
    and non-empty, else ``/etc/decnet/agent`` if it exists (production
    install), else ``pki.DEFAULT_AGENT_DIR`` (dev).
    """
    import os

    override = os.environ.get("DECNET_AGENT_DIR")
    if override:
        return pathlib.Path(override)

    system_dir = pathlib.Path("/etc/decnet/agent")
    return system_dir if system_dir.exists() else pki.DEFAULT_AGENT_DIR
async def _tick(client: httpx.AsyncClient, url: str, host_uuid: str, agent_version: str) -> None:
    """Send one heartbeat: POST the current executor status to ``url``.

    A 204 response is treated as success. Any other status is logged as a
    warning together with the first 200 characters of the response body.
    """
    payload = {
        "host_uuid": host_uuid,
        "agent_version": agent_version,
        "status": await _exec.status(),
    }
    resp = await client.post(url, json=payload)
    # 403 / 404 are terminal-ish — the caller still keeps looping because an
    # operator may re-enrol the host mid-session, but we log loudly so prod
    # ops can spot cert-pinning drift.
    if resp.status_code != 204:
        log.warning(
            "heartbeat rejected status=%d body=%s",
            resp.status_code, resp.text[:200],
        )
async def _loop(url: str, host_uuid: str, agent_version: str, ssl_ctx) -> None:
    """Run heartbeat ticks forever, sleeping ``INTERVAL_S`` between them.

    One ``httpx.AsyncClient`` (using ``ssl_ctx`` and ``_TIMEOUT``) is shared
    by all ticks. A failing tick is logged and retried on the next interval;
    only cancellation ends the loop.
    """
    log.info(
        "heartbeat loop starting url=%s host_uuid=%s interval=%ss",
        url, host_uuid, INTERVAL_S,
    )
    async with httpx.AsyncClient(verify=ssl_ctx, timeout=_TIMEOUT) as session:
        while True:
            try:
                await _tick(session, url, host_uuid, agent_version)
            except asyncio.CancelledError:
                # Cancellation is how stop() ends the loop — never swallow it.
                raise
            except Exception:
                log.exception("heartbeat tick failed — will retry in %ss", INTERVAL_S)
            await asyncio.sleep(INTERVAL_S)
def start() -> Optional[asyncio.Task]:
    """Start the background heartbeat task and return it.

    Returns the already-running task if there is one. Returns ``None``
    without starting anything when ``DECNET_HOST_UUID`` or
    ``DECNET_MASTER_HOST`` is unset, or when the worker SSL context cannot be
    built — callers do not need to guard the call.
    """
    global _task
    from decnet.env import (
        DECNET_HOST_UUID,
        DECNET_MASTER_HOST,
        DECNET_SWARMCTL_PORT,
    )

    already_running = _task is not None and not _task.done()
    if already_running:
        return _task

    if not (DECNET_HOST_UUID and DECNET_MASTER_HOST):
        log.debug("heartbeat not starting — DECNET_HOST_UUID or DECNET_MASTER_HOST unset")
        return None

    bundle_dir = _resolve_agent_dir()
    try:
        tls_context = build_worker_ssl_context(bundle_dir)
    except Exception:
        log.exception("heartbeat not starting — worker SSL context unavailable at %s", bundle_dir)
        return None

    # The package version is informational only; fall back rather than fail.
    try:
        from decnet import __version__ as _v
    except Exception:
        agent_version = "unknown"
    else:
        agent_version = _v

    endpoint = f"https://{DECNET_MASTER_HOST}:{DECNET_SWARMCTL_PORT}/swarm/heartbeat"
    _task = asyncio.create_task(
        _loop(endpoint, DECNET_HOST_UUID, agent_version, tls_context),
        name="agent-heartbeat",
    )
    return _task
async def stop() -> None:
    """Cancel the heartbeat task, wait for it to finish, and forget it.

    Does nothing when no task was ever started. Whatever the task raises
    while winding down (cancellation or any other ``Exception``) is
    swallowed so shutdown is never blocked by the heartbeat.
    """
    global _task
    running = _task
    if running is None:
        return

    running.cancel()
    try:
        await running
    except (asyncio.CancelledError, Exception):
        pass
    _task = None

View File

@@ -1,70 +0,0 @@
"""Worker-agent uvicorn launcher.
Starts ``decnet.agent.app:app`` over HTTPS with mTLS enforcement. The
worker must already have a bundle in ``~/.decnet/agent/`` (delivered by
``decnet swarm enroll`` from the master); if it does not, we refuse to
start — unauthenticated agents are not a supported mode.
"""
from __future__ import annotations
import os
import pathlib
import signal
import subprocess # nosec B404
import sys
from decnet.logging import get_logger
from decnet.swarm import pki
log = get_logger("agent.server")
def run(host: str, port: int, agent_dir: pathlib.Path = pki.DEFAULT_AGENT_DIR) -> int:
    """Launch the agent app under uvicorn with mandatory client certificates.

    Args:
        host: Bind address passed to uvicorn.
        port: Bind port passed to uvicorn.
        agent_dir: Directory holding ``worker.key``, ``worker.crt`` and
            ``ca.crt``.

    Returns:
        ``2`` when no cert bundle can be loaded from ``agent_dir``; otherwise
        the uvicorn child's exit code, or ``0`` if the child had already gone
        away when Ctrl+C was being handled.
    """
    if pki.load_worker_bundle(agent_dir) is None:
        print(
            f"[agent] No cert bundle at {agent_dir}. "
            f"Run `decnet swarm enroll` from the master first.",
            file=sys.stderr,
        )
        return 2

    tls_files = {
        "--ssl-keyfile": agent_dir / "worker.key",
        "--ssl-certfile": agent_dir / "worker.crt",
        "--ssl-ca-certs": agent_dir / "ca.crt",
    }
    cmd = [
        sys.executable, "-m", "uvicorn", "decnet.agent.app:app",
        "--host", host,
        "--port", str(port),
    ]
    for flag, file_path in tls_files.items():
        cmd += [flag, str(file_path)]
    # 2 == ssl.CERT_REQUIRED — clients MUST present a CA-signed cert.
    cmd += ["--ssl-cert-reqs", "2"]

    log.info("agent starting host=%s port=%d bundle=%s", host, port, agent_dir)
    # Own session/process group so Ctrl+C / SIGTERM can be propagated to the
    # whole uvicorn tree with killpg (same pattern as `decnet api`).
    child = subprocess.Popen(cmd, start_new_session=True)  # nosec B603
    try:
        return child.wait()
    except KeyboardInterrupt:
        try:
            os.killpg(child.pid, signal.SIGTERM)
            try:
                return child.wait(timeout=10)
            except subprocess.TimeoutExpired:
                # Graceful stop timed out — force-kill the group.
                os.killpg(child.pid, signal.SIGKILL)
                return child.wait()
        except ProcessLookupError:
            # The group is already gone; treat as a clean exit.
            return 0

View File

@@ -148,7 +148,7 @@ ARCHETYPES: dict[str, Archetype] = {
slug="deaddeck",
display_name="Deaddeck (Entry Point)",
description="Internet-facing entry point with real interactive SSH — no honeypot emulation",
services=["ssh"],
services=["real_ssh"],
preferred_distros=["debian", "ubuntu22"],
nmap_os="linux",
),
@@ -167,4 +167,4 @@ def all_archetypes() -> dict[str, Archetype]:
def random_archetype() -> Archetype:
return random.choice(list(ARCHETYPES.values())) # nosec B311
return random.choice(list(ARCHETYPES.values()))

461
decnet/cli.py Normal file
View File

@@ -0,0 +1,461 @@
"""
DECNET CLI — entry point for all commands.
Usage:
decnet deploy --mode unihost --deckies 5 --randomize-services
decnet status
decnet teardown [--all | --id decky-01]
decnet services
"""
import random
from typing import Optional
import typer
from rich.console import Console
from rich.table import Table
from decnet.archetypes import Archetype, all_archetypes, get_archetype
from decnet.config import (
DeckyConfig,
DecnetConfig,
random_hostname,
)
from decnet.distros import all_distros, get_distro, random_distro
from decnet.ini_loader import IniConfig, load_ini
from decnet.network import detect_interface, detect_subnet, allocate_ips, get_host_ip
from decnet.services.registry import all_services
app = typer.Typer(
name="decnet",
help="Deploy a deception network of honeypot deckies on your LAN.",
no_args_is_help=True,
)
console = Console()
def _all_service_names() -> list[str]:
    """Return the names of all registered services, sorted alphabetically.

    Names are read from the live plugin registry on every call.
    """
    registry = all_services()
    return sorted(registry)
def _resolve_distros(
    distros_explicit: list[str] | None,
    randomize_distros: bool,
    n: int,
    archetype: Archetype | None = None,
) -> list[str]:
    """Pick ``n`` distro slugs from CLI flags or archetype preference.

    Precedence: an explicit list (cycled), then random choice per decky, then
    the archetype's preferred distros (cycled), and finally a cycle through
    every known distro to maximize heterogeneity.
    """
    if randomize_distros and not distros_explicit:
        return [random_distro().slug for _ in range(n)]

    if distros_explicit:
        pool = distros_explicit
    elif archetype:
        pool = archetype.preferred_distros
    else:
        pool = list(all_distros().keys())

    # Round-robin over the chosen pool.
    return [pool[index % len(pool)] for index in range(n)]
def _build_deckies(
    n: int,
    ips: list[str],
    services_explicit: list[str] | None,
    randomize_services: bool,
    distros_explicit: list[str] | None = None,
    randomize_distros: bool = False,
    archetype: Archetype | None = None,
) -> list[DeckyConfig]:
    """Build one ``DeckyConfig`` per address in ``ips`` (classic CLI path).

    Service source precedence per decky: ``services_explicit``, then the
    archetype's services, then a random set of 1–3 registered services.
    Random sets are re-drawn (up to 21 draws) to avoid repeating a
    combination already handed out in this call.

    Raises:
        typer.Exit: code 1 when no service source is available.
    """
    distro_slugs = _resolve_distros(distros_explicit, randomize_distros, n, archetype)
    archetype_slug = archetype.slug if archetype else None
    nmap_os = archetype.nmap_os if archetype else "linux"

    seen_combos: set[frozenset] = set()
    result = []
    for position, ip in enumerate(ips, start=1):
        profile = get_distro(distro_slugs[position - 1])
        hostname = random_hostname(profile.slug)

        if services_explicit:
            svc_list = services_explicit
        elif archetype:
            svc_list = list(archetype.services)
        elif randomize_services:
            svc_pool = _all_service_names()
            draws = 0
            chosen = None
            # Keep drawing while the combo is a repeat and the draw budget
            # (20 retries after the first draw) is not exhausted.
            while chosen is None or (chosen in seen_combos and draws <= 20):
                count = random.randint(1, min(3, len(svc_pool)))
                chosen = frozenset(random.sample(svc_pool, count))
                draws += 1
            svc_list = list(chosen)
            seen_combos.add(chosen)
        else:
            typer.echo("Error: provide --services, --archetype, or --randomize-services.", err=True)
            raise typer.Exit(1)

        result.append(
            DeckyConfig(
                name=f"decky-{position:02d}",
                ip=ip,
                services=svc_list,
                distro=profile.slug,
                base_image=profile.image,
                build_base=profile.build_base,
                hostname=hostname,
                archetype=archetype_slug,
                nmap_os=nmap_os,
            )
        )
    return result
def _build_deckies_from_ini(
    ini: IniConfig,
    subnet_cidr: str,
    gateway: str,
    host_ip: str,
    randomize: bool,
) -> list[DeckyConfig]:
    """Build DeckyConfig list from an IniConfig, auto-allocating missing IPs.

    Args:
        ini: Parsed INI config; one decky is built per entry in ``ini.deckies``.
        subnet_cidr: Subnet from which missing IPs are drawn.
        gateway: Gateway address, excluded from auto-allocation.
        host_ip: Host address, excluded from auto-allocation.
        randomize: Allow a random 1–3 service set for deckies that declare
            neither ``services`` nor an archetype.

    Returns:
        The deckies in INI order.

    Raises:
        typer.Exit: code 1 on an unknown archetype, an unknown service, or a
            decky with no service source.
        RuntimeError: when the subnet has no free address left.
    """
    from ipaddress import IPv4Address, IPv4Network

    # Rich parses "[name]" as a style tag and drops it from the output, so
    # user-supplied text is escaped before being embedded in markup.
    from rich.markup import escape

    explicit_ips: set[IPv4Address] = {
        IPv4Address(s.ip) for s in ini.deckies if s.ip
    }
    net = IPv4Network(subnet_cidr, strict=False)
    reserved = {
        net.network_address,
        net.broadcast_address,
        IPv4Address(gateway),
        IPv4Address(host_ip),
    } | explicit_ips
    # Lazy pool of free addresses; explicit IPs are reserved up front so an
    # auto-assigned decky can never collide with a later explicit one.
    auto_pool = (str(addr) for addr in net.hosts() if addr not in reserved)

    deckies: list[DeckyConfig] = []
    for spec in ini.deckies:
        # INI section header as literal text, safe inside Rich markup.
        section = escape(f"[{spec.name}]")

        # Resolve archetype (if any) — explicit services/distro override it
        arch: Archetype | None = None
        if spec.archetype:
            try:
                arch = get_archetype(spec.archetype)
            except ValueError as e:
                console.print(f"[red]{escape(str(e))}[/]")
                raise typer.Exit(1)

        # Distro: archetype preferred list if there is one, otherwise all
        # distros — cycled by this decky's position in the output list.
        distro_pool = arch.preferred_distros if arch else list(all_distros().keys())
        distro = get_distro(distro_pool[len(deckies) % len(distro_pool)])
        hostname = random_hostname(distro.slug)

        ip = spec.ip or next(auto_pool, None)
        if ip is None:
            raise RuntimeError(
                f"Not enough free IPs in {subnet_cidr} while assigning IP for '{spec.name}'."
            )

        if spec.services:
            available = _all_service_names()
            known = set(available)
            unknown = [s for s in spec.services if s not in known]
            if unknown:
                console.print(
                    f"[red]Unknown service(s) in {section}: {unknown}. "
                    f"Available: {available}[/]"
                )
                raise typer.Exit(1)
            svc_list = spec.services
        elif arch:
            svc_list = list(arch.services)
        elif randomize:
            svc_pool = _all_service_names()
            count = random.randint(1, min(3, len(svc_pool)))
            svc_list = random.sample(svc_pool, count)
        else:
            console.print(
                f"[red]Decky '{section}' has no services= in config. "
                "Add services=, archetype=, or use --randomize-services.[/]"
            )
            raise typer.Exit(1)

        # nmap_os priority: explicit INI key > archetype default > "linux"
        resolved_nmap_os = spec.nmap_os or (arch.nmap_os if arch else "linux")

        deckies.append(DeckyConfig(
            name=spec.name,
            ip=ip,
            services=svc_list,
            distro=distro.slug,
            base_image=distro.image,
            build_base=distro.build_base,
            hostname=hostname,
            archetype=arch.slug if arch else None,
            service_config=spec.service_config,
            nmap_os=resolved_nmap_os,
        ))
    return deckies
@app.command()
def deploy(
    mode: str = typer.Option("unihost", "--mode", "-m", help="Deployment mode: unihost | swarm"),
    deckies: Optional[int] = typer.Option(None, "--deckies", "-n", help="Number of deckies to deploy (required without --config)", min=1),
    interface: Optional[str] = typer.Option(None, "--interface", "-i", help="Host NIC (auto-detected if omitted)"),
    subnet: Optional[str] = typer.Option(None, "--subnet", help="LAN subnet CIDR (auto-detected if omitted)"),
    ip_start: Optional[str] = typer.Option(None, "--ip-start", help="First decky IP (auto if omitted)"),
    services: Optional[str] = typer.Option(None, "--services", help="Comma-separated services, e.g. ssh,smb,rdp"),
    randomize_services: bool = typer.Option(False, "--randomize-services", help="Assign random services to each decky"),
    distro: Optional[str] = typer.Option(None, "--distro", help="Comma-separated distro slugs, e.g. debian,ubuntu22,rocky9"),
    randomize_distros: bool = typer.Option(False, "--randomize-distros", help="Assign a random distro to each decky"),
    log_target: Optional[str] = typer.Option(None, "--log-target", help="Forward logs to ip:port (e.g. 192.168.1.5:5140)"),
    log_file: Optional[str] = typer.Option(None, "--log-file", help="Write RFC 5424 syslog to this path inside containers (e.g. /var/log/decnet/decnet.log)"),
    archetype_name: Optional[str] = typer.Option(None, "--archetype", "-a", help="Machine archetype slug (e.g. linux-server, windows-workstation)"),
    dry_run: bool = typer.Option(False, "--dry-run", help="Generate compose file without starting containers"),
    no_cache: bool = typer.Option(False, "--no-cache", help="Force rebuild all images, ignoring Docker layer cache"),
    ipvlan: bool = typer.Option(False, "--ipvlan", help="Use IPvlan L2 instead of MACVLAN (required on WiFi interfaces)"),
    config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to INI config file"),
) -> None:
    """Deploy deckies to the LAN."""
    # Two input paths feed one DecnetConfig: an INI file (--config) or the
    # classic flag set (--deckies plus a service source). Every validation
    # failure prints a red message and exits with code 1.
    if mode not in ("unihost", "swarm"):
        console.print("[red]--mode must be 'unihost' or 'swarm'[/]")
        raise typer.Exit(1)

    # Both paths take the log file straight from the CLI flag.
    effective_log_file = log_file

    if config_file:
        # ---- Config-file path ----
        try:
            ini = load_ini(config_file)
        except FileNotFoundError as e:
            console.print(f"[red]{e}[/]")
            raise typer.Exit(1)

        # CLI flags override INI values when explicitly provided
        iface = interface or ini.interface or detect_interface()
        subnet_cidr = subnet or ini.subnet
        effective_gateway = ini.gateway
        if subnet_cidr is None or effective_gateway is None:
            detected_cidr, detected_gateway = detect_subnet(iface)
            if subnet_cidr is None:
                # No subnet anywhere: take both detected values.
                subnet_cidr, effective_gateway = detected_cidr, detected_gateway
            else:
                # Subnet known, gateway missing: only borrow the gateway.
                effective_gateway = detected_gateway

        host_ip = get_host_ip(iface)
        console.print(f"[dim]Config:[/] {config_file} [dim]Interface:[/] {iface} "
                      f"[dim]Subnet:[/] {subnet_cidr} [dim]Gateway:[/] {effective_gateway} "
                      f"[dim]Host IP:[/] {host_ip}")

        # Register bring-your-own services from INI before validation
        if ini.custom_services:
            from decnet.custom_service import CustomService
            from decnet.services.registry import register_custom_service
            for custom in ini.custom_services:
                register_custom_service(
                    CustomService(
                        name=custom.name,
                        image=custom.image,
                        exec_cmd=custom.exec_cmd,
                        ports=custom.ports,
                    )
                )

        effective_log_target = log_target or ini.log_target
        decky_configs = _build_deckies_from_ini(
            ini, subnet_cidr, effective_gateway, host_ip, randomize_services
        )
    else:
        # ---- Classic CLI path ----
        if deckies is None:
            console.print("[red]--deckies is required when --config is not used.[/]")
            raise typer.Exit(1)

        services_list = [s.strip() for s in services.split(",")] if services else None
        if services_list:
            known = set(_all_service_names())
            unknown = [s for s in services_list if s not in known]
            if unknown:
                console.print(f"[red]Unknown service(s): {unknown}. Available: {_all_service_names()}[/]")
                raise typer.Exit(1)

        # Resolve archetype if provided
        arch: Archetype | None = None
        if archetype_name:
            try:
                arch = get_archetype(archetype_name)
            except ValueError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1)

        if not (services_list or randomize_services or arch):
            console.print("[red]Specify --services, --archetype, or --randomize-services.[/]")
            raise typer.Exit(1)

        iface = interface or detect_interface()
        # The gateway always comes from detection; the detected subnet is
        # only used when --subnet was not given.
        detected_cidr, effective_gateway = detect_subnet(iface)
        subnet_cidr = detected_cidr if subnet is None else subnet
        host_ip = get_host_ip(iface)
        console.print(f"[dim]Interface:[/] {iface} [dim]Subnet:[/] {subnet_cidr} "
                      f"[dim]Gateway:[/] {effective_gateway} [dim]Host IP:[/] {host_ip}")

        distros_list = [d.strip() for d in distro.split(",")] if distro else None
        if distros_list:
            # get_distro is called only for its ValueError on unknown slugs.
            try:
                for slug in distros_list:
                    get_distro(slug)
            except ValueError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1)

        ips = allocate_ips(subnet_cidr, effective_gateway, host_ip, deckies, ip_start)
        decky_configs = _build_deckies(
            deckies, ips, services_list, randomize_services,
            distros_explicit=distros_list, randomize_distros=randomize_distros,
            archetype=arch,
        )
        effective_log_target = log_target

    config = DecnetConfig(
        mode=mode,
        interface=iface,
        subnet=subnet_cidr,
        gateway=effective_gateway,
        deckies=decky_configs,
        log_target=effective_log_target,
        log_file=effective_log_file,
        ipvlan=ipvlan,
    )

    # Warn (but do not abort) when the log sink cannot be reached.
    if effective_log_target and not dry_run:
        from decnet.logging.forwarder import probe_log_target
        if not probe_log_target(effective_log_target):
            console.print(f"[yellow]Warning: log target {effective_log_target} is unreachable. "
                          "Logs will be lost if it stays down.[/]")

    from decnet.deployer import deploy as _deploy
    _deploy(config, dry_run=dry_run, no_cache=no_cache)
@app.command()
def status() -> None:
    """Show running deckies and their status."""
    # Thin CLI wrapper: the deployer module is imported lazily and does all
    # of the reporting.
    from decnet import deployer as _deployer
    _deployer.status()
@app.command()
def teardown(
    all_: bool = typer.Option(False, "--all", help="Tear down all deckies and remove network"),
    id_: Optional[str] = typer.Option(None, "--id", help="Tear down a specific decky by name"),
) -> None:
    """Stop and remove deckies."""
    # At least one selector is mandatory; exit with code 1 otherwise.
    if not (all_ or id_):
        console.print("[red]Specify --all or --id <name>.[/]")
        raise typer.Exit(1)

    # ``id_`` is forwarded as-is: it is None for a plain --all, which the
    # deployer receives as ``decky_id=None``.
    from decnet import deployer as _deployer
    _deployer.teardown(decky_id=id_)
@app.command(name="services")
def list_services() -> None:
    """List all registered honeypot service plugins."""
    registry = all_services()

    table = Table(title="Available Services", show_lines=True)
    table.add_column("Name", style="bold cyan")
    table.add_column("Ports")
    table.add_column("Image")

    # One row per plugin, ordered by service name.
    for name in sorted(registry):
        plugin = registry[name]
        port_text = ", ".join(str(port) for port in plugin.ports)
        table.add_row(name, port_text, plugin.default_image)

    console.print(table)
@app.command(name="distros")
def list_distros() -> None:
    """List all available OS distro profiles for deckies."""
    profiles = all_distros()

    table = Table(title="Available Distro Profiles", show_lines=True)
    table.add_column("Slug", style="bold cyan")
    table.add_column("Display Name")
    table.add_column("Docker Image", style="dim")

    # One row per profile, ordered by slug.
    for slug in sorted(profiles):
        profile = profiles[slug]
        table.add_row(slug, profile.display_name, profile.image)

    console.print(table)
# CLI command `decnet correlate`: feeds log lines into a CorrelationEngine
# and reports traversals, i.e. source IPs that touched at least
# --min-deckies distinct deckies.
# NOTE(review): this view of the file has lost its indentation, so the exact
# nesting of the final stats print and of the --emit-syslog block cannot be
# confirmed from here; the code below is left untouched.
@app.command(name="correlate")
def correlate(
log_file: Optional[str] = typer.Option(None, "--log-file", "-f", help="Path to DECNET syslog file to analyse"),
min_deckies: int = typer.Option(2, "--min-deckies", "-m", help="Minimum number of distinct deckies an IP must touch to be reported"),
output: str = typer.Option("table", "--output", "-o", help="Output format: table | json | syslog"),
emit_syslog: bool = typer.Option(False, "--emit-syslog", help="Also print traversal events as RFC 5424 lines (for SIEM piping)"),
) -> None:
"""Analyse logs for cross-decky traversals and print the attacker movement graph."""
# Imports are local so they are only loaded when this command runs.
import sys
import json as _json
from pathlib import Path
from decnet.correlation.engine import CorrelationEngine
engine = CorrelationEngine()
# Input source: --log-file wins; otherwise piped stdin; otherwise it is a
# usage error (exit code 1).
if log_file:
path = Path(log_file)
if not path.exists():
console.print(f"[red]Log file not found: {log_file}[/]")
raise typer.Exit(1)
engine.ingest_file(path)
elif not sys.stdin.isatty():
for line in sys.stdin:
engine.ingest(line)
else:
console.print("[red]Provide --log-file or pipe log data via stdin.[/]")
raise typer.Exit(1)
traversals = engine.traversals(min_deckies)
# Output format dispatch. Any value other than "json" or "syslog" falls
# through to the table branch — the option value is not validated.
if output == "json":
console.print_json(_json.dumps(engine.report_json(min_deckies), indent=2))
elif output == "syslog":
for line in engine.traversal_syslog_lines(min_deckies):
typer.echo(line)
else:
if not traversals:
console.print(
f"[yellow]No traversals detected "
f"(min_deckies={min_deckies}, events_indexed={engine.events_indexed}).[/]"
)
else:
console.print(engine.report_table(min_deckies))
console.print(
f"[dim]Parsed {engine.lines_parsed} lines · "
f"indexed {engine.events_indexed} events · "
f"{len(engine.all_attackers())} unique IPs · "
f"[bold]{len(traversals)}[/] traversal(s)[/]"
)
# --emit-syslog additionally writes the traversal events as plain lines via
# typer.echo (no Rich formatting).
if emit_syslog:
for line in engine.traversal_syslog_lines(min_deckies):
typer.echo(line)
@app.command(name="archetypes")
def list_archetypes() -> None:
    """List all machine archetype profiles."""
    catalog = all_archetypes()

    table = Table(title="Machine Archetypes", show_lines=True)
    table.add_column("Slug", style="bold cyan")
    table.add_column("Display Name")
    table.add_column("Default Services", style="green")
    table.add_column("Description", style="dim")

    # One row per archetype, ordered by slug.
    for slug in sorted(catalog):
        entry = catalog[slug]
        service_text = ", ".join(entry.services)
        table.add_row(slug, entry.display_name, service_text, entry.description)

    console.print(table)

View File

@@ -1,80 +0,0 @@
"""
DECNET CLI — entry point for all commands.
Usage:
decnet deploy --mode unihost --deckies 5 --randomize-services
decnet status
decnet teardown [--all | --id decky-01]
decnet services
Layout: each command module exports ``register(app)`` which attaches its
commands to the passed Typer app. ``__init__.py`` builds the root app,
calls every module's ``register`` in order, then runs the master-only
gate. The gate must fire LAST so it sees the fully-populated dispatch
table before filtering.
"""
from __future__ import annotations
import typer
from . import (
agent,
api,
db,
deploy,
forwarder,
inventory,
lifecycle,
listener,
profiler,
sniffer,
swarm,
swarmctl,
updater,
web,
workers,
)
from .gating import _gate_commands_by_mode
from .utils import console as console, log as log
app = typer.Typer(
name="decnet",
help="Deploy a deception network of honeypot deckies on your LAN.",
no_args_is_help=True,
)
# Order matches the old flat layout so `decnet --help` reads the same.
for _mod in (
api, swarmctl, agent, updater, listener, forwarder,
swarm,
deploy, lifecycle, workers, inventory,
web, profiler, sniffer, db,
):
_mod.register(app)
_gate_commands_by_mode(app)
# Backwards-compat re-exports. Tests and third-party tooling import these
# directly from ``decnet.cli``; the refactor must keep them resolvable.
from .db import _db_reset_mysql_async # noqa: E402,F401
from .gating import ( # noqa: E402,F401
MASTER_ONLY_COMMANDS,
MASTER_ONLY_GROUPS,
_agent_mode_active,
_require_master_mode,
)
from .utils import ( # noqa: E402,F401
_daemonize,
_http_request,
_is_running,
_kill_all_services,
_pid_dir,
_service_registry,
_spawn_detached,
_swarmctl_base_url,
)
if __name__ == "__main__": # pragma: no cover
app()

View File

@@ -1,64 +0,0 @@
from __future__ import annotations
import os
import pathlib as _pathlib
import sys as _sys
from typing import Optional
import typer
from . import utils as _utils
from .utils import console, log
def register(app: typer.Typer) -> None:
    """Attach the ``agent`` command to ``app``."""

    @app.command()
    def agent(
        port: int = typer.Option(8765, "--port", help="Port for the worker agent"),
        host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the worker agent"),  # nosec B104
        agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker cert bundle dir (default: ~/.decnet/agent, expanded under the running user's HOME — set this when running as sudo/root)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        no_forwarder: bool = typer.Option(False, "--no-forwarder", help="Do not auto-spawn the log forwarder alongside the agent"),
    ) -> None:
        """Run the DECNET SWARM worker agent (requires a cert bundle in ~/.decnet/agent/).
        By default, `decnet agent` auto-spawns `decnet forwarder` as a fully-
        detached sibling process so worker logs start flowing to the master
        without a second manual invocation. The forwarder survives agent
        restarts and crashes — if it dies on its own, restart it manually
        with `decnet forwarder --daemon …`. Pass --no-forwarder to skip.
        """
        from decnet.agent import server as _agent_server
        from decnet.env import DECNET_SWARM_MASTER_HOST, DECNET_INGEST_LOG_FILE
        from decnet.swarm import pki as _pki

        if agent_dir:
            resolved_dir = _pathlib.Path(agent_dir)
        else:
            resolved_dir = _pki.DEFAULT_AGENT_DIR

        if daemon:
            log.info("agent daemonizing host=%s port=%d", host, port)
            _utils._daemonize()

        wants_forwarder = not no_forwarder
        if wants_forwarder and not DECNET_SWARM_MASTER_HOST:
            # Without a master host there is nowhere to forward to.
            log.info("agent skipping forwarder auto-spawn (DECNET_SWARM_MASTER_HOST unset)")
        elif wants_forwarder:
            syslog_port = int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514"))
            fw_argv = [
                _sys.executable, "-m", "decnet", "forwarder",
                "--master-host", DECNET_SWARM_MASTER_HOST,
                "--master-port", str(syslog_port),
                "--agent-dir", str(resolved_dir),
                "--log-file", str(DECNET_INGEST_LOG_FILE),
                "--daemon",
            ]
            # A forwarder that fails to spawn is reported but never stops
            # the agent itself from starting.
            try:
                pid = _utils._spawn_detached(fw_argv, _utils._pid_dir() / "forwarder.pid")
                log.info("agent auto-spawned forwarder pid=%d master=%s", pid, DECNET_SWARM_MASTER_HOST)
                console.print(f"[dim]Auto-spawned forwarder (pid {pid}) → {DECNET_SWARM_MASTER_HOST}.[/]")
            except Exception as e:  # noqa: BLE001
                log.warning("agent could not auto-spawn forwarder: %s", e)
                console.print(f"[yellow]forwarder auto-spawn skipped: {e}[/]")

        log.info("agent command invoked host=%s port=%d dir=%s", host, port, resolved_dir)
        console.print(f"[green]Starting DECNET worker agent on {host}:{port} (mTLS)...[/]")

        # Surface the server's non-zero return code as the CLI exit code.
        rc = _agent_server.run(host, port, agent_dir=resolved_dir)
        if rc != 0:
            raise typer.Exit(rc)

View File

@@ -1,53 +0,0 @@
from __future__ import annotations
import os
import signal
import subprocess # nosec B404
import sys
import typer
from decnet.env import DECNET_API_HOST, DECNET_API_PORT, DECNET_INGEST_LOG_FILE
from . import utils as _utils
from .gating import _require_master_mode
from .utils import console, log
def register(app: typer.Typer) -> None:
    """Attach the ``api`` command to ``app``."""

    @app.command()
    def api(
        port: int = typer.Option(DECNET_API_PORT, "--port", help="Port for the backend API"),
        host: str = typer.Option(DECNET_API_HOST, "--host", help="Host IP for the backend API"),
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Path to the DECNET log file to monitor"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        workers: int = typer.Option(1, "--workers", "-w", min=1, help="Number of uvicorn worker processes"),
    ) -> None:
        """Run the DECNET API and Web Dashboard in standalone mode."""
        _require_master_mode("api")
        if daemon:
            log.info("API daemonizing host=%s port=%d workers=%d", host, port, workers)
            _utils._daemonize()

        log.info("API command invoked host=%s port=%d workers=%d", host, port, workers)
        console.print(f"[green]Starting DECNET API on {host}:{port} (workers={workers})...[/]")

        # The log path reaches the uvicorn child through its environment.
        _env: dict[str, str] = os.environ.copy()
        _env["DECNET_INGEST_LOG_FILE"] = str(log_file)
        _cmd = [sys.executable, "-m", "uvicorn", "decnet.web.api:app",
                "--host", host, "--port", str(port), "--workers", str(workers)]

        try:
            # Own session/process group so the whole uvicorn tree can be
            # signalled with killpg below.
            proc = subprocess.Popen(_cmd, env=_env, start_new_session=True)  # nosec B603 B404
        except (FileNotFoundError, subprocess.SubprocessError):
            console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
            # A failed start must not look like success to the caller.
            raise typer.Exit(1) from None

        try:
            rc = proc.wait()
        except KeyboardInterrupt:
            # Ctrl+C: stop the child group gracefully, then force-kill if it
            # has not exited after 10 seconds.
            try:
                os.killpg(proc.pid, signal.SIGTERM)
                try:
                    proc.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    os.killpg(proc.pid, signal.SIGKILL)
                    proc.wait()
            except ProcessLookupError:
                pass
            return

        # Same convention as the `agent` command: a child that dies with a
        # non-zero code makes the CLI exit non-zero too.
        if rc != 0:
            raise typer.Exit(rc)

View File

@@ -1,130 +0,0 @@
from __future__ import annotations
from typing import Optional
import typer
from rich.table import Table
from .utils import console, log
# Tables handled by `db-reset`, in processing order.
# For DROP TABLE the sequence is significant: a table that holds a foreign key
# must come before the table it points at.
#   * attacker_behavior references attackers
#   * decky_shards references swarm_hosts
_DB_RESET_TABLES: tuple[str, ...] = (
    "attacker_behavior", "attackers",
    "logs",
    "bounty",
    "state",
    "users",
    "decky_shards", "swarm_hosts",
)
async def _db_reset_mysql_async(dsn: str, mode: str, confirm: bool) -> None:
    """Report on, and optionally wipe, the DECNET tables of a MySQL database.

    A row-count summary of every table in ``_DB_RESET_TABLES`` is always
    printed. Nothing is modified unless *confirm* is true, in which case each
    table that could be counted is truncated (``mode == "truncate"``) or
    dropped (any other *mode*). Kept separate from the CLI wrapper so it can
    be driven without a Typer runner.
    """
    from urllib.parse import urlparse

    from sqlalchemy import text
    from sqlalchemy.ext.asyncio import create_async_engine

    database = urlparse(dsn).path.lstrip("/") or "(default)"
    engine = create_async_engine(dsn)
    try:
        # Pass 1: count rows per table; -1 marks a table that could not be read.
        counts: dict[str, int] = {}
        async with engine.connect() as probe:
            for table in _DB_RESET_TABLES:
                try:
                    outcome = await probe.execute(text(f"SELECT COUNT(*) FROM `{table}`"))  # nosec B608
                    counts[table] = outcome.scalar() or 0
                except Exception:  # noqa: BLE001 — ProgrammingError for missing table varies by driver
                    counts[table] = -1

        report = Table(title=f"DECNET MySQL reset — database `{database}` (mode={mode})")
        report.add_column("Table", style="cyan")
        report.add_column("Rows", justify="right")
        for table, total in counts.items():
            if total < 0:
                cell = "[dim]missing[/]"
            else:
                cell = f"{total:,}"
            report.add_row(table, cell)
        console.print(report)

        if not confirm:
            console.print(
                "[yellow]Dry-run only. Re-run with [bold]--i-know-what-im-doing[/] "
                "to actually execute.[/]"
            )
            return

        # Pass 2: destructive part, with foreign-key checks switched off for
        # the duration and switched back on afterwards.
        truncating = mode == "truncate"
        async with engine.begin() as txn:
            await txn.execute(text("SET FOREIGN_KEY_CHECKS = 0"))
            for table in _DB_RESET_TABLES:
                if counts.get(table, -1) >= 0:
                    if truncating:
                        statement, label = f"TRUNCATE TABLE `{table}`", f"TRUNCATE {table}"
                    else:
                        statement, label = f"DROP TABLE `{table}`", f"DROP TABLE {table}"
                    await txn.execute(text(statement))
                    console.print(f"[green]✓ {label}[/]")
            await txn.execute(text("SET FOREIGN_KEY_CHECKS = 1"))

        console.print(f"[bold green]Done. Database `{database}` reset ({mode}).[/]")
    finally:
        await engine.dispose()
def register(app: typer.Typer) -> None:
    """Attach the ``db-reset`` command to *app*."""

    @app.command(name="db-reset")
    def db_reset(
        i_know: bool = typer.Option(
            False,
            "--i-know-what-im-doing",
            help="Required to actually execute. Without it, the command runs in dry-run mode.",
        ),
        mode: str = typer.Option(
            "truncate",
            "--mode",
            help="truncate (wipe rows, keep schema) | drop-tables (DROP TABLE for each DECNET table)",
        ),
        url: Optional[str] = typer.Option(
            None,
            "--url",
            help="Override DECNET_DB_URL for this invocation (e.g. when cleanup needs admin creds).",
        ),
    ) -> None:
        """Wipe the MySQL database used by the DECNET dashboard.
        Destructive. Runs dry by default — pass --i-know-what-im-doing to commit.
        Only supported against MySQL; refuses to operate on SQLite.
        """
        import asyncio
        import os

        # Reject anything other than the two supported modes up front.
        if mode not in ("truncate", "drop-tables"):
            console.print(f"[red]Invalid --mode '{mode}'. Expected: truncate | drop-tables.[/]")
            raise typer.Exit(2)

        # The backend is taken from DECNET_DB_TYPE; an unset variable counts as sqlite.
        backend = os.environ.get("DECNET_DB_TYPE", "sqlite").lower()
        if backend != "mysql":
            console.print(
                f"[red]db-reset is MySQL-only (DECNET_DB_TYPE='{backend}'). "
                f"For SQLite, just delete the decnet.db file.[/]"
            )
            raise typer.Exit(2)

        # DSN precedence: --url, then DECNET_DB_URL, then build_mysql_url().
        target = url if url else os.environ.get("DECNET_DB_URL")
        if not target:
            from decnet.web.db.mysql.database import build_mysql_url

            try:
                target = build_mysql_url()
            except ValueError as err:
                console.print(f"[red]{err}[/]")
                raise typer.Exit(2) from err

        log.info("db-reset invoked mode=%s confirm=%s", mode, i_know)
        try:
            asyncio.run(_db_reset_mysql_async(target, mode=mode, confirm=i_know))
        except Exception as err:  # noqa: BLE001
            console.print(f"[red]db-reset failed: {err}[/]")
            raise typer.Exit(1) from err

View File

@@ -1,307 +0,0 @@
from __future__ import annotations
from typing import Optional
import typer
from rich.table import Table
from decnet.archetypes import Archetype, get_archetype
from decnet.config import DecnetConfig
from decnet.distros import get_distro
from decnet.env import DECNET_API_HOST, DECNET_INGEST_LOG_FILE
from decnet.fleet import all_service_names, build_deckies, build_deckies_from_ini
from decnet.ini_loader import load_ini
from decnet.network import detect_interface, detect_subnet, allocate_ips, get_host_ip
from . import utils as _utils
from .gating import _require_master_mode
from .utils import console, log
def _deploy_swarm(config: "DecnetConfig", *, dry_run: bool, no_cache: bool) -> None:
    """Spread the deckies round-robin over the swarm workers and POST the result to swarmctl.

    Workers are the hosts swarmctl reports as ``enrolled`` followed by those
    it reports as ``active``. Exits with status 1 when there are no workers
    or when any per-worker result is not ok.
    """
    api_root = _utils._swarmctl_base_url(None)

    # Collect candidate workers: enrolled hosts first, then active ones.
    pool: list = []
    for state in ("enrolled", "active"):
        listing = _utils._http_request("GET", api_root + "/swarm/hosts?host_status=" + state)
        pool.extend(listing.json())
    if not pool:
        console.print("[red]No enrolled workers — run `decnet swarm enroll ...` first.[/]")
        raise typer.Exit(1)

    # Decky i is pinned to worker i mod len(pool).
    pinned: list = [
        decky.model_copy(update={"host_uuid": pool[position % len(pool)]["uuid"]})
        for position, decky in enumerate(config.deckies)
    ]
    config = config.model_copy(update={"deckies": pinned})

    payload = {"config": config.model_dump(mode="json"), "dry_run": dry_run, "no_cache": no_cache}
    console.print(f"[cyan]Dispatching {len(config.deckies)} deckies across {len(pool)} worker(s)...[/]")
    reply = _utils._http_request("POST", api_root + "/swarm/deploy", json_body=payload, timeout=900.0)

    report = Table(title="SWARM deploy results")
    for heading in ("worker", "host_uuid", "ok", "detail"):
        report.add_column(heading)

    failure_seen = False
    for entry in reply.json().get("results", []):
        succeeded = bool(entry.get("ok"))
        failure_seen = failure_seen or not succeeded
        info = entry.get("detail")
        if isinstance(info, dict):
            # A structured detail is reduced to its status field.
            info = info.get("status") or "ok"
        verdict = "[green]yes[/]" if succeeded else "[red]no[/]"
        report.add_row(
            str(entry.get("host_name") or ""),
            str(entry.get("host_uuid") or ""),
            verdict,
            str(info)[:80],
        )
    console.print(report)

    if failure_seen:
        raise typer.Exit(1)
def register(app: typer.Typer) -> None:
    """Attach the master-only ``deploy`` command to *app*."""

    @app.command()
    def deploy(
        mode: str = typer.Option("unihost", "--mode", "-m", help="Deployment mode: unihost | swarm"),
        deckies: Optional[int] = typer.Option(None, "--deckies", "-n", help="Number of deckies to deploy (required without --config)", min=1),
        interface: Optional[str] = typer.Option(None, "--interface", "-i", help="Host NIC (auto-detected if omitted)"),
        subnet: Optional[str] = typer.Option(None, "--subnet", help="LAN subnet CIDR (auto-detected if omitted)"),
        ip_start: Optional[str] = typer.Option(None, "--ip-start", help="First decky IP (auto if omitted)"),
        services: Optional[str] = typer.Option(None, "--services", help="Comma-separated services, e.g. ssh,smb,rdp"),
        randomize_services: bool = typer.Option(False, "--randomize-services", help="Assign random services to each decky"),
        distro: Optional[str] = typer.Option(None, "--distro", help="Comma-separated distro slugs, e.g. debian,ubuntu22,rocky9"),
        randomize_distros: bool = typer.Option(False, "--randomize-distros", help="Assign a random distro to each decky"),
        log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Host path for the collector to write RFC 5424 logs (e.g. /var/log/decnet/decnet.log)"),
        archetype_name: Optional[str] = typer.Option(None, "--archetype", "-a", help="Machine archetype slug (e.g. linux-server, windows-workstation)"),
        mutate_interval: Optional[int] = typer.Option(30, "--mutate-interval", help="Automatically rotate services every N minutes"),
        dry_run: bool = typer.Option(False, "--dry-run", help="Generate compose file without starting containers"),
        no_cache: bool = typer.Option(False, "--no-cache", help="Force rebuild all images, ignoring Docker layer cache"),
        parallel: bool = typer.Option(False, "--parallel", help="Build all images concurrently (enables BuildKit, separates build from up)"),
        ipvlan: bool = typer.Option(False, "--ipvlan", help="Use IPvlan L2 instead of MACVLAN (required on WiFi interfaces)"),
        config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to INI config file"),
        api: bool = typer.Option(False, "--api", help="Start the FastAPI backend to ingest and serve logs"),
        api_port: int = typer.Option(8000, "--api-port", help="Port for the backend API"),
        daemon: bool = typer.Option(False, "--daemon", help="Detach to background as a daemon process"),
    ) -> None:
        """Deploy deckies to the LAN."""
        import os
        import subprocess  # nosec B404
        import sys
        from pathlib import Path as _Path

        _require_master_mode("deploy")
        if daemon:
            log.info("deploy daemonizing mode=%s deckies=%s", mode, deckies)
            _utils._daemonize()
        log.info("deploy command invoked mode=%s deckies=%s dry_run=%s", mode, deckies, dry_run)

        if mode not in ("unihost", "swarm"):
            console.print("[red]--mode must be 'unihost' or 'swarm'[/]")
            raise typer.Exit(1)

        if config_file:
            # --- INI-driven deployment -------------------------------------
            try:
                ini = load_ini(config_file)
            except FileNotFoundError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1) from e

            # CLI flags win over INI values, which win over auto-detection.
            iface = interface or ini.interface or detect_interface()
            subnet_cidr = subnet or ini.subnet
            effective_gateway = ini.gateway
            if subnet_cidr is None:
                subnet_cidr, effective_gateway = detect_subnet(iface)
            elif effective_gateway is None:
                _, effective_gateway = detect_subnet(iface)
            host_ip = get_host_ip(iface)
            console.print(f"[dim]Config:[/] {config_file} [dim]Interface:[/] {iface} "
                          f"[dim]Subnet:[/] {subnet_cidr} [dim]Gateway:[/] {effective_gateway} "
                          f"[dim]Host IP:[/] {host_ip}")

            # Custom services declared in the INI are registered before the
            # deckies that may reference them are built.
            if ini.custom_services:
                from decnet.custom_service import CustomService
                from decnet.services.registry import register_custom_service
                for cs in ini.custom_services:
                    register_custom_service(
                        CustomService(
                            name=cs.name,
                            image=cs.image,
                            exec_cmd=cs.exec_cmd,
                            ports=cs.ports,
                        )
                    )

            try:
                decky_configs = build_deckies_from_ini(
                    ini, subnet_cidr, effective_gateway, host_ip, randomize_services, cli_mutate_interval=mutate_interval
                )
            except ValueError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1) from e
        else:
            # --- Flag-driven deployment ------------------------------------
            if deckies is None:
                console.print("[red]--deckies is required when --config is not used.[/]")
                raise typer.Exit(1)

            services_list = [s.strip() for s in services.split(",")] if services else None
            if services_list:
                known = set(all_service_names())
                unknown = [s for s in services_list if s not in known]
                if unknown:
                    console.print(f"[red]Unknown service(s): {unknown}. Available: {all_service_names()}[/]")
                    raise typer.Exit(1)

            arch: Archetype | None = None
            if archetype_name:
                try:
                    arch = get_archetype(archetype_name)
                except ValueError as e:
                    console.print(f"[red]{e}[/]")
                    raise typer.Exit(1) from e

            # At least one source of services has to be given.
            if not services_list and not randomize_services and not arch:
                console.print("[red]Specify --services, --archetype, or --randomize-services.[/]")
                raise typer.Exit(1)

            iface = interface or detect_interface()
            if subnet is None:
                subnet_cidr, effective_gateway = detect_subnet(iface)
            else:
                # An explicit subnet still takes its gateway from detection.
                subnet_cidr = subnet
                _, effective_gateway = detect_subnet(iface)
            host_ip = get_host_ip(iface)
            console.print(f"[dim]Interface:[/] {iface} [dim]Subnet:[/] {subnet_cidr} "
                          f"[dim]Gateway:[/] {effective_gateway} [dim]Host IP:[/] {host_ip}")

            distros_list = [d.strip() for d in distro.split(",")] if distro else None
            if distros_list:
                # get_distro() is called purely to validate each slug.
                try:
                    for slug in distros_list:
                        get_distro(slug)
                except ValueError as e:
                    console.print(f"[red]{e}[/]")
                    raise typer.Exit(1) from e

            ips = allocate_ips(subnet_cidr, effective_gateway, host_ip, deckies, ip_start)
            decky_configs = build_deckies(
                deckies, ips, services_list, randomize_services,
                distros_explicit=distros_list, randomize_distros=randomize_distros,
                archetype=arch, mutate_interval=mutate_interval,
            )

        # Both paths use the --log-file value; with --api and no log file the
        # log lands in the current working directory.
        effective_log_file = log_file
        if api and not effective_log_file:
            effective_log_file = os.path.join(os.getcwd(), "decnet.log")
            console.print(f"[cyan]API mode enabled: defaulting log-file to {effective_log_file}[/]")

        config = DecnetConfig(
            mode=mode,
            interface=iface,
            subnet=subnet_cidr,
            gateway=effective_gateway,
            deckies=decky_configs,
            log_file=effective_log_file,
            ipvlan=ipvlan,
            mutate_interval=mutate_interval,
        )
        log.debug("deploy: config built deckies=%d interface=%s subnet=%s", len(config.deckies), config.interface, config.subnet)

        if mode == "swarm":
            # Swarm mode hands everything to swarmctl; no local helpers start.
            _deploy_swarm(config, dry_run=dry_run, no_cache=no_cache)
            if dry_run:
                log.info("deploy: swarm dry-run complete, no workers dispatched")
            else:
                log.info("deploy: swarm deployment complete deckies=%d", len(config.deckies))
            return

        from decnet.engine import deploy as _deploy
        _deploy(config, dry_run=dry_run, no_cache=no_cache, parallel=parallel)
        if dry_run:
            log.info("deploy: dry-run complete, no containers started")
        else:
            log.info("deploy: deployment complete deckies=%d", len(config.deckies))

        # --- Background helpers (skipped on a dry run) -----------------------
        if mutate_interval is not None and not dry_run:
            console.print(f"[green]Starting DECNET Mutator watcher in the background (interval: {mutate_interval}m)...[/]")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "mutate", "--watch"],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start mutator watcher.[/]")

        if effective_log_file and not dry_run and not api:
            _collector_err = _Path(effective_log_file).with_suffix(".collector.log")
            console.print(f"[bold cyan]Starting log collector[/] → {effective_log_file}")
            try:
                # The child gets its own copy of the descriptor when it is
                # spawned, so the parent's handle is closed right after Popen
                # returns instead of being leaked.
                with open(_collector_err, "a") as _collector_out:
                    subprocess.Popen(  # nosec B603
                        [sys.executable, "-m", "decnet.cli", "collect", "--log-file", str(effective_log_file)],
                        stdin=subprocess.DEVNULL,
                        stdout=_collector_out,
                        stderr=subprocess.STDOUT,
                        start_new_session=True,
                    )
            except (OSError, subprocess.SubprocessError):
                # Same best-effort handling as the other helpers; OSError also
                # covers an unwritable collector log path.
                console.print("[red]Failed to start log collector.[/]")

        if api and not dry_run:
            console.print(f"[green]Starting DECNET API on port {api_port}...[/]")
            _env: dict[str, str] = os.environ.copy()
            _env["DECNET_INGEST_LOG_FILE"] = str(effective_log_file or "")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "uvicorn", "decnet.web.api:app", "--host", DECNET_API_HOST, "--port", str(api_port)],
                    env=_env,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT
                )
                console.print(f"[dim]API running at http://{DECNET_API_HOST}:{api_port}[/]")
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")

        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-PROBER[/] (auto-discovers attackers from log stream)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "probe", "--daemon", "--log-file", str(effective_log_file)],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-PROBER.[/]")

        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-PROFILER[/] (builds attacker profiles from log stream)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "profiler", "--daemon"],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-PROFILER.[/]")

        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-SNIFFER[/] (passive network capture)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "sniffer", "--daemon", "--log-file", str(effective_log_file)],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-SNIFFER.[/]")

View File

@@ -1,74 +0,0 @@
from __future__ import annotations
import asyncio
import pathlib
import signal
from typing import Optional
import typer
from decnet.env import DECNET_INGEST_LOG_FILE
from . import utils as _utils
from .utils import console, log
def register(app: typer.Typer) -> None:
    """Attach the worker-side ``forwarder`` command to *app*."""

    @app.command()
    def forwarder(
        master_host: Optional[str] = typer.Option(None, "--master-host", help="Master listener hostname/IP (default: $DECNET_SWARM_MASTER_HOST)"),
        master_port: int = typer.Option(6514, "--master-port", help="Master listener TCP port (RFC 5425 default 6514)"),
        log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Local RFC 5424 file to tail and forward"),
        agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker cert bundle dir (default: ~/.decnet/agent)"),
        state_db: Optional[str] = typer.Option(None, "--state-db", help="Forwarder offset SQLite path (default: <agent_dir>/forwarder.db)"),
        poll_interval: float = typer.Option(0.5, "--poll-interval", help="Seconds between log file stat checks"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the worker-side syslog-over-TLS forwarder (RFC 5425, mTLS to master:6514)."""
        from decnet.env import DECNET_SWARM_MASTER_HOST
        from decnet.swarm import pki
        from decnet.swarm.log_forwarder import ForwarderConfig, run_forwarder

        # All argument checks run before any daemonizing, so usage errors are
        # still printed to the invoking terminal.
        destination = master_host or DECNET_SWARM_MASTER_HOST
        if not destination:
            console.print("[red]--master-host is required (or set DECNET_SWARM_MASTER_HOST).[/]")
            raise typer.Exit(2)

        if agent_dir:
            bundle_dir = pathlib.Path(agent_dir)
        else:
            bundle_dir = pki.DEFAULT_AGENT_DIR
        worker_cert = bundle_dir / "worker.crt"
        if not worker_cert.exists():
            console.print(f"[red]No worker cert bundle at {bundle_dir} — enroll from the master first.[/]")
            raise typer.Exit(2)

        if not log_file:
            console.print("[red]--log-file is required.[/]")
            raise typer.Exit(2)

        offsets_path = pathlib.Path(state_db) if state_db else None
        settings = ForwarderConfig(
            log_path=pathlib.Path(log_file),
            master_host=destination,
            master_port=master_port,
            agent_dir=bundle_dir,
            state_db=offsets_path,
        )

        if daemon:
            log.info("forwarder daemonizing master=%s:%d log=%s", destination, master_port, log_file)
            _utils._daemonize()

        log.info("forwarder command invoked master=%s:%d log=%s", destination, master_port, log_file)
        console.print(f"[green]Starting DECNET forwarder → {destination}:{master_port} (mTLS)...[/]")

        async def _serve() -> None:
            # SIGTERM and SIGINT both set the stop event handed to the forwarder.
            halt = asyncio.Event()
            running_loop = asyncio.get_running_loop()
            for signum in (signal.SIGTERM, signal.SIGINT):
                try:
                    running_loop.add_signal_handler(signum, halt.set)
                except (NotImplementedError, RuntimeError):  # pragma: no cover
                    # Handler could not be installed; carry on without it.
                    pass
            await run_forwarder(settings, poll_interval=poll_interval, stop_event=halt)

        try:
            asyncio.run(_serve())
        except KeyboardInterrupt:
            pass

View File

@@ -1,71 +0,0 @@
"""Role-based CLI gating.
MAINTAINERS: when you add a new Typer command (or add_typer group) that is
master-only, register its name in MASTER_ONLY_COMMANDS / MASTER_ONLY_GROUPS
below. The gate is the only thing that:
(a) hides the command from `decnet --help` on worker hosts, and
(b) prevents a misconfigured worker from invoking master-side logic.
Forgetting to register a new command is a role-boundary bug. Grep for
MASTER_ONLY when touching command registration.
Worker-legitimate commands (NOT in these sets): agent, updater, forwarder,
status, collect, probe, sniffer. Agents run deckies locally and should be
able to inspect them + run the per-host microservices (collector streams
container logs, prober characterizes attackers hitting this host, sniffer
captures traffic). Mutator and Profiler stay master-only: the mutator
orchestrates respawns across the swarm; the profiler rebuilds attacker
profiles against the master DB (no per-host DB exists).
"""
from __future__ import annotations
import os
import typer
from .utils import console
# Names of top-level commands that are removed from the CLI on agent hosts.
MASTER_ONLY_COMMANDS: frozenset[str] = frozenset(
    (
        "api",
        "swarmctl",
        "deploy",
        "redeploy",
        "teardown",
        "mutate",
        "listener",
        "profiler",
        "services",
        "distros",
        "correlate",
        "archetypes",
        "web",
        "db-reset",
    )
)

# Names of sub-command groups (add_typer) that are removed on agent hosts.
MASTER_ONLY_GROUPS: frozenset[str] = frozenset(("swarm",))
def _agent_mode_active() -> bool:
    """Report whether master-only commands must be blocked on this host.

    True only when DECNET_MODE is ``agent`` (case-insensitive; defaults to
    ``master``) and DECNET_DISALLOW_MASTER is ``true`` (case-insensitive;
    defaults to ``true``). Setting DECNET_DISALLOW_MASTER to anything else
    opts an agent host into hybrid use.
    """
    if os.environ.get("DECNET_MODE", "master").lower() != "agent":
        return False
    return os.environ.get("DECNET_DISALLOW_MASTER", "true").lower() == "true"
def _require_master_mode(command_name: str) -> None:
    """Abort with exit status 1 when a master-only command runs on a gated agent host.

    Second line of defence: _gate_commands_by_mode() already removes these
    commands from the Typer app on such hosts, but a command body can still
    be reached by importing and calling the function directly (tests,
    third-party tools), which bypasses Typer.

    *command_name* is only used in the error message.
    """
    if not _agent_mode_active():
        return
    message = (
        f"[red]`decnet {command_name}` is a master-only command; this host "
        f"is configured as an agent (DECNET_MODE=agent).[/]"
    )
    console.print(message)
    raise typer.Exit(1)
def _gate_commands_by_mode(_app: typer.Typer) -> None:
    """Remove master-only commands and groups from *_app* on a gated agent host.

    Does nothing unless _agent_mode_active() is true. Otherwise the app's
    ``registered_commands`` and ``registered_groups`` lists are replaced by
    copies without the entries named in MASTER_ONLY_COMMANDS and
    MASTER_ONLY_GROUPS.
    """
    if not _agent_mode_active():
        return

    def _cli_name(command) -> str:
        """Return the name the command is exposed under on the command line."""
        if command.name:
            return command.name
        # Without an explicit name Typer derives one from the callback by
        # lower-casing it and turning underscores into dashes. Compare that
        # derived form so a callback such as `db_reset` is still recognised
        # as `db-reset`.
        return command.callback.__name__.lower().replace("_", "-")

    _app.registered_commands = [
        c for c in _app.registered_commands
        if _cli_name(c) not in MASTER_ONLY_COMMANDS
    ]
    _app.registered_groups = [
        g for g in _app.registered_groups
        if g.name not in MASTER_ONLY_GROUPS
    ]

View File

@@ -1,52 +0,0 @@
from __future__ import annotations
import typer
from rich.table import Table
from decnet.archetypes import all_archetypes
from decnet.distros import all_distros
from decnet.services.registry import all_services
from .utils import console
def register(app: typer.Typer) -> None:
    """Attach the read-only listing commands (services, distros, archetypes) to *app*."""

    @app.command(name="services")
    def list_services() -> None:
        """List all registered honeypot service plugins."""
        plugins = all_services()
        listing = Table(title="Available Services", show_lines=True)
        listing.add_column("Name", style="bold cyan")
        listing.add_column("Ports")
        listing.add_column("Image")
        # One row per plugin, ordered by name.
        for key in sorted(plugins):
            plugin = plugins[key]
            port_text = ", ".join(map(str, plugin.ports))
            listing.add_row(key, port_text, plugin.default_image)
        console.print(listing)

    @app.command(name="distros")
    def list_distros() -> None:
        """List all available OS distro profiles for deckies."""
        listing = Table(title="Available Distro Profiles", show_lines=True)
        listing.add_column("Slug", style="bold cyan")
        listing.add_column("Display Name")
        listing.add_column("Docker Image", style="dim")
        profiles = all_distros()
        # One row per profile, ordered by slug.
        for key in sorted(profiles):
            entry = profiles[key]
            listing.add_row(key, entry.display_name, entry.image)
        console.print(listing)

    @app.command(name="archetypes")
    def list_archetypes() -> None:
        """List all machine archetype profiles."""
        listing = Table(title="Machine Archetypes", show_lines=True)
        listing.add_column("Slug", style="bold cyan")
        listing.add_column("Display Name")
        listing.add_column("Default Services", style="green")
        listing.add_column("Description", style="dim")
        catalogue = all_archetypes()
        # One row per archetype, ordered by slug.
        for key in sorted(catalogue):
            entry = catalogue[key]
            service_text = ", ".join(entry.services)
            listing.add_row(key, entry.display_name, service_text, entry.description)
        console.print(listing)

View File

@@ -1,97 +0,0 @@
from __future__ import annotations
import subprocess # nosec B404
from typing import Optional
import typer
from rich.table import Table
from decnet.env import DECNET_INGEST_LOG_FILE
from . import utils as _utils
from .gating import _agent_mode_active, _require_master_mode
from .utils import console, log
def register(app: typer.Typer) -> None:
    """Attach the lifecycle commands (redeploy, status, teardown) to *app*."""

    @app.command()
    def redeploy(
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to the DECNET log file"),
    ) -> None:
        """Check running DECNET services and relaunch any that are down."""
        # redeploy is listed as master-only, so it gets the same in-body guard
        # as the other master-only commands.
        _require_master_mode("redeploy")
        log.info("redeploy: checking services")
        registry = _utils._service_registry(str(log_file))

        table = Table(title="DECNET Services", show_lines=True)
        table.add_column("Service", style="bold cyan")
        table.add_column("Status")
        table.add_column("PID", style="dim")
        table.add_column("Action")

        relaunched = 0
        failed = 0
        for name, match_fn, launch_args in registry:
            pid = _utils._is_running(match_fn)
            if pid is not None:
                table.add_row(name, "[green]UP[/]", str(pid), "")
                continue
            try:
                subprocess.Popen(  # nosec B603
                    launch_args,
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError) as exc:
                table.add_row(name, "[red]DOWN[/]", "", f"[red]failed: {exc}[/]")
                failed += 1
            else:
                table.add_row(name, "[red]DOWN[/]", "", "[green]relaunched[/]")
                relaunched += 1
        console.print(table)

        # Only claim that everything is running when nothing was down; a
        # failed relaunch is reported and produces a non-zero exit status.
        if relaunched:
            console.print(f"[green]{relaunched} service(s) relaunched.[/]")
        if failed:
            console.print(f"[red]{failed} service(s) failed to relaunch.[/]")
            raise typer.Exit(1)
        if not relaunched:
            console.print("[green]All services running.[/]")

    @app.command()
    def status() -> None:
        """Show running deckies and their status."""
        log.info("status command invoked")
        from decnet.engine import status as _status
        _status()

        registry = _utils._service_registry(str(DECNET_INGEST_LOG_FILE))
        if _agent_mode_active():
            # Master-side services are left out of the report on a gated agent host.
            registry = [r for r in registry if r[0] not in {"Mutator", "Profiler", "API"}]

        svc_table = Table(title="DECNET Services", show_lines=True)
        svc_table.add_column("Service", style="bold cyan")
        svc_table.add_column("Status")
        svc_table.add_column("PID", style="dim")
        for name, match_fn, _launch_args in registry:
            pid = _utils._is_running(match_fn)
            if pid is not None:
                svc_table.add_row(name, "[green]UP[/]", str(pid))
            else:
                svc_table.add_row(name, "[red]DOWN[/]", "")
        console.print(svc_table)

    @app.command()
    def teardown(
        all_: bool = typer.Option(False, "--all", help="Tear down all deckies and remove network"),
        id_: Optional[str] = typer.Option(None, "--id", help="Tear down a specific decky by name"),
    ) -> None:
        """Stop and remove deckies."""
        _require_master_mode("teardown")
        if not all_ and not id_:
            console.print("[red]Specify --all or --id <name>.[/]")
            raise typer.Exit(1)

        log.info("teardown command invoked all=%s id=%s", all_, id_)
        from decnet.engine import teardown as _teardown
        _teardown(decky_id=id_)
        log.info("teardown complete all=%s id=%s", all_, id_)

        # A full teardown also stops the background services.
        if all_:
            _utils._kill_all_services()

View File

@@ -1,57 +0,0 @@
from __future__ import annotations
import asyncio
import pathlib
import signal
from typing import Optional
import typer
from . import utils as _utils
from .utils import console, log
def register(app: typer.Typer) -> None:
    """Attach the master-only ``listener`` command to *app*."""

    @app.command()
    def listener(
        bind_host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the master syslog-TLS listener"),  # nosec B104
        bind_port: int = typer.Option(6514, "--port", help="Listener TCP port (RFC 5425 default 6514)"),
        log_path: Optional[str] = typer.Option(None, "--log-path", help="RFC 5424 forensic sink (default: ./master.log)"),
        json_path: Optional[str] = typer.Option(None, "--json-path", help="Parsed-JSON ingest sink (default: ./master.json)"),
        ca_dir: Optional[str] = typer.Option(None, "--ca-dir", help="DECNET CA dir (default: ~/.decnet/ca)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the master-side syslog-over-TLS listener (RFC 5425, mTLS)."""
        # `listener` is a master-only command; refuse to run when the function
        # is reached on a gated agent host without going through the CLI gate.
        from .gating import _require_master_mode
        _require_master_mode("listener")

        from decnet.swarm import pki
        from decnet.swarm.log_listener import ListenerConfig, run_listener

        # Unset paths fall back to the CA default and to files in the current directory.
        resolved_ca_dir = pathlib.Path(ca_dir) if ca_dir else pki.DEFAULT_CA_DIR
        resolved_log = pathlib.Path(log_path) if log_path else pathlib.Path("master.log")
        resolved_json = pathlib.Path(json_path) if json_path else pathlib.Path("master.json")
        cfg = ListenerConfig(
            log_path=resolved_log, json_path=resolved_json,
            bind_host=bind_host, bind_port=bind_port, ca_dir=resolved_ca_dir,
        )

        if daemon:
            log.info("listener daemonizing host=%s port=%d", bind_host, bind_port)
            _utils._daemonize()

        log.info("listener command invoked host=%s port=%d", bind_host, bind_port)
        console.print(f"[green]Starting DECNET log listener on {bind_host}:{bind_port} (mTLS)...[/]")

        async def _main() -> None:
            # SIGTERM and SIGINT both set the stop event handed to the listener.
            stop = asyncio.Event()
            loop = asyncio.get_running_loop()
            for sig in (signal.SIGTERM, signal.SIGINT):
                try:
                    loop.add_signal_handler(sig, stop.set)
                except (NotImplementedError, RuntimeError):  # pragma: no cover
                    # Handler could not be installed; carry on without it.
                    pass
            await run_listener(cfg, stop_event=stop)

        try:
            asyncio.run(_main())
        except KeyboardInterrupt:
            pass

View File

@@ -1,34 +0,0 @@
from __future__ import annotations
import typer
from . import utils as _utils
from .utils import console, log
def register(app: typer.Typer) -> None:
    """Attach the master-only ``profiler`` command to *app*."""

    @app.command(name="profiler")
    def profiler_cmd(
        interval: int = typer.Option(30, "--interval", "-i", help="Seconds between profile rebuild cycles"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the attacker profiler as a standalone microservice."""
        # `profiler` is a master-only command; refuse to run when the function
        # is reached on a gated agent host without going through the CLI gate.
        from .gating import _require_master_mode
        _require_master_mode("profiler")

        import asyncio
        from decnet.profiler import attacker_profile_worker
        from decnet.web.dependencies import repo

        if daemon:
            log.info("profiler daemonizing interval=%d", interval)
            _utils._daemonize()

        log.info("profiler starting interval=%d", interval)
        console.print(f"[bold cyan]Profiler starting[/] (interval: {interval}s)")

        async def _run() -> None:
            # The repository is initialised before the worker starts using it.
            await repo.initialize()
            await attacker_profile_worker(repo, interval=interval)

        try:
            asyncio.run(_run())
        except KeyboardInterrupt:
            console.print("\n[yellow]Profiler stopped.[/]")

View File

@@ -1,31 +0,0 @@
from __future__ import annotations
import typer
from decnet.env import DECNET_INGEST_LOG_FILE
from . import utils as _utils
from .utils import console, log
def register(app: typer.Typer) -> None:
    """Attach the ``sniffer`` command to *app*."""

    @app.command(name="sniffer")
    def sniffer_cmd(
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write captured syslog + JSON records"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the network sniffer as a standalone microservice."""
        import asyncio

        from decnet.sniffer import sniffer_worker

        # Detach first when asked to, then announce the start.
        if daemon:
            log.info("sniffer daemonizing log_file=%s", log_file)
            _utils._daemonize()
        log.info("sniffer starting log_file=%s", log_file)
        console.print(f"[bold cyan]Sniffer starting[/] → {log_file}")

        def _capture() -> None:
            # Runs the worker until it returns or is interrupted.
            asyncio.run(sniffer_worker(log_file))

        try:
            _capture()
        except KeyboardInterrupt:
            console.print("\n[yellow]Sniffer stopped.[/]")

View File

@@ -1,346 +0,0 @@
"""`decnet swarm ...` — master-side operator commands (HTTP to local swarmctl)."""
from __future__ import annotations
from typing import Optional
import typer
from rich.table import Table
from . import utils as _utils
from .utils import console
def register(app: typer.Typer) -> None:
    """Mount the ``swarm`` command group on *app*.

    Builds a nested Typer application named ``swarm`` and registers the
    ``enroll``, ``list``, ``check``, ``update``, ``deckies`` and
    ``decommission`` commands on it. Controller calls go through
    ``_utils._http_request`` against the base URL produced by
    ``_utils._swarmctl_base_url``; ``update`` additionally pushes straight
    to each worker through ``UpdaterClient``.
    """
    swarm_app = typer.Typer(
        name="swarm",
        help="Manage swarm workers (enroll, list, decommission). Requires `decnet swarmctl` running.",
        no_args_is_help=True,
    )
    app.add_typer(swarm_app, name="swarm")

    @swarm_app.command("enroll")
    def swarm_enroll(
        name: str = typer.Option(..., "--name", help="Short hostname for the worker (also the cert CN)"),
        address: str = typer.Option(..., "--address", help="IP or DNS the master uses to reach the worker"),
        agent_port: int = typer.Option(8765, "--agent-port", help="Worker agent TCP port"),
        sans: Optional[str] = typer.Option(None, "--sans", help="Comma-separated extra SANs for the worker cert"),
        notes: Optional[str] = typer.Option(None, "--notes", help="Free-form operator notes"),
        out_dir: Optional[str] = typer.Option(None, "--out-dir", help="Write the bundle (ca.crt/worker.crt/worker.key) to this dir for scp"),
        updater: bool = typer.Option(False, "--updater", help="Also issue an updater-identity cert (CN=updater@<name>) for the remote self-updater"),
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL (default: 127.0.0.1:8770)"),
    ) -> None:
        """Issue a mTLS bundle for a new worker and register it in the swarm."""
        import pathlib as _pathlib

        def _write_private_key(path: _pathlib.Path, pem: str) -> None:
            """Write *pem* to *path*, restricting the file to its owner first."""
            # Tighten permissions BEFORE the key material hits the disk so the
            # key is never readable by other users, not even briefly. touch()
            # covers the new-file case, chmod() the pre-existing-file case.
            try:
                path.touch(mode=0o600, exist_ok=True)
                path.chmod(0o600)
            except OSError:
                # Best effort, as before: some filesystems have no POSIX modes.
                pass
            path.write_text(pem)

        # Only send the optional fields the operator actually supplied.
        body: dict = {"name": name, "address": address, "agent_port": agent_port}
        if sans:
            body["sans"] = [s.strip() for s in sans.split(",") if s.strip()]
        if notes:
            body["notes"] = notes
        if updater:
            body["issue_updater_bundle"] = True

        resp = _utils._http_request("POST", _utils._swarmctl_base_url(url) + "/swarm/enroll", json_body=body)
        data = resp.json()
        console.print(f"[green]Enrolled worker:[/] {data['name']} "
                      f"[dim]uuid=[/]{data['host_uuid']} "
                      f"[dim]fingerprint=[/]{data['fingerprint']}")
        if data.get("updater"):
            console.print(f"[green] + updater identity[/] "
                          f"[dim]fingerprint=[/]{data['updater']['fingerprint']}")

        if not out_dir:
            console.print("[yellow]No --out-dir given — bundle PEMs are in the JSON response; persist them before leaving this shell.[/]")
            return

        target = _pathlib.Path(out_dir).expanduser()
        target.mkdir(parents=True, exist_ok=True)
        (target / "ca.crt").write_text(data["ca_cert_pem"])
        (target / "worker.crt").write_text(data["worker_cert_pem"])
        _write_private_key(target / "worker.key", data["worker_key_pem"])
        console.print(f"[cyan]Agent bundle written to[/] {target}")

        if data.get("updater"):
            # The updater identity lives in a sibling "<out-dir>-updater" directory.
            upd_target = target.parent / f"{target.name}-updater"
            upd_target.mkdir(parents=True, exist_ok=True)
            (upd_target / "ca.crt").write_text(data["ca_cert_pem"])
            (upd_target / "updater.crt").write_text(data["updater"]["updater_cert_pem"])
            _write_private_key(upd_target / "updater.key", data["updater"]["updater_key_pem"])
            console.print(f"[cyan]Updater bundle written to[/] {upd_target}")
            console.print("[dim]Ship the agent dir to ~/.decnet/agent/ and the updater dir to ~/.decnet/updater/ on the worker.[/]")
        else:
            console.print("[dim]Ship this directory to the worker at ~/.decnet/agent/ (or wherever `decnet agent --agent-dir` points).[/]")

    @swarm_app.command("list")
    def swarm_list(
        host_status: Optional[str] = typer.Option(None, "--status", help="Filter by status (enrolled|active|unreachable|decommissioned)"),
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
    ) -> None:
        """List enrolled workers."""
        from urllib.parse import urlencode

        # urlencode() keeps operator-supplied text from corrupting the query string.
        q = "?" + urlencode({"host_status": host_status}) if host_status else ""
        resp = _utils._http_request("GET", _utils._swarmctl_base_url(url) + "/swarm/hosts" + q)
        rows = resp.json()
        if not rows:
            console.print("[dim]No workers enrolled.[/]")
            return
        table = Table(title="DECNET swarm workers")
        for col in ("name", "address", "port", "status", "last heartbeat", "enrolled"):
            table.add_column(col)
        for r in rows:
            table.add_row(
                r.get("name") or "",
                r.get("address") or "",
                str(r.get("agent_port") or ""),
                r.get("status") or "",
                str(r.get("last_heartbeat") or ""),
                str(r.get("enrolled_at") or ""),
            )
        console.print(table)

    @swarm_app.command("check")
    def swarm_check(
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
        json_out: bool = typer.Option(False, "--json", help="Emit JSON instead of a table"),
    ) -> None:
        """Actively probe every enrolled worker and refresh status + last_heartbeat."""
        # Longer timeout than the 30 s default of _http_request.
        resp = _utils._http_request("POST", _utils._swarmctl_base_url(url) + "/swarm/check", timeout=60.0)
        payload = resp.json()
        results = payload.get("results", [])
        if json_out:
            console.print_json(data=payload)
            return
        if not results:
            console.print("[dim]No workers enrolled.[/]")
            return
        table = Table(title="DECNET swarm check")
        for col in ("name", "address", "reachable", "detail"):
            table.add_column(col)
        for r in results:
            reachable = r.get("reachable")
            mark = "[green]yes[/]" if reachable else "[red]no[/]"
            detail = r.get("detail")
            detail_str = ""
            if isinstance(detail, dict):
                # Prefer the "status" field; otherwise flatten the whole dict.
                detail_str = detail.get("status") or ", ".join(f"{k}={v}" for k, v in detail.items())
            elif detail is not None:
                detail_str = str(detail)
            table.add_row(
                r.get("name") or "",
                r.get("address") or "",
                mark,
                detail_str,
            )
        console.print(table)

    @swarm_app.command("update")
    def swarm_update(
        host: Optional[str] = typer.Option(None, "--host", help="Target worker (name or UUID). Omit with --all."),
        all_hosts: bool = typer.Option(False, "--all", help="Push to every enrolled worker."),
        include_self: bool = typer.Option(False, "--include-self", help="Also push to each updater's /update-self after a successful agent update."),
        root: Optional[str] = typer.Option(None, "--root", help="Source tree to tar (default: CWD)."),
        exclude: list[str] = typer.Option([], "--exclude", help="Additional exclude glob. Repeatable."),
        updater_port: int = typer.Option(8766, "--updater-port", help="Port the workers' updater listens on."),
        dry_run: bool = typer.Option(False, "--dry-run", help="Build the tarball and print stats; no network."),
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL."),
    ) -> None:
        """Push the current working tree to workers' self-updaters (with auto-rollback on failure)."""
        import asyncio
        import pathlib as _pathlib
        from decnet.swarm.tar_tree import tar_working_tree, detect_git_sha
        from decnet.swarm.updater_client import UpdaterClient

        # Exactly one of --host / --all must be given.
        if not (host or all_hosts):
            console.print("[red]Supply --host <name> or --all.[/]")
            raise typer.Exit(2)
        if host and all_hosts:
            console.print("[red]--host and --all are mutually exclusive.[/]")
            raise typer.Exit(2)

        base = _utils._swarmctl_base_url(url)
        resp = _utils._http_request("GET", base + "/swarm/hosts")
        rows = resp.json()
        if host:
            targets = [r for r in rows if r.get("name") == host or r.get("uuid") == host]
            if not targets:
                console.print(f"[red]No enrolled worker matching '{host}'.[/]")
                raise typer.Exit(1)
        else:
            targets = [r for r in rows if r.get("status") != "decommissioned"]
            if not targets:
                console.print("[dim]No targets.[/]")
                return

        tree_root = _pathlib.Path(root) if root else _pathlib.Path.cwd()
        sha = detect_git_sha(tree_root)
        console.print(f"[dim]Tarring[/] {tree_root} [dim]sha={sha or '(not a git repo)'}[/]")
        tarball = tar_working_tree(tree_root, extra_excludes=exclude)
        console.print(f"[dim]Tarball size:[/] {len(tarball):,} bytes")

        if dry_run:
            console.print("[yellow]--dry-run: not pushing.[/]")
            for t in targets:
                console.print(f" would push to [cyan]{t.get('name')}[/] at {t.get('address')}:{updater_port}")
            return

        async def _push_one(h: dict) -> dict:
            """Push the tarball to one worker; never raises, errors land in ``out["error"]``."""
            name = h.get("name") or h.get("uuid")
            out: dict = {"name": name, "address": h.get("address"), "agent": None, "self": None}
            try:
                async with UpdaterClient(h, updater_port=updater_port) as u:
                    r = await u.update(tarball, sha=sha)
                    out["agent"] = {"status": r.status_code, "body": r.json() if r.content else {}}
                    # The updater itself is only touched after a clean agent update.
                    if r.status_code == 200 and include_self:
                        rs = await u.update_self(tarball, sha=sha)
                        out["self"] = {"status": rs.status_code, "body": rs.json() if rs.content else {}}
            except Exception as exc:  # noqa: BLE001
                out["error"] = f"{type(exc).__name__}: {exc}"
            return out

        async def _push_all() -> list[dict]:
            """Push to every target concurrently."""
            return await asyncio.gather(*(_push_one(t) for t in targets))

        results = asyncio.run(_push_all())

        table = Table(title="DECNET swarm update")
        for col in ("host", "address", "agent", "self", "detail"):
            table.add_column(col)
        any_failure = False
        for r in results:
            agent = r.get("agent") or {}
            self_res = r.get("self") or {}
            err = r.get("error")
            if err:
                any_failure = True
                table.add_row(r["name"], r.get("address") or "", "[red]error[/]", "", err)
                continue

            a_status = agent.get("status")
            if a_status == 200:
                agent_cell = "[green]updated[/]"
            elif a_status == 409:
                agent_cell = "[yellow]rolled-back[/]"
                any_failure = True
            else:
                agent_cell = f"[red]{a_status}[/]"
                any_failure = True

            if not include_self:
                self_cell = ""
            elif self_res.get("status") == 200 or self_res.get("status") is None:
                # No recorded result means the self-update was never attempted.
                self_cell = "[green]ok[/]" if self_res else "[dim]skipped[/]"
            else:
                self_cell = f"[red]{self_res.get('status')}[/]"
                # A rejected self-update is a failure too; reflect it in the exit code.
                any_failure = True

            detail = ""
            body = agent.get("body") or {}
            if isinstance(body, dict):
                # "release" and "detail" are not guaranteed to be dicts (an
                # error detail may be a plain string), so never call .get()
                # on them blindly.
                release = body.get("release")
                failure = body.get("detail")
                sha_text = release.get("sha") if isinstance(release, dict) else None
                err_text = failure.get("error") if isinstance(failure, dict) else failure
                detail = sha_text or err_text or ""
            table.add_row(r["name"], r.get("address") or "", agent_cell, self_cell, str(detail)[:80])
        console.print(table)
        if any_failure:
            raise typer.Exit(1)

    @swarm_app.command("deckies")
    def swarm_deckies(
        host: Optional[str] = typer.Option(None, "--host", help="Filter by worker name or UUID"),
        state: Optional[str] = typer.Option(None, "--state", help="Filter by shard state (pending|running|failed|torn_down)"),
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
        json_out: bool = typer.Option(False, "--json", help="Emit JSON instead of a table"),
    ) -> None:
        """List deployed deckies across the swarm with their owning worker host."""
        from urllib.parse import urlencode

        base = _utils._swarmctl_base_url(url)
        host_uuid: Optional[str] = None
        if host:
            # The deckies endpoint filters by UUID, so resolve a name first.
            resp = _utils._http_request("GET", base + "/swarm/hosts")
            rows = resp.json()
            match = next((r for r in rows if r.get("uuid") == host or r.get("name") == host), None)
            if match is None:
                console.print(f"[red]No enrolled worker matching '{host}'.[/]")
                raise typer.Exit(1)
            host_uuid = match["uuid"]

        params: dict = {}
        if host_uuid:
            params["host_uuid"] = host_uuid
        if state:
            params["state"] = state
        # urlencode() escapes operator-supplied filter values.
        path = "/swarm/deckies" + ("?" + urlencode(params) if params else "")
        resp = _utils._http_request("GET", base + path)
        rows = resp.json()
        if json_out:
            console.print_json(data=rows)
            return
        if not rows:
            console.print("[dim]No deckies deployed.[/]")
            return

        table = Table(title="DECNET swarm deckies")
        for col in ("decky", "host", "address", "state", "services"):
            table.add_column(col)
        for r in rows:
            services = ",".join(r.get("services") or []) or ""
            state_val = r.get("state") or "pending"
            # Unknown states are printed uncoloured.
            colored = {
                "running": f"[green]{state_val}[/]",
                "failed": f"[red]{state_val}[/]",
                "pending": f"[yellow]{state_val}[/]",
                "torn_down": f"[dim]{state_val}[/]",
            }.get(state_val, state_val)
            table.add_row(
                r.get("decky_name") or "",
                r.get("host_name") or "<unknown>",
                r.get("host_address") or "",
                colored,
                services,
            )
        console.print(table)

    @swarm_app.command("decommission")
    def swarm_decommission(
        name: Optional[str] = typer.Option(None, "--name", help="Worker hostname"),
        uuid: Optional[str] = typer.Option(None, "--uuid", help="Worker UUID (skip lookup)"),
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
        yes: bool = typer.Option(False, "--yes", "-y", help="Skip interactive confirmation"),
    ) -> None:
        """Remove a worker from the swarm (cascades decky shard rows)."""
        if not (name or uuid):
            console.print("[red]Supply --name or --uuid.[/]")
            raise typer.Exit(2)

        base = _utils._swarmctl_base_url(url)
        target_uuid = uuid
        target_name = name
        if target_uuid is None:
            # Only a name was given: look the UUID up on the controller.
            resp = _utils._http_request("GET", base + "/swarm/hosts")
            rows = resp.json()
            match = next((r for r in rows if r.get("name") == name), None)
            if match is None:
                console.print(f"[red]No enrolled worker named '{name}'.[/]")
                raise typer.Exit(1)
            target_uuid = match["uuid"]
            target_name = match.get("name") or target_name

        if not yes:
            confirm = typer.confirm(f"Decommission worker {target_name!r} ({target_uuid})?", default=False)
            if not confirm:
                console.print("[dim]Aborted.[/]")
                raise typer.Exit(0)

        _utils._http_request("DELETE", f"{base}/swarm/hosts/{target_uuid}")
        console.print(f"[green]Decommissioned {target_name or target_uuid}.[/]")

View File

@@ -1,104 +0,0 @@
from __future__ import annotations
import os
import signal
import subprocess # nosec B404
import sys
from typing import Optional
import typer
from . import utils as _utils
from .gating import _require_master_mode
from .utils import console, log
def register(app: typer.Typer) -> None:
    """Register the ``swarmctl`` command on *app*."""

    @app.command()
    def swarmctl(
        port: int = typer.Option(8770, "--port", help="Port for the swarm controller"),
        host: str = typer.Option("127.0.0.1", "--host", help="Bind address for the swarm controller"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"),
        tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"),
        cert: Optional[str] = typer.Option(None, "--cert", help="BYOC: path to TLS server cert (PEM). Auto-issues from the DECNET CA if omitted."),
        key: Optional[str] = typer.Option(None, "--key", help="BYOC: path to TLS server private key (PEM)."),
        client_ca: Optional[str] = typer.Option(None, "--client-ca", help="CA bundle used to verify worker client certs. Defaults to the DECNET CA."),
    ) -> None:
        """Run the DECNET SWARM controller (master-side, separate process from `decnet api`).

        By default, `decnet swarmctl` auto-spawns `decnet listener` as a fully-
        detached sibling process so the master starts accepting forwarder
        connections on 6514 without a second manual invocation. The listener
        survives swarmctl restarts and crashes — if it dies on its own,
        restart it manually with `decnet listener --daemon …`. Pass
        --no-listener to skip.

        Pass ``--tls`` to serve over HTTPS with mutual-TLS enforcement. By
        default the server cert is auto-issued from the DECNET CA under
        ``~/.decnet/swarmctl/`` so enrolled workers (which already ship that
        CA's ``ca.crt``) trust it out of the box. BYOC via ``--cert``/``--key``
        if you need a publicly-trusted or externally-managed cert.
        """
        _require_master_mode("swarmctl")
        if daemon:
            log.info("swarmctl daemonizing host=%s port=%d", host, port)
            _utils._daemonize()

        if not no_listener:
            listener_host = os.environ.get("DECNET_LISTENER_HOST", "0.0.0.0")  # nosec B104
            raw_listener_port = os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514")
            try:
                # Parsing the port happens INSIDE the best-effort block: a
                # malformed DECNET_SWARM_SYSLOG_PORT should only skip the
                # listener, not stop the controller from starting.
                listener_port = int(raw_listener_port)
                lst_argv = [
                    sys.executable, "-m", "decnet", "listener",
                    "--host", listener_host,
                    "--port", str(listener_port),
                    "--daemon",
                ]
                pid = _utils._spawn_detached(lst_argv, _utils._pid_dir() / "listener.pid")
                log.info("swarmctl auto-spawned listener pid=%d bind=%s:%d",
                         pid, listener_host, listener_port)
                console.print(f"[dim]Auto-spawned listener (pid {pid}) on {listener_host}:{listener_port}.[/]")
            except Exception as e:  # noqa: BLE001
                log.warning("swarmctl could not auto-spawn listener: %s", e)
                console.print(f"[yellow]listener auto-spawn skipped: {e}[/]")

        log.info("swarmctl command invoked host=%s port=%d tls=%s", host, port, tls)
        scheme = "https" if tls else "http"
        console.print(f"[green]Starting DECNET SWARM controller on {scheme}://{host}:{port}...[/]")
        _cmd = [sys.executable, "-m", "uvicorn", "decnet.web.swarm_api:app",
                "--host", host, "--port", str(port)]

        if tls:
            from decnet.swarm import pki as _pki

            if cert and key:
                cert_path, key_path = cert, key
            elif cert or key:
                # Half a BYOC pair is a usage error.
                console.print("[red]--cert and --key must be provided together.[/]")
                raise typer.Exit(code=2)
            else:
                auto_cert, auto_key, _auto_ca = _pki.ensure_swarmctl_cert(host)
                cert_path, key_path = str(auto_cert), str(auto_key)
                console.print(f"[dim]Auto-issued swarmctl server cert → {cert_path}[/]")
            ca_path = client_ca or str(_pki.DEFAULT_CA_DIR / "ca.crt")
            _cmd += [
                "--ssl-keyfile", key_path,
                "--ssl-certfile", cert_path,
                "--ssl-ca-certs", ca_path,
                # 2 is the numeric value of ssl.CERT_REQUIRED (client cert mandatory).
                "--ssl-cert-reqs", "2",
            ]

        try:
            # Own session => own process group, so killpg() below reaches the
            # whole uvicorn process tree.
            proc = subprocess.Popen(_cmd, start_new_session=True)  # nosec B603 B404
            try:
                proc.wait()
            except KeyboardInterrupt:
                try:
                    os.killpg(proc.pid, signal.SIGTERM)
                    try:
                        proc.wait(timeout=10)
                    except subprocess.TimeoutExpired:
                        # Graceful stop took too long; force it.
                        os.killpg(proc.pid, signal.SIGKILL)
                        proc.wait()
                except ProcessLookupError:
                    # The group was already gone when we tried to signal it.
                    pass
        except (FileNotFoundError, subprocess.SubprocessError):
            console.print("[red]Failed to start swarmctl. Ensure 'uvicorn' is installed in the current environment.[/]")

View File

@@ -1,46 +0,0 @@
from __future__ import annotations
import pathlib as _pathlib
from typing import Optional
import typer
from . import utils as _utils
from .utils import console, log
def register(app: typer.Typer) -> None:
    """Register the ``updater`` command on *app*."""

    @app.command()
    def updater(
        port: int = typer.Option(8766, "--port", help="Port for the self-updater daemon"),
        host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the updater"),  # nosec B104
        updater_dir: Optional[str] = typer.Option(None, "--updater-dir", help="Updater cert bundle dir (default: ~/.decnet/updater)"),
        install_dir: Optional[str] = typer.Option(None, "--install-dir", help="Release install root (default: /opt/decnet)"),
        agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker agent cert bundle (for local /health probes; default: ~/.decnet/agent)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the DECNET self-updater (requires a bundle in ~/.decnet/updater/)."""
        from decnet.swarm import pki as _pki
        from decnet.updater import server as _upd_server

        # Each directory falls back to its default when no override is given.
        if updater_dir:
            bundle_path = _pathlib.Path(updater_dir)
        else:
            bundle_path = _upd_server.DEFAULT_UPDATER_DIR

        if install_dir:
            release_root = _pathlib.Path(install_dir)
        else:
            release_root = _pathlib.Path("/opt/decnet")

        if agent_dir:
            agent_bundle = _pathlib.Path(agent_dir)
        else:
            agent_bundle = _pki.DEFAULT_AGENT_DIR

        if daemon:
            log.info("updater daemonizing host=%s port=%d", host, port)
            _utils._daemonize()

        log.info(
            "updater command invoked host=%s port=%d updater_dir=%s install_dir=%s",
            host, port, bundle_path, release_root,
        )
        console.print(f"[green]Starting DECNET self-updater on {host}:{port} (mTLS)...[/]")

        # A non-zero return code from the server becomes the CLI exit status.
        exit_code = _upd_server.run(
            host, port,
            updater_dir=bundle_path,
            install_dir=release_root,
            agent_dir=agent_bundle,
        )
        if exit_code != 0:
            raise typer.Exit(exit_code)

View File

@@ -1,177 +0,0 @@
"""Shared CLI helpers: console, logger, process management, swarm HTTP client.
Submodules reference these as ``from . import utils`` then ``utils.foo(...)``
so tests can patch ``decnet.cli.utils.<name>`` and have every caller see it.
"""
from __future__ import annotations
import os
import signal
import subprocess # nosec B404
import sys
from pathlib import Path
from typing import Optional
import typer
from rich.console import Console
from decnet.logging import get_logger
from decnet.env import DECNET_API_HOST, DECNET_API_PORT, DECNET_INGEST_LOG_FILE
# Shared logger for the CLI package, obtained under the name "cli".
log = get_logger("cli")
# Shared Rich console used for all terminal output from CLI commands.
console = Console()
def _daemonize() -> None:
    """Detach the calling process into the background (Unix double-fork).

    Both intermediate parents leave via ``SystemExit(0)``. The surviving
    grandchild has ``sys.stdout``, ``sys.stderr`` and ``sys.stdin`` rebound
    to ``os.devnull``.
    """
    # First fork: the original process exits, the child carries on.
    first_child = os.fork()
    if first_child > 0:
        raise SystemExit(0)

    # Start a new session, then fork once more.
    os.setsid()
    second_child = os.fork()
    if second_child > 0:
        raise SystemExit(0)

    # Rebind the Python-level standard streams to the null device.
    for stream_name, mode in (("stdout", "w"), ("stderr", "w"), ("stdin", "r")):
        setattr(sys, stream_name, open(os.devnull, mode))  # noqa: SIM115
def _pid_dir() -> Path:
    """Pick a writable directory for PID files, creating it when necessary.

    Tries ``/opt/decnet`` first and ``~/.decnet`` second; the first one that
    can be created (or already exists) and is writable wins. Falls back to
    ``/tmp`` when neither is usable.
    """
    preferred = [Path("/opt/decnet"), Path.home() / ".decnet"]
    for directory in preferred:
        try:
            directory.mkdir(parents=True, exist_ok=True)
            writable = os.access(directory, os.W_OK)
        except OSError:
            # Could not create or inspect this candidate; try the next one.
            continue
        if writable:
            return directory
    return Path("/tmp")  # nosec B108
def _spawn_detached(argv: list[str], pid_file: Path) -> int:
    """Launch *argv* as an independent background process and record its PID.

    If *pid_file* already names a process that can be signalled, nothing is
    spawned and that PID is returned. Otherwise the child is started in its
    own session with all three standard streams on ``os.devnull`` and
    ``close_fds=True``, its PID is written to *pid_file* (parent directories
    are created as needed) and returned.

    The child is never waited on: this is fire-and-forget, not supervision.
    """
    if pid_file.exists():
        try:
            recorded = int(pid_file.read_text().strip())
            os.kill(recorded, 0)  # signal 0: existence probe only
        except (ValueError, OSError):
            # Unreadable, garbled or stale pid file; spawn a fresh process.
            pass
        else:
            return recorded

    with open(os.devnull, "rb") as null_in:
        with open(os.devnull, "ab") as null_out:
            child = subprocess.Popen(  # nosec B603
                argv,
                stdin=null_in,
                stdout=null_out,
                stderr=null_out,
                start_new_session=True,
                close_fds=True,
            )

    pid_file.parent.mkdir(parents=True, exist_ok=True)
    pid_file.write_text(f"{child.pid}\n")
    return child.pid
def _is_running(match_fn) -> int | None:
    """Find the first process whose command line satisfies ``match_fn``.

    ``match_fn`` is called with each process's non-empty ``cmdline`` as
    reported by psutil. Returns the matching PID, or ``None`` when no
    process matches. Processes that raise ``NoSuchProcess`` or
    ``AccessDenied`` are skipped.
    """
    import psutil

    for candidate in psutil.process_iter(["pid", "cmdline"]):
        try:
            argv = candidate.info["cmdline"]
            if not argv:
                continue
            if match_fn(argv):
                return candidate.info["pid"]
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
    return None
def _service_registry(log_file: str) -> list[tuple[str, callable, list[str]]]:
    """Return the microservice registry for health-check and relaunch.

    Each entry is ``(display_name, matcher, launch_argv)``: ``matcher``
    takes a process command line (argv list or string) and returns True
    when it belongs to that service; ``launch_argv`` is the command used to
    start it, with *log_file* passed to the services that take
    ``--log-file``.

    On agents these run as systemd units invoking /usr/local/bin/decnet,
    which doesn't include "decnet.cli" in its cmdline. On master dev boxes
    they're launched via `python -m decnet.cli`. Match either form — cmd
    is a list of argv tokens, so substring-check the joined string.
    """
    _py = sys.executable

    def _joined(cmd) -> str:
        """Flatten an argv list into one string; strings pass through."""
        return cmd if isinstance(cmd, str) else " ".join(cmd)

    def _matches(sub: str, extras: tuple[str, ...] = ()):
        """Build a matcher requiring "decnet", *sub* and every one of *extras*."""
        def _check(cmd) -> bool:
            joined = _joined(cmd)
            if "decnet" not in joined:
                return False
            if sub not in joined:
                return False
            return all(e in joined for e in extras)
        return _check

    def _is_api(cmd) -> bool:
        """Match the uvicorn process serving ``decnet.web.api:app``."""
        # Same joined-substring check as the other matchers, so a process
        # whose argv token is a full path to uvicorn is recognised too. An
        # exact-token test on the argv list would miss that form.
        joined = _joined(cmd)
        return "uvicorn" in joined and "decnet.web.api:app" in joined

    return [
        ("Collector", _matches("collect"),
         [_py, "-m", "decnet.cli", "collect", "--daemon", "--log-file", log_file]),
        # "--watch" distinguishes the long-running mutator from one-shot runs.
        ("Mutator", _matches("mutate", ("--watch",)),
         [_py, "-m", "decnet.cli", "mutate", "--daemon", "--watch"]),
        ("Prober", _matches("probe"),
         [_py, "-m", "decnet.cli", "probe", "--daemon", "--log-file", log_file]),
        ("Profiler", _matches("profiler"),
         [_py, "-m", "decnet.cli", "profiler", "--daemon"]),
        ("Sniffer", _matches("sniffer"),
         [_py, "-m", "decnet.cli", "sniffer", "--daemon", "--log-file", log_file]),
        ("API", _is_api,
         [_py, "-m", "uvicorn", "decnet.web.api:app",
          "--host", DECNET_API_HOST, "--port", str(DECNET_API_PORT)]),
    ]
def _kill_all_services() -> None:
    """Find every registered DECNET microservice process and send it SIGTERM.

    Services come from ``_service_registry``; for each one the first
    matching PID reported by ``_is_running`` is signalled. A summary line is
    printed at the end.
    """
    registry = _service_registry(str(DECNET_INGEST_LOG_FILE))
    killed = 0
    for name, match_fn, _launch_args in registry:
        pid = _is_running(match_fn)
        if pid is None:
            continue
        console.print(f"[yellow]Stopping {name} (PID {pid})...[/]")
        try:
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            # The process exited between the scan and the signal. That is
            # the outcome we wanted, and it must not stop us from handling
            # the remaining services.
            continue
        except PermissionError:
            # Owned by another user: report it and carry on with the rest.
            console.print(f"[red]Not permitted to stop {name} (PID {pid}); skipping.[/]")
            continue
        killed += 1
    if killed:
        console.print(f"[green]{killed} background process(es) stopped.[/]")
    else:
        console.print("[dim]No DECNET services were running.[/]")
# Where `decnet swarmctl` listens when neither --url nor the environment says otherwise.
_DEFAULT_SWARMCTL_URL = "http://127.0.0.1:8770"


def _swarmctl_base_url(url: Optional[str]) -> str:
    """Resolve the swarm controller base URL.

    Precedence: the explicit *url* argument, then the
    ``DECNET_SWARMCTL_URL`` environment variable, then
    ``_DEFAULT_SWARMCTL_URL``. Empty values count as unset. Trailing
    slashes are removed because callers append paths that start with "/".
    """
    base = url or os.environ.get("DECNET_SWARMCTL_URL") or _DEFAULT_SWARMCTL_URL
    return base.rstrip("/")
def _http_request(method: str, url: str, *, json_body: Optional[dict] = None, timeout: float = 30.0):
    """Tiny sync wrapper around httpx; avoids leaking async into the CLI.

    Sends *method* to *url*, with *json_body* as the JSON payload when
    given, and returns the httpx response for any status below 400.

    Raises:
        typer.Exit: code 2 when the request itself fails (``httpx.HTTPError``),
            code 1 when the server answers with a status of 400 or above.
            Both cases print a message first.
    """
    import httpx
    from rich.markup import escape

    try:
        resp = httpx.request(method, url, json=json_body, timeout=timeout)
    except httpx.HTTPError as exc:
        console.print(f"[red]Could not reach swarm controller at {url}: {exc}[/]")
        console.print("[dim]Is `decnet swarmctl` running?[/]")
        raise typer.Exit(2)

    if resp.status_code >= 400:
        try:
            detail = resp.json().get("detail", resp.text)
        except (ValueError, AttributeError):
            # Body is not JSON, or is JSON but not an object; show it raw.
            detail = resp.text
        # Separate the status code from the detail, and escape the detail so
        # square brackets in server text are not parsed as Rich markup.
        console.print(f"[red]{method} {url} failed: {resp.status_code} — {escape(str(detail))}[/]")
        raise typer.Exit(1)
    return resp

View File

@@ -1,120 +0,0 @@
from __future__ import annotations
import typer
from decnet.env import DECNET_API_PORT, DECNET_WEB_HOST, DECNET_WEB_PORT
from . import utils as _utils
from .utils import console, log
def register(app: typer.Typer) -> None:
    """Register the ``web`` command on *app*."""

    @app.command(name="web")
    def serve_web(
        web_port: int = typer.Option(DECNET_WEB_PORT, "--web-port", help="Port to serve the DECNET Web Dashboard"),
        host: str = typer.Option(DECNET_WEB_HOST, "--host", help="Host IP to serve the Web Dashboard"),
        api_port: int = typer.Option(DECNET_API_PORT, "--api-port", help="Port the DECNET API is listening on"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Serve the DECNET Web Dashboard frontend.

        Proxies /api/* requests to the API server so the frontend can use
        relative URLs (/api/v1/...) with no CORS configuration required.
        """
        import http.client
        import http.server
        import os
        import socketserver
        from pathlib import Path

        # The built frontend lives three directories above this module.
        dist_dir = Path(__file__).resolve().parent.parent.parent / "decnet_web" / "dist"
        if not dist_dir.exists():
            console.print(f"[red]Frontend build not found at {dist_dir}. Make sure you run 'npm run build' inside 'decnet_web'.[/]")
            raise typer.Exit(1)

        if daemon:
            log.info("web daemonizing host=%s port=%d api_port=%d", host, web_port, api_port)
            _utils._daemonize()

        _api_port = api_port

        class SPAHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
            """Static file handler with an SPA fallback and an /api/ proxy."""

            def _proxy_or_reject(self, method: str) -> None:
                """Forward /api/ requests upstream; anything else gets a 405."""
                if self.path.startswith("/api/"):
                    self._proxy(method)
                else:
                    self.send_error(405)

            def do_GET(self):
                if self.path.startswith("/api/"):
                    self._proxy("GET")
                    return
                # Missing files and directories are served as the SPA entry page.
                fs_path = Path(self.translate_path(self.path))
                if fs_path.is_dir() or not fs_path.exists():
                    self.path = "/index.html"
                return super().do_GET()

            def do_POST(self):
                self._proxy_or_reject("POST")

            def do_PUT(self):
                self._proxy_or_reject("PUT")

            def do_DELETE(self):
                self._proxy_or_reject("DELETE")

            def _proxy(self, method: str) -> None:
                """Relay the current request to the API on 127.0.0.1 and stream the reply back."""
                body_len = int(self.headers.get("Content-Length", 0))
                payload = self.rfile.read(body_len) if body_len else None
                # These request headers are not forwarded upstream.
                outbound = {}
                for header, value in self.headers.items():
                    if header.lower() in ("host", "connection"):
                        continue
                    outbound[header] = value
                try:
                    conn = http.client.HTTPConnection("127.0.0.1", _api_port, timeout=120)
                    conn.request(method, self.path, body=payload, headers=outbound)
                    upstream = conn.getresponse()
                    self.send_response(upstream.status)
                    for header, value in upstream.getheaders():
                        if header.lower() in ("connection", "transfer-encoding"):
                            continue
                        self.send_header(header, value)
                    self.end_headers()

                    # Event streams get the socket timeout removed entirely.
                    if "text/event-stream" in upstream.getheader("Content-Type", ""):
                        conn.sock.settimeout(None)

                    read_some = getattr(upstream, "read1", upstream.read)
                    piece = read_some(4096)
                    while piece:
                        self.wfile.write(piece)
                        self.wfile.flush()
                        piece = read_some(4096)
                except Exception as exc:
                    log.warning("web proxy error %s %s: %s", method, self.path, exc)
                    self.send_error(502, f"API proxy error: {exc}")
                finally:
                    try:
                        conn.close()
                    except Exception:  # nosec B110 — best-effort conn cleanup
                        pass

            def log_message(self, fmt: str, *args: object) -> None:
                # Route the handler's access log through the CLI logger at debug level.
                log.debug("web %s", fmt % args)

        # SimpleHTTPRequestHandler serves from the current directory.
        os.chdir(dist_dir)
        socketserver.TCPServer.allow_reuse_address = True
        with socketserver.ThreadingTCPServer((host, web_port), SPAHTTPRequestHandler) as httpd:
            console.print(f"[green]Serving DECNET Web Dashboard on http://{host}:{web_port}[/]")
            console.print(f"[dim]Proxying /api/* → http://127.0.0.1:{_api_port}[/]")
            try:
                httpd.serve_forever()
            except KeyboardInterrupt:
                console.print("\n[dim]Shutting down dashboard server.[/]")

View File

@@ -1,142 +0,0 @@
from __future__ import annotations
from typing import Optional
import typer
from decnet.env import DECNET_INGEST_LOG_FILE
from . import utils as _utils
from .utils import console, log
def register(app: typer.Typer) -> None:
    """Register the worker commands (``probe``, ``collect``, ``mutate``, ``correlate``) on *app*."""

    @app.command()
    def probe(
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path for RFC 5424 syslog + .json output (reads attackers from .json, writes results to both)"),
        interval: int = typer.Option(300, "--interval", "-i", help="Seconds between probe cycles (default: 300)"),
        timeout: float = typer.Option(5.0, "--timeout", help="Per-probe TCP timeout in seconds"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background (used by deploy, no console output)"),
    ) -> None:
        """Fingerprint attackers (JARM + HASSH + TCP/IP stack) discovered in the log stream."""
        import asyncio
        from decnet.prober import prober_worker

        if daemon:
            # Background mode: no banner, just run the worker.
            log.info("probe daemonizing log_file=%s interval=%d", log_file, interval)
            _utils._daemonize()
            asyncio.run(prober_worker(log_file, interval=interval, timeout=timeout))
            return

        log.info("probe command invoked log_file=%s interval=%d", log_file, interval)
        console.print(f"[bold cyan]DECNET-PROBER[/] watching {log_file} for attackers (interval: {interval}s)")
        console.print("[dim]Press Ctrl+C to stop[/]")
        try:
            asyncio.run(prober_worker(log_file, interval=interval, timeout=timeout))
        except KeyboardInterrupt:
            console.print("\n[yellow]DECNET-PROBER stopped.[/]")

    @app.command()
    def collect(
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write RFC 5424 syslog lines and .json records"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Stream Docker logs from all running decky service containers to a log file."""
        import asyncio
        from decnet.collector import log_collector_worker

        if daemon:
            log.info("collect daemonizing log_file=%s", log_file)
            _utils._daemonize()

        log.info("collect command invoked log_file=%s", log_file)
        console.print(f"[bold cyan]Collector starting[/] → {log_file}")
        try:
            asyncio.run(log_collector_worker(log_file))
        except KeyboardInterrupt:
            # Ctrl+C is the normal way to stop a foreground collector; end
            # quietly like the prober does instead of dumping a traceback.
            console.print("\n[yellow]Collector stopped.[/]")

    @app.command()
    def mutate(
        watch: bool = typer.Option(False, "--watch", "-w", help="Run continuously and mutate deckies according to their interval"),
        decky_name: Optional[str] = typer.Option(None, "--decky", help="Force mutate a specific decky immediately"),
        force_all: bool = typer.Option(False, "--all", help="Force mutate all deckies immediately"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Manually trigger or continuously watch for decky mutation."""
        import asyncio
        from decnet.mutator import mutate_decky, mutate_all, run_watch_loop
        from decnet.web.dependencies import repo

        if daemon:
            log.info("mutate daemonizing watch=%s", watch)
            _utils._daemonize()

        async def _run() -> None:
            """Initialise the repository, then run the selected mode."""
            await repo.initialize()
            # Precedence: --watch, then --decky, then --all, then an unforced pass.
            if watch:
                await run_watch_loop(repo)
            elif decky_name:
                await mutate_decky(decky_name, repo)
            elif force_all:
                await mutate_all(force=True, repo=repo)
            else:
                await mutate_all(force=False, repo=repo)

        try:
            asyncio.run(_run())
        except KeyboardInterrupt:
            # Same quiet Ctrl+C handling as the other long-running workers.
            console.print("\n[yellow]Mutator stopped.[/]")

    @app.command(name="correlate")
    def correlate(
        log_file: Optional[str] = typer.Option(None, "--log-file", "-f", help="Path to DECNET syslog file to analyse"),
        min_deckies: int = typer.Option(2, "--min-deckies", "-m", help="Minimum number of distinct deckies an IP must touch to be reported"),
        output: str = typer.Option("table", "--output", "-o", help="Output format: table | json | syslog"),
        emit_syslog: bool = typer.Option(False, "--emit-syslog", help="Also print traversal events as RFC 5424 lines (for SIEM piping)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Analyse logs for cross-decky traversals and print the attacker movement graph."""
        import sys
        import json as _json
        from pathlib import Path
        from decnet.correlation.engine import CorrelationEngine

        if daemon:
            log.info("correlate daemonizing log_file=%s", log_file)
            _utils._daemonize()

        engine = CorrelationEngine()
        # Input comes from --log-file, or from piped stdin when no file is given.
        if log_file:
            path = Path(log_file)
            if not path.exists():
                console.print(f"[red]Log file not found: {log_file}[/]")
                raise typer.Exit(1)
            engine.ingest_file(path)
        elif not sys.stdin.isatty():
            for line in sys.stdin:
                engine.ingest(line)
        else:
            console.print("[red]Provide --log-file or pipe log data via stdin.[/]")
            raise typer.Exit(1)

        traversals = engine.traversals(min_deckies)
        if output == "json":
            console.print_json(_json.dumps(engine.report_json(min_deckies), indent=2))
        elif output == "syslog":
            for line in engine.traversal_syslog_lines(min_deckies):
                typer.echo(line)
        else:
            # Any other value, including the default "table", renders the table view.
            if not traversals:
                console.print(
                    f"[yellow]No traversals detected "
                    f"(min_deckies={min_deckies}, events_indexed={engine.events_indexed}).[/]"
                )
            else:
                console.print(engine.report_table(min_deckies))
                console.print(
                    f"[dim]Parsed {engine.lines_parsed} lines · "
                    f"indexed {engine.events_indexed} events · "
                    f"{len(engine.all_attackers())} unique IPs · "
                    f"[bold]{len(traversals)}[/] traversal(s)[/]"
                )

        if emit_syslog:
            for line in engine.traversal_syslog_lines(min_deckies):
                typer.echo(line)

View File

@@ -1,13 +0,0 @@
from decnet.collector.worker import (
is_service_container,
is_service_event,
log_collector_worker,
parse_rfc5424,
)
# Public API of this package: the names re-exported from decnet.collector.worker.
__all__ = [
"is_service_container",
"is_service_event",
"log_collector_worker",
"parse_rfc5424",
]

View File

@@ -1,368 +0,0 @@
"""
Host-side Docker log collector.
Streams stdout from all running decky service containers via the Docker SDK,
writes RFC 5424 lines to <log_file> and parsed JSON records to <log_file>.json.
The ingester tails the .json file; rsyslog can consume the .log file independently.
"""
import asyncio
import json
import os
import re
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from pathlib import Path
from typing import Any, Optional
from decnet.logging import get_logger
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer, inject_context as _inject_ctx
logger = get_logger("collector")
# ─── Ingestion rate limiter ───────────────────────────────────────────────────
#
# Rationale: connection-lifecycle events (connect/disconnect/accept/close) are
# emitted once per TCP connection. During a portscan or credential-stuffing
# run, a single attacker can generate hundreds of these per second from the
# honeypot services themselves — each becoming a tiny WAL-write transaction
# through the ingester, starving reads until the queue drains.
#
# The collector still writes every line to the raw .log file (forensic record
# for rsyslog/SIEM). Only the .json path — which feeds SQLite — is deduped.
#
# Dedup key: (attacker_ip, decky, service, event_type)
# Window: DECNET_COLLECTOR_RL_WINDOW_SEC seconds (default 1.0)
# Scope: DECNET_COLLECTOR_RL_EVENT_TYPES comma list
# (default: connect,disconnect,connection,accept,close)
# Events outside that set bypass the limiter untouched.
def _parse_float_env(name: str, default: float) -> float:
    """Read a finite, non-negative float from the environment variable *name*.

    Returns *default* when the variable is unset, cannot be parsed as a
    float, or parses to NaN / +-infinity (a warning is logged in the latter
    two cases). Negative finite values are clamped to 0.0.
    """
    import math  # local import: the file-level import block is left untouched

    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        value = float(raw)
    except ValueError:
        logger.warning("collector: invalid %s=%r, using default %s", name, raw, default)
        return default
    # float() accepts "inf" and "nan". An infinite dedup window would make the
    # rate limiter suppress every scoped event, and NaN would silently turn
    # into 0.0 via max(); treat both as configuration errors instead.
    if not math.isfinite(value):
        logger.warning("collector: invalid %s=%r, using default %s", name, raw, default)
        return default
    return max(0.0, value)
# Dedup window in seconds, read once at import time. _should_ingest treats a
# value <= 0.0 as "rate limiting disabled".
_RL_WINDOW_SEC: float = _parse_float_env("DECNET_COLLECTOR_RL_WINDOW_SEC", 1.0)
# Event types subject to rate limiting: a comma-separated list with
# surrounding whitespace stripped and empty items discarded.
_RL_EVENT_TYPES: frozenset[str] = frozenset(
    t.strip()
    for t in os.environ.get(
        "DECNET_COLLECTOR_RL_EVENT_TYPES",
        "connect,disconnect,connection,accept,close",
    ).split(",")
    if t.strip()
)
# Size threshold of _rl_last above which _should_ingest prunes old entries.
_RL_MAX_ENTRIES: int = 10_000
# Held by _should_ingest and _reset_rate_limiter while they touch _rl_last.
_rl_lock: threading.Lock = threading.Lock()
# (attacker_ip, decky, service, event_type) -> time.monotonic() reading of the
# last event with that key that was allowed through.
_rl_last: dict[tuple[str, str, str, str], float] = {}
def _should_ingest(parsed: dict[str, Any]) -> bool:
    """Decide whether *parsed* should be written to the JSON ingestion stream.

    Events whose ``event_type`` is outside ``_RL_EVENT_TYPES`` always pass,
    as does everything when the window is <= 0. For rate-limited types, the
    first event for a given (attacker_ip, decky, service, event_type) key
    passes and later ones return False until ``_RL_WINDOW_SEC`` seconds have
    elapsed since the last event that passed.
    """
    event_type = parsed.get("event_type", "")
    if _RL_WINDOW_SEC <= 0.0 or event_type not in _RL_EVENT_TYPES:
        return True
    key = (
        parsed.get("attacker_ip", "Unknown"),
        parsed.get("decky", ""),
        parsed.get("service", ""),
        event_type,
    )
    now = time.monotonic()
    with _rl_lock:
        # The reference point of time.monotonic() is undefined, so a numeric
        # sentinel such as 0.0 cannot stand for "never seen": whenever the
        # clock reading is smaller than the window, the first event of every
        # key would be dropped. Use None for "no previous event" instead.
        last = _rl_last.get(key)
        if last is not None and now - last < _RL_WINDOW_SEC:
            return False
        _rl_last[key] = now
        # Opportunistic GC: when the map grows past the cap, drop entries older
        # than 60 windows (well outside any realistic in-flight dedup range).
        if len(_rl_last) > _RL_MAX_ENTRIES:
            cutoff = now - (_RL_WINDOW_SEC * 60.0)
            stale = [k for k, t in _rl_last.items() if t < cutoff]
            for k in stale:
                del _rl_last[k]
    return True
def _reset_rate_limiter() -> None:
    """Forget every remembered dedup timestamp (helper for test isolation)."""
    _rl_lock.acquire()
    try:
        _rl_last.clear()
    finally:
        _rl_lock.release()
# ─── RFC 5424 parser ──────────────────────────────────────────────────────────
_RFC5424_RE = re.compile(
    r"^<\d+>1 "
    r"(\S+) "   # 1: TIMESTAMP
    r"(\S+) "   # 2: HOSTNAME (decky name)
    r"(\S+) "   # 3: APP-NAME (service)
    r"\S+ "     # PROCID — NILVALUE ("-") for syslog_bridge emitters,
                # real PID for native syslog callers like sshd/sudo
                # routed through rsyslog. Accept both; we don't consume it.
    r"(\S+) "   # 4: MSGID (event_type)
    r"(.+)$",   # 5: SD element + optional MSG
)
# The relay SD-ELEMENT. parse_rfc5424 applies this to a copy of the line in
# which quoted values are masked, so the closing "]" it finds is the real one.
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
# One SD-PARAM: key="value", where the value may contain backslash escapes.
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
# Field names probed for the attacker IP, in priority order.
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "remote_addr", "target_ip", "ip")

# Free-form `key=value` pairs in the MSG body. Used for lines that bypass the
# syslog_bridge SD format — e.g. the SSH container's PROMPT_COMMAND which
# calls `logger -t bash "CMD uid=0 user=root src=1.2.3.4 pwd=/root cmd=…"`.
# Values run until the next whitespace, so `cmd=…` at end-of-line is preserved
# as one unit; we only care about IP-shaped fields here anyway.
_MSG_KV_RE = re.compile(r'(\w+)=(\S+)')

# A complete double-quoted string, honouring backslash escapes.
_QUOTED_RE = re.compile(r'"(?:[^"\\]|\\.)*"')
# The three characters RFC 5424 requires to be escaped inside a PARAM-VALUE.
_UNESCAPE_RE = re.compile(r'\\(["\\\]])')
# First "]" followed by whitespace and text: the boundary between SD and MSG.
_MSG_AFTER_SD_RE = re.compile(r'\]\s+(.+)$')


def _mask_quoted(text: str) -> str:
    """Return *text* with the contents of quoted strings replaced by filler.

    The result has the same length as *text*, so match offsets found in the
    masked copy can be used to slice the original.
    """
    return _QUOTED_RE.sub(
        lambda m: '"' + "x" * (len(m.group(0)) - 2) + '"',
        text,
    )


def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
    """
    Parse an RFC 5424 DECNET log line into a structured dict.

    Returns None if the line does not match the expected format. Otherwise
    returns a dict with the keys ``timestamp``, ``decky``, ``service``,
    ``event_type``, ``attacker_ip``, ``fields``, ``msg`` and ``raw_line``.
    ``attacker_ip`` is "Unknown" when no IP-bearing field is found.
    """
    m = _RFC5424_RE.match(line)
    if not m:
        return None
    ts_raw, decky, service, event_type, sd_rest = m.groups()

    fields: dict[str, str] = {}
    msg: str = ""

    if sd_rest.startswith("-"):
        # NILVALUE structured data: everything after it is the message.
        msg = sd_rest[1:].lstrip()
    elif sd_rest.startswith("["):
        # Locate the SD block and the SD/MSG boundary on a masked copy so an
        # escaped "]" inside a quoted value (e.g. path="/a[0\] b") is not
        # mistaken for the end of the SD-ELEMENT. Matching the raw text would
        # truncate the block, lose later params and leak SD text into msg.
        masked = _mask_quoted(sd_rest)
        block = _SD_BLOCK_RE.search(masked)
        if block:
            start, end = block.span(1)
            for k, v in _PARAM_RE.findall(sd_rest[start:end]):
                # Single pass so an escaped backslash followed by another
                # escapable character is not unescaped twice.
                fields[k] = _UNESCAPE_RE.sub(r"\1", v)
        msg_match = _MSG_AFTER_SD_RE.search(masked)
        if msg_match:
            start, end = msg_match.span(1)
            msg = sd_rest[start:end].strip()
    else:
        msg = sd_rest

    attacker_ip = "Unknown"
    for fname in _IP_FIELDS:
        if fname in fields:
            attacker_ip = fields[fname]
            break

    # Fallback for plain `logger` callers that don't use SD params (notably
    # the SSH container's bash PROMPT_COMMAND: `logger -t bash "CMD … src=IP …"`).
    # Scan the MSG body for IP-shaped `key=value` tokens ONLY — don't fold
    # them into `fields`, because the frontend's parseEventBody already
    # renders kv pairs from the msg and doubling them up produces noisy
    # duplicate pills. This keeps attacker attribution working without
    # changing the shape of `fields` for non-SD lines.
    if attacker_ip == "Unknown" and msg:
        for k, v in _MSG_KV_RE.findall(msg):
            if k in _IP_FIELDS:
                attacker_ip = v
                break

    try:
        ts_formatted = datetime.fromisoformat(ts_raw).strftime("%Y-%m-%d %H:%M:%S")
    except ValueError:
        # Keep the raw timestamp when it is not ISO-8601 parseable.
        ts_formatted = ts_raw

    return {
        "timestamp": ts_formatted,
        "decky": decky,
        "service": service,
        "event_type": event_type,
        "attacker_ip": attacker_ip,
        "fields": fields,
        "msg": msg,
        "raw_line": line,
    }
# ─── Container helpers ────────────────────────────────────────────────────────
def _load_service_container_names() -> set[str]:
    """
    Build the set of service container names from the persisted state.

    Each name is ``{decky_name}-{service_name}`` with underscores in the
    service name turned into hyphens, e.g. 'omega-decky-smtp'. An empty set
    is returned when ``load_state()`` yields None.
    """
    from decnet.config import load_state

    persisted = load_state()
    if persisted is None:
        return set()
    deployment, _unused = persisted
    return {
        f"{decky.name}-{service.replace('_', '-')}"
        for decky in deployment.deckies
        for service in decky.services
    }
def is_service_container(container) -> bool:
    """Tell whether *container* (a name string or an object with ``.name``) is a known DECNET service container."""
    if isinstance(container, str):
        raw_name = container
    else:
        raw_name = container.name
    candidate = raw_name.lstrip("/")
    known_names = _load_service_container_names()
    return candidate in known_names
def is_service_event(attrs: dict) -> bool:
    """Tell whether the ``name`` in a Docker event's attributes is a known DECNET service container."""
    candidate = attrs.get("name", "").lstrip("/")
    known_names = _load_service_container_names()
    return candidate in known_names
# ─── Blocking stream worker (runs in a thread) ────────────────────────────────
def _reopen_if_needed(path: Path, fh: Optional[Any]) -> Any:
    """Return a handle that appends to the file currently living at *path*.

    If *fh* is open on the same inode as *path* it is returned unchanged.
    Otherwise (no handle yet, file deleted, or file rotated so the inode
    differs) the old handle is closed on a best-effort basis, the parent
    directory is created if missing, and a fresh append-mode handle is
    returned.
    """
    if fh is not None:
        try:
            still_current = os.fstat(fh.fileno()).st_ino == os.stat(path).st_ino
        except OSError:
            # stat failed, e.g. the path no longer exists.
            still_current = False
        if still_current:
            return fh
        # Stale handle: release it before opening the replacement.
        try:
            fh.close()
        except Exception:  # nosec B110 — best-effort file handle cleanup
            pass
    path.parent.mkdir(parents=True, exist_ok=True)
    return open(path, "a", encoding="utf-8")
@_traced("collector.stream_container")
def _stream_container(container_id: str, log_path: Path, json_path: Path) -> None:
    """Stream stdout of one container and append it to the host log files.

    Every non-empty line is appended to *log_path*. Lines that parse as
    RFC 5424 and pass the rate limiter are also appended to *json_path* as
    one JSON object per line. Blocks until the Docker log stream ends; any
    exception ends the stream and is logged at debug level. Both file
    handles are closed on exit.
    """
    import codecs  # local import: the file-level import block is left untouched
    import docker  # type: ignore[import]

    lf: Optional[Any] = None
    jf: Optional[Any] = None
    try:
        client = docker.from_env()
        container = client.containers.get(container_id)
        log_stream = container.logs(stream=True, follow=True, stdout=True, stderr=False)
        # Decode incrementally: a multi-byte UTF-8 character can be split
        # across two chunks, and decoding each chunk on its own would turn
        # both halves into replacement characters.
        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
        buf = ""
        for chunk in log_stream:
            buf += decoder.decode(chunk)
            while "\n" in buf:
                line, buf = buf.split("\n", 1)
                line = line.rstrip()
                if not line:
                    continue
                # Raw forensic record: every line, before any parsing.
                lf = _reopen_if_needed(log_path, lf)
                lf.write(line + "\n")
                lf.flush()
                parsed = parse_rfc5424(line)
                if parsed:
                    if _should_ingest(parsed):
                        _tracer = _get_tracer("collector")
                        with _tracer.start_as_current_span("collector.event") as _span:
                            _span.set_attribute("decky", parsed.get("decky", ""))
                            _span.set_attribute("service", parsed.get("service", ""))
                            _span.set_attribute("event_type", parsed.get("event_type", ""))
                            _span.set_attribute("attacker_ip", parsed.get("attacker_ip", ""))
                            _inject_ctx(parsed)
                        logger.debug("collector: event written decky=%s type=%s", parsed.get("decky"), parsed.get("event_type"))
                        jf = _reopen_if_needed(json_path, jf)
                        jf.write(json.dumps(parsed) + "\n")
                        jf.flush()
                    else:
                        logger.debug(
                            "collector: rate-limited decky=%s service=%s type=%s attacker=%s",
                            parsed.get("decky"), parsed.get("service"),
                            parsed.get("event_type"), parsed.get("attacker_ip"),
                        )
                else:
                    logger.debug("collector: malformed RFC5424 line snippet=%r", line[:80])
    except Exception as exc:
        logger.debug("collector: log stream ended container_id=%s reason=%s", container_id, exc)
    finally:
        for fh in (lf, jf):
            if fh is not None:
                try:
                    fh.close()
                except Exception:  # nosec B110 — best-effort file handle cleanup
                    pass
# ─── Async collector ──────────────────────────────────────────────────────────
async def log_collector_worker(log_file: str) -> None:
    """
    Long-running collector task.

    Starts one log-streaming job per running DECNET service container,
    writing raw lines to *log_file* and parsed JSON records to the same path
    with a ``.json`` suffix. It then watches Docker "container start" events
    so containers started later are picked up too. Cancellation cancels the
    tracked tasks, shuts the pool down and is re-raised; any other exception
    is logged and ends the worker.
    """
    import docker  # type: ignore[import]

    log_path = Path(log_file)
    json_path = log_path.with_suffix(".json")
    log_path.parent.mkdir(parents=True, exist_ok=True)

    loop = asyncio.get_running_loop()
    active: dict[str, asyncio.Task[None]] = {}

    # Dedicated thread pool so long-running container log streams don't
    # saturate the default asyncio executor and starve short-lived
    # to_thread() calls elsewhere (e.g. load_state in the web API).
    collector_pool = ThreadPoolExecutor(
        max_workers=64, thread_name_prefix="decnet-collector",
    )

    def _spawn(container_id: str, container_name: str) -> None:
        # Nothing to do while a stream for this container is still running.
        existing = active.get(container_id)
        if existing is not None and not existing.done():
            return
        stream_future = loop.run_in_executor(
            collector_pool, _stream_container,
            container_id, log_path, json_path,
        )
        active[container_id] = asyncio.ensure_future(stream_future, loop=loop)
        logger.info("collector: streaming container=%s", container_name)

    try:
        logger.info("collector started log_path=%s", log_path)
        client = docker.from_env()

        # Initial scan of the containers that are already running.
        for container in client.containers.list():
            if is_service_container(container):
                _spawn(container.id, container.name.lstrip("/"))

        def _watch_events() -> None:
            start_events = client.events(
                decode=True,
                filters={"type": "container", "event": "start"},
            )
            for event in start_events:
                attrs = event.get("Actor", {}).get("Attributes", {})
                cid = event.get("id", "")
                name = attrs.get("name", "")
                if cid and is_service_event(attrs):
                    # _spawn touches loop state; hand it over to the loop thread.
                    loop.call_soon_threadsafe(_spawn, cid, name)

        await loop.run_in_executor(collector_pool, _watch_events)

    except asyncio.CancelledError:
        logger.info("collector shutdown requested cancelling %d tasks", len(active))
        for task in active.values():
            task.cancel()
        collector_pool.shutdown(wait=False)
        raise
    except Exception as exc:
        logger.error("collector error: %s", exc)
    finally:
        collector_pool.shutdown(wait=False)

View File

@@ -6,12 +6,6 @@ Network model:
All service containers for that decky share the base's network namespace
via `network_mode: "service:<base>"`. From the outside, every service on
a given decky appears to come from the same IP — exactly like a real host.
Logging model:
Service containers write RFC 5424 lines to stdout. Docker captures them
via the json-file driver. The host-side collector (decnet.web.collector)
streams those logs and writes them to the host log file for the ingester
and rsyslog to consume. No bind mounts or shared volumes are needed.
"""
from pathlib import Path
@@ -23,19 +17,35 @@ from decnet.network import MACVLAN_NETWORK_NAME
from decnet.os_fingerprint import get_os_sysctls
from decnet.services.registry import get_service
_DOCKER_LOGGING = {
"driver": "json-file",
"options": {
"max-size": "10m",
"max-file": "5",
},
}
# Directory inside each container at which the host log directory is mounted.
_CONTAINER_LOG_DIR = "/var/log/decnet"
# Name of the compose network added when a log target is configured.
_LOG_NETWORK = "decnet_logs"


def _resolve_log_file(log_file: str) -> tuple[str, str]:
    """
    Map a user-supplied log file path to ``(host_dir, container_log_path)``.

    ``host_dir`` is the absolute parent directory of the resolved path, and
    ``container_log_path`` is the same file name placed under
    ``_CONTAINER_LOG_DIR``.
    """
    resolved = Path(log_file).resolve()
    return str(resolved.parent), f"{_CONTAINER_LOG_DIR}/{resolved.name}"
def generate_compose(config: DecnetConfig) -> dict:
"""Build and return the full docker-compose data structure."""
services: dict = {}
log_host_dir: str | None = None
log_container_path: str | None = None
if config.log_file:
log_host_dir, log_container_path = _resolve_log_file(config.log_file)
# Ensure the host log directory exists so Docker doesn't create it as root-owned
Path(log_host_dir).mkdir(parents=True, exist_ok=True)
for decky in config.deckies:
base_key = decky.name # e.g. "decky-01"
@@ -52,6 +62,8 @@ def generate_compose(config: DecnetConfig) -> dict:
}
},
}
if config.log_target:
base["networks"][_LOG_NETWORK] = {}
# Inject TCP/IP stack sysctls to spoof the claimed OS fingerprint.
# Only the base container needs this — service containers inherit the
@@ -64,21 +76,24 @@ def generate_compose(config: DecnetConfig) -> dict:
# --- Service containers: share base network namespace ---
for svc_name in decky.services:
svc = get_service(svc_name)
if svc.fleet_singleton:
continue
svc_cfg = decky.service_config.get(svc_name, {})
fragment = svc.compose_fragment(decky.name, service_cfg=svc_cfg)
fragment = svc.compose_fragment(
decky.name, log_target=config.log_target, service_cfg=svc_cfg
)
# Inject the per-decky base image into build services so containers
# vary by distro and don't all fingerprint as debian:bookworm-slim.
# Services that need a fixed upstream image (e.g. conpot) can pre-set
# build.args.BASE_IMAGE in their compose_fragment() to opt out.
if "build" in fragment:
args = fragment["build"].setdefault("args", {})
args.setdefault("BASE_IMAGE", decky.build_base)
fragment["build"].setdefault("args", {})["BASE_IMAGE"] = decky.build_base
fragment.setdefault("environment", {})
fragment["environment"]["HOSTNAME"] = decky.hostname
if log_host_dir and log_container_path:
fragment["environment"]["DECNET_LOG_FILE"] = log_container_path
fragment.setdefault("volumes", [])
mount = f"{log_host_dir}:{_CONTAINER_LOG_DIR}"
if mount not in fragment["volumes"]:
fragment["volumes"].append(mount)
# Share the base container's network — no own IP needed
fragment["network_mode"] = f"service:{base_key}"
@@ -88,9 +103,6 @@ def generate_compose(config: DecnetConfig) -> dict:
fragment.pop("hostname", None)
fragment.pop("networks", None)
# Rotate Docker logs so disk usage is bounded
fragment["logging"] = _DOCKER_LOGGING
services[f"{decky.name}-{svc_name}"] = fragment
# Network definitions
@@ -99,6 +111,8 @@ def generate_compose(config: DecnetConfig) -> dict:
"external": True, # created by network.py before compose up
}
}
if config.log_target:
networks[_LOG_NETWORK] = {"driver": "bridge", "internal": True}
return {
"version": "3.8",

View File

@@ -4,117 +4,61 @@ State is persisted to decnet-state.json in the working directory.
"""
import json
import logging
import os
import socket as _socket
from datetime import datetime, timezone
from pathlib import Path
from typing import Literal
from decnet.models import DeckyConfig, DecnetConfig # noqa: F401
from pydantic import BaseModel, field_validator
from decnet.distros import random_hostname as _random_hostname
# ---------------------------------------------------------------------------
# RFC 5424 syslog formatter
# ---------------------------------------------------------------------------
# Python logging level → syslog severity code (RFC 5424 §6.2.1).
_SYSLOG_SEVERITY: dict[int, int] = {
    logging.CRITICAL: 2,  # Critical
    logging.ERROR: 3,     # Error
    logging.WARNING: 4,   # Warning
    logging.INFO: 6,      # Informational
    logging.DEBUG: 7,     # Debug
}

_FACILITY_LOCAL0 = 16  # local0 (RFC 5424 §6.2.1 / POSIX)


class Rfc5424Formatter(logging.Formatter):
    """Render log records as single RFC 5424 syslog lines.

    Layout:
        <PRIVAL>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID STRUCTURED-DATA MSG

    Example:
        <134>1 2026-04-12T21:48:03.123456+00:00 host decnet 1234 decnet.config - Dev mode active

    PROCID is the current PID, MSGID is the logger name, and
    STRUCTURED-DATA is always the NILVALUE "-".
    """

    # Captured once, when the class body is executed.
    _hostname: str = _socket.gethostname()
    # APP-NAME used when the record carries no ``decnet_component`` attribute.
    _app: str = "decnet"

    def format(self, record: logging.LogRecord) -> str:
        # Levels missing from the table are reported as Informational (6).
        severity = _SYSLOG_SEVERITY.get(record.levelno, 6)
        prival = _FACILITY_LOCAL0 * 8 + severity
        timestamp = datetime.fromtimestamp(
            record.created, tz=timezone.utc
        ).isoformat(timespec="microseconds")
        message = record.getMessage()
        if record.exc_info:
            message = message + "\n" + self.formatException(record.exc_info)
        component = getattr(record, "decnet_component", self._app)
        return "<{}>1 {} {} {} {} {} - {}".format(
            prival,
            timestamp,
            self._hostname,
            component,
            os.getpid(),
            record.name,
            message,
        )
def _configure_logging(dev: bool) -> None:
    """Attach RFC 5424 handlers to the root logger; repeated calls are no-ops.

    The root level becomes DEBUG when *dev* is true, INFO otherwise. A
    stderr StreamHandler is always added. Unless an environment variable
    starting with ``PYTEST`` is present, a rotating file handler is added as
    well, writing to ``$DECNET_SYSTEM_LOGS`` (default ``decnet.system.log``),
    and ``chown_to_invoking_user`` is then called on that path.
    """
    from decnet.logging.inode_aware_handler import InodeAwareRotatingFileHandler

    root = logging.getLogger()

    # Already configured if an RFC 5424 stream handler is present.
    for existing in root.handlers:
        if isinstance(existing, logging.StreamHandler) and isinstance(
            existing.formatter, Rfc5424Formatter
        ):
            return

    formatter = Rfc5424Formatter()
    root.setLevel(logging.DEBUG if dev else logging.INFO)

    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)
    root.addHandler(console_handler)

    # Skip the file handler during pytest runs to avoid polluting the test cwd.
    if any(key.startswith("PYTEST") for key in os.environ):
        return

    system_log = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.log")
    rotating_handler = InodeAwareRotatingFileHandler(
        system_log,
        mode="a",
        maxBytes=10 * 1024 * 1024,  # 10 MB
        backupCount=5,
        encoding="utf-8",
    )
    rotating_handler.setFormatter(formatter)
    root.addHandler(rotating_handler)

    # Drop root ownership when invoked via sudo so non-root follow-up
    # commands (e.g. `decnet api` after `sudo decnet deploy`) can append.
    from decnet.privdrop import chown_to_invoking_user

    chown_to_invoking_user(system_log)
_dev = os.environ.get("DECNET_DEVELOPER", "").lower() == "true"
_configure_logging(_dev)
log = logging.getLogger(__name__)
if _dev:
log.debug("Developer mode: debug logging active")
# Calculate absolute path to the project root (where the config file resides)
_ROOT: Path = Path(__file__).parent.parent.absolute()
STATE_FILE: Path = _ROOT / "decnet-state.json"
DEFAULT_MUTATE_INTERVAL: int = 30 # default rotation interval in minutes
STATE_FILE = Path("decnet-state.json")
def random_hostname(distro_slug: str = "debian") -> str:
    """Return whatever ``decnet.distros.random_hostname`` produces for *distro_slug*."""
    hostname = _random_hostname(distro_slug)
    return hostname
class DeckyConfig(BaseModel):
    """Settings for a single decky; ``services`` must not be empty."""

    name: str
    ip: str
    services: list[str]
    # Slug from distros.DISTROS, e.g. "debian", "ubuntu22".
    distro: str
    # Docker image for the base/IP-holder container.
    base_image: str
    # apt-compatible image for service Dockerfiles.
    build_base: str = "debian:bookworm-slim"
    hostname: str
    # Archetype slug if spawned from an archetype profile.
    archetype: str | None = None
    # Optional per-service persona config.
    service_config: dict[str, dict] = {}
    # OS family for TCP/IP stack spoofing (see os_fingerprint.py).
    nmap_os: str = "linux"

    @field_validator("services")
    @classmethod
    def services_not_empty(cls, v: list[str]) -> list[str]:
        """Reject an empty service list."""
        if v:
            return v
        raise ValueError("A decky must have at least one service.")
class DecnetConfig(BaseModel):
    """Deployment-wide settings: network parameters, deckies and log sinks."""

    mode: Literal["unihost", "swarm"]
    interface: str
    subnet: str
    gateway: str
    deckies: list[DeckyConfig]
    log_target: str | None = None  # "ip:port" or None
    log_file: str | None = None    # path for RFC 5424 syslog file output
    ipvlan: bool = False           # use IPvlan L2 instead of MACVLAN (WiFi-friendly)

    @field_validator("log_target")
    @classmethod
    def validate_log_target(cls, v: str | None) -> str | None:
        """Accept None or a ``host:port`` string with a usable port.

        Raises:
            ValueError: if the host part is empty, or the port is not a
                decimal number in the range 1-65535.
        """
        if v is None:
            return v
        # Split on the last colon so the host part may itself contain colons.
        host, sep, port = v.rpartition(":")
        # isascii() guards int(): str.isdigit() is also true for characters
        # such as "²" that int() cannot parse.
        port_ok = port.isascii() and port.isdigit() and 0 < int(port) <= 65535
        if not sep or not host or not port_ok:
            raise ValueError("log_target must be in ip:port format, e.g. 192.168.1.5:5140")
        return v
def save_state(config: DecnetConfig, compose_path: Path) -> None:
payload = {
"config": config.model_dump(),

View File

@@ -1,90 +0,0 @@
"""Parse /etc/decnet/decnet.ini and seed os.environ defaults.
The INI file is a convenience layer on top of the existing DECNET_* env
vars. It never overrides an explicit environment variable (uses
os.environ.setdefault). Call load_ini_config() once, very early, before
any decnet.env import, so env.py picks up the seeded values as if they
had been exported by the shell.
Shape::
[decnet]
mode = agent # or "master"
log-directory = /var/log/decnet
disallow-master = true
[agent]
master-host = 192.168.1.50
master-port = 8770
agent-port = 8765
agent-dir = /home/anti/.decnet/agent
...
[master]
api-host = 0.0.0.0
swarmctl-port = 8770
listener-port = 6514
...
Only the section matching `mode` is loaded. The other section is
ignored silently so an agent host never reads master secrets (and
vice versa). Keys are converted to SCREAMING_SNAKE_CASE and prefixed
with ``DECNET_`` — e.g. ``master-host`` → ``DECNET_MASTER_HOST``.
"""
from __future__ import annotations
import configparser
import os
from pathlib import Path
from typing import Optional
DEFAULT_CONFIG_PATH = Path("/etc/decnet/decnet.ini")

# The [decnet] section keys are role-agnostic and always exported.
# NOTE(review): this set is not consulted by load_ini_config, which exports
# every key found in [decnet] — confirm whether filtering was intended.
_COMMON_KEYS = frozenset({"mode", "disallow-master", "log-directory"})


def _key_to_env(key: str) -> str:
    """Turn an INI key into its env var name, e.g. ``master-host`` → ``DECNET_MASTER_HOST``."""
    return "DECNET_" + key.replace("-", "_").upper()


def load_ini_config(path: Optional[Path] = None) -> Optional[Path]:
    """Seed os.environ defaults from the DECNET INI file.

    When *path* is None, ``$DECNET_CONFIG`` is used if set, otherwise
    ``DEFAULT_CONFIG_PATH``.

    Returns the path that was actually loaded (so callers can log it), or
    None if no file was read. Missing file is a no-op — callers fall back
    to env vars / CLI flags / hardcoded defaults.

    Precedence: real os.environ > INI > defaults. Real env vars are never
    overwritten because we use setdefault().

    Raises:
        ValueError: if the effective mode is neither 'agent' nor 'master'.
    """
    if path is None:
        override = os.environ.get("DECNET_CONFIG")
        path = Path(override) if override else DEFAULT_CONFIG_PATH

    if not path.is_file():
        return None

    # Values are exported verbatim as env vars, so "%" has to stay literal.
    # With ConfigParser's default interpolation, any value containing a bare
    # "%" (common in secrets and passwords) makes items() raise.
    parser = configparser.ConfigParser(interpolation=None)
    parser.read(path)

    # [decnet] first — mode/disallow-master/log-directory. These seed the
    # mode decision for the section selection below.
    if parser.has_section("decnet"):
        for key, value in parser.items("decnet"):
            os.environ.setdefault(_key_to_env(key), value)

    mode = os.environ.get("DECNET_MODE", "master").lower()
    if mode not in ("agent", "master"):
        raise ValueError(
            f"decnet.ini: [decnet] mode must be 'agent' or 'master', got '{mode}'"
        )

    # Role-specific section: only the one named after the mode is read, so
    # the other role's keys never reach the environment.
    section = mode
    if parser.has_section(section):
        for key, value in parser.items(section):
            os.environ.setdefault(_key_to_env(key), value)

    return path

View File

@@ -5,9 +5,9 @@ from decnet.correlation.graph import AttackerTraversal, TraversalHop
from decnet.correlation.parser import LogEvent, parse_line
__all__ = [
"AttackerTraversal",
"CorrelationEngine",
"LogEvent",
"AttackerTraversal",
"TraversalHop",
"LogEvent",
"parse_line",
]

View File

@@ -22,6 +22,7 @@ Usage
from __future__ import annotations
import json
from collections import defaultdict
from pathlib import Path
@@ -33,7 +34,6 @@ from decnet.logging.syslog_formatter import (
SEVERITY_WARNING,
format_rfc5424,
)
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer
class CorrelationEngine:
@@ -65,7 +65,6 @@ class CorrelationEngine:
self.events_indexed += 1
return event
@_traced("correlation.ingest_file")
def ingest_file(self, path: Path) -> int:
"""
Parse every line of *path* and index it.
@@ -75,18 +74,12 @@ class CorrelationEngine:
with open(path) as fh:
for line in fh:
self.ingest(line)
_tracer = _get_tracer("correlation")
with _tracer.start_as_current_span("correlation.ingest_file.summary") as _span:
_span.set_attribute("lines_parsed", self.lines_parsed)
_span.set_attribute("events_indexed", self.events_indexed)
_span.set_attribute("unique_ips", len(self._events))
return self.events_indexed
# ------------------------------------------------------------------ #
# Query #
# ------------------------------------------------------------------ #
@_traced("correlation.traversals")
def traversals(self, min_deckies: int = 2) -> list[AttackerTraversal]:
"""
Return all attackers that touched at least *min_deckies* distinct
@@ -143,7 +136,6 @@ class CorrelationEngine:
)
return table
@_traced("correlation.report_json")
def report_json(self, min_deckies: int = 2) -> dict:
"""Serialisable dict representation of all traversals."""
return {
@@ -156,7 +148,6 @@ class CorrelationEngine:
"traversals": [t.to_dict() for t in self.traversals(min_deckies)],
}
@_traced("correlation.traversal_syslog_lines")
def traversal_syslog_lines(self, min_deckies: int = 2) -> list[str]:
"""
Emit one RFC 5424 syslog line per detected traversal.

View File

@@ -6,7 +6,7 @@ the fields needed for cross-decky correlation: attacker IP, decky name,
service, event type, and timestamp.
Log format (produced by decnet.logging.syslog_formatter):
<PRI>1 TIMESTAMP HOSTNAME APP-NAME - MSGID [relay@55555 k1="v1" k2="v2"] [MSG]
<PRI>1 TIMESTAMP HOSTNAME APP-NAME - MSGID [decnet@55555 k1="v1" k2="v2"] [MSG]
The attacker IP may appear under several field names depending on service:
src_ip — ftp, smtp, http, most services
@@ -17,7 +17,7 @@ The attacker IP may appear under several field names depending on service:
from __future__ import annotations
import re
from dataclasses import dataclass
from dataclasses import dataclass, field
from datetime import datetime
# RFC 5424 line structure
@@ -31,14 +31,14 @@ _RFC5424_RE = re.compile(
r"(.+)$", # 5: SD element + optional MSG
)
# Structured data block: [relay@55555 k="v" ...]
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
# Structured data block: [decnet@55555 k="v" ...]
_SD_BLOCK_RE = re.compile(r'\[decnet@55555\s+(.*?)\]', re.DOTALL)
# Individual param: key="value" (with escaped chars inside value)
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
# Field names to probe for attacker IP, in priority order
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "remote_addr", "target_ip", "ip")
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "ip")
@dataclass

View File

@@ -2,8 +2,7 @@
Deploy, teardown, and status via Docker SDK + subprocess docker compose.
"""
import shutil
import subprocess # nosec B404
import subprocess
import time
from pathlib import Path
@@ -11,14 +10,15 @@ import docker
from rich.console import Console
from rich.table import Table
from decnet.logging import get_logger
from decnet.telemetry import traced as _traced
from decnet.config import DecnetConfig, clear_state, load_state, save_state
from decnet.composer import write_compose
from decnet.network import (
MACVLAN_NETWORK_NAME,
allocate_ips,
create_ipvlan_network,
create_macvlan_network,
detect_interface,
detect_subnet,
get_host_ip,
ips_to_range,
remove_macvlan_network,
@@ -28,51 +28,13 @@ from decnet.network import (
teardown_host_macvlan,
)
log = get_logger("engine")
console = Console()
COMPOSE_FILE = Path("decnet-compose.yml")
_CANONICAL_LOGGING = Path(__file__).parent.parent / "templates" / "syslog_bridge.py"
def _sync_logging_helper(config: DecnetConfig) -> None:
    """Ensure each build context used by *config* holds the canonical syslog_bridge.py.

    Every distinct ``dockerfile_context()`` of the configured services is
    visited once; the file is copied only when it is missing there or its
    bytes differ from the canonical copy.
    """
    from decnet.services.registry import get_service

    visited: set[Path] = set()
    service_names = (
        name for decky in config.deckies for name in decky.services
    )
    for svc_name in service_names:
        svc = get_service(svc_name)
        if svc is None:
            continue
        ctx = svc.dockerfile_context()
        if ctx is None or ctx in visited:
            continue
        visited.add(ctx)
        target = ctx / "syslog_bridge.py"
        up_to_date = (
            target.exists()
            and target.read_bytes() == _CANONICAL_LOGGING.read_bytes()
        )
        if not up_to_date:
            shutil.copy2(_CANONICAL_LOGGING, target)
def _compose(*args: str, compose_file: Path = COMPOSE_FILE, env: dict | None = None) -> None:
    """Run ``docker compose -p decnet -f <compose_file> <args...>``.

    *env* entries are layered on top of the current environment. Captured
    stdout is echoed. On a non-zero exit, stderr (if any) is logged and
    ``subprocess.CalledProcessError`` is raised.
    """
    import os

    # -p decnet pins the compose project name. Without it, docker compose
    # derives the project from basename($PWD); when a daemon (systemd) runs
    # with WorkingDirectory=/ that basename is empty and compose aborts with
    # "project name must not be empty".
    cmd = ["docker", "compose", "-p", "decnet", "-f", str(compose_file), *args]
    child_env = dict(os.environ)
    child_env.update(env or {})
    result = subprocess.run(cmd, capture_output=True, text=True, env=child_env)  # nosec B603
    if result.stdout:
        print(result.stdout, end="")
    if result.returncode == 0:
        return
    # Docker emits the useful detail ("Address already in use", which IP,
    # which port) on stderr. Surface it to the structured log so the
    # agent's journal carries it — without this the upstream traceback
    # just shows the exit code.
    if result.stderr:
        log.error("docker compose %s failed: %s", " ".join(args), result.stderr.strip())
    raise subprocess.CalledProcessError(
        result.returncode, cmd, result.stdout, result.stderr
    )
def _compose(*args: str, compose_file: Path = COMPOSE_FILE) -> None:
    """Run ``docker compose -f <compose_file> <args...>``; a non-zero exit raises CalledProcessError."""
    subprocess.run(
        ["docker", "compose", "-f", str(compose_file), *args],
        check=True,
    )
_PERMANENT_ERRORS = (
@@ -84,25 +46,17 @@ _PERMANENT_ERRORS = (
)
@_traced("engine.compose_with_retry")
def _compose_with_retry(
*args: str,
compose_file: Path = COMPOSE_FILE,
retries: int = 3,
delay: float = 5.0,
env: dict | None = None,
) -> None:
"""Run a docker compose command, retrying on transient failures."""
import os
last_exc: subprocess.CalledProcessError | None = None
# -p decnet pins the compose project name. Without it, docker compose
# derives the project from basename($PWD); when a daemon (systemd) runs
# with WorkingDirectory=/ that basename is empty and compose aborts with
# "project name must not be empty".
cmd = ["docker", "compose", "-p", "decnet", "-f", str(compose_file), *args]
merged = {**os.environ, **(env or {})}
cmd = ["docker", "compose", "-f", str(compose_file), *args]
for attempt in range(1, retries + 1):
result = subprocess.run(cmd, capture_output=True, text=True, env=merged) # nosec B603
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode == 0:
if result.stdout:
print(result.stdout, end="")
@@ -126,21 +80,16 @@ def _compose_with_retry(
else:
if result.stderr:
console.print(f"[red]{result.stderr.strip()}[/]")
log.error("docker compose %s failed after %d attempts: %s",
" ".join(args), retries, result.stderr.strip())
raise last_exc
@_traced("engine.deploy")
def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False, parallel: bool = False) -> None:
log.info("deployment started n_deckies=%d interface=%s subnet=%s dry_run=%s", len(config.deckies), config.interface, config.subnet, dry_run)
log.debug("deploy: deckies=%s", [d.name for d in config.deckies])
def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False) -> None:
client = docker.from_env()
# --- Network setup ---
ip_list = [d.ip for d in config.deckies]
decky_range = ips_to_range(ip_list)
host_ip = get_host_ip(config.interface)
log.debug("deploy: ip_range=%s host_ip=%s", decky_range, host_ip)
net_driver = "IPvlan L2" if config.ipvlan else "MACVLAN"
console.print(f"[bold cyan]Creating {net_driver} network[/] ({MACVLAN_NETWORK_NAME}) on {config.interface}")
@@ -164,53 +113,30 @@ def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False,
)
setup_host_macvlan(config.interface, host_ip, decky_range)
_sync_logging_helper(config)
# --- Compose generation ---
compose_path = write_compose(config, COMPOSE_FILE)
console.print(f"[bold cyan]Compose file written[/] → {compose_path}")
if dry_run:
log.info("deployment dry-run complete compose_path=%s", compose_path)
console.print("[yellow]Dry run — no containers started.[/]")
return
# --- Save state before bring-up ---
save_state(config, compose_path)
# Pre-up cleanup: a prior half-failed `up` can leave containers still
# holding the IPs/ports this run wants, which surfaces as the recurring
# "Address already in use" from Docker's IPAM. Best-effort — ignore
# failure (e.g. nothing to tear down on a clean host).
try:
_compose("down", "--remove-orphans", compose_file=compose_path)
except subprocess.CalledProcessError:
log.debug("pre-up cleanup: compose down failed (likely nothing to remove)")
build_env = {"DOCKER_BUILDKIT": "1"} if parallel else {}
# --- Bring up ---
console.print("[bold cyan]Building images and starting deckies...[/]")
build_args = ["build"]
if no_cache:
build_args.append("--no-cache")
if parallel:
console.print("[bold cyan]Parallel build enabled — building all images concurrently...[/]")
_compose_with_retry(*build_args, compose_file=compose_path, env=build_env)
_compose_with_retry("up", "-d", compose_file=compose_path, env=build_env)
else:
if no_cache:
_compose_with_retry("build", "--no-cache", compose_file=compose_path)
_compose_with_retry("up", "--build", "-d", compose_file=compose_path)
log.info("deployment complete n_deckies=%d", len(config.deckies))
# --- Status summary ---
_print_status(config)
@_traced("engine.teardown")
def teardown(decky_id: str | None = None) -> None:
log.info("teardown requested decky_id=%s", decky_id or "all")
state = load_state()
if state is None:
log.warning("teardown: no active deployment found")
console.print("[red]No active deployment found (no decnet-state.json).[/]")
return
@@ -218,13 +144,10 @@ def teardown(decky_id: str | None = None) -> None:
client = docker.from_env()
if decky_id:
decky = next((d for d in config.deckies if d.name == decky_id), None)
if decky is None:
console.print(f"[red]Decky '{decky_id}' not found in current deployment.[/]")
return
svc_names = [f"{decky_id}-{svc}" for svc in decky.services]
# Bring down only the services matching this decky
svc_names = [f"{decky_id}-{svc}" for svc in [d.services for d in config.deckies if d.name == decky_id]]
if not svc_names:
log.warning("teardown: decky %s has no services to stop", decky_id)
console.print(f"[red]Decky '{decky_id}' not found in current deployment.[/]")
return
_compose("stop", *svc_names, compose_file=compose_path)
_compose("rm", "-f", *svc_names, compose_file=compose_path)
@@ -239,9 +162,7 @@ def teardown(decky_id: str | None = None) -> None:
teardown_host_macvlan(decky_range)
remove_macvlan_network(client)
clear_state()
net_driver = "IPvlan" if config.ipvlan else "MACVLAN"
log.info("teardown complete all deckies removed network_driver=%s", net_driver)
console.print(f"[green]All deckies torn down. {net_driver} network removed.[/]")
@@ -261,7 +182,7 @@ def status() -> None:
table.add_column("Hostname")
table.add_column("Status")
running = {c.name: c.status for c in client.containers.list(all=True, ignore_removed=True)}
running = {c.name: c.status for c in client.containers.list(all=True)}
for decky in config.deckies:
statuses = []

View File

@@ -97,8 +97,8 @@ def random_hostname(distro_slug: str = "debian") -> str:
"""Generate a plausible hostname for the given distro style."""
profile = DISTROS.get(distro_slug)
style = profile.hostname_style if profile else "generic"
word = random.choice(_NAME_WORDS) # nosec B311
num = random.randint(10, 99) # nosec B311
word = random.choice(_NAME_WORDS)
num = random.randint(10, 99)
if style == "rhel":
# RHEL/CentOS/Fedora convention: word+num.localdomain
@@ -107,7 +107,7 @@ def random_hostname(distro_slug: str = "debian") -> str:
return f"{word}-{num}"
elif style == "rolling":
# Kali/Arch: just a word, no suffix
return f"{word}-{random.choice(_NAME_WORDS)}" # nosec B311
return f"{word}-{random.choice(_NAME_WORDS)}"
else:
# Debian/Ubuntu: SRV-WORD-nn
return f"SRV-{word.upper()}-{num}"
@@ -122,7 +122,7 @@ def get_distro(slug: str) -> DistroProfile:
def random_distro() -> DistroProfile:
return random.choice(list(DISTROS.values())) # nosec B311
return random.choice(list(DISTROS.values()))
def all_distros() -> dict[str, DistroProfile]:

View File

@@ -1,15 +0,0 @@
from decnet.engine.deployer import (
COMPOSE_FILE,
_compose_with_retry,
deploy,
status,
teardown,
)
__all__ = [
"COMPOSE_FILE",
"_compose_with_retry",
"deploy",
"status",
"teardown",
]

View File

@@ -1,153 +0,0 @@
import os
from pathlib import Path
from typing import Optional
from dotenv import load_dotenv
# Calculate absolute path to the project root
# (two directory levels above this file).
_ROOT: Path = Path(__file__).parent.parent.absolute()
# Load .env.local first, then fallback to .env.
# Also check CWD so deployments that install into site-packages (e.g. the
# self-updater's release slots) can ship a per-host .env.local at the
# process's working directory without having to edit site-packages.
# NOTE(review): the "first file wins" precedence presumably relies on
# load_dotenv() leaving already-set variables untouched — confirm against the
# python-dotenv default before reordering these calls.
load_dotenv(_ROOT / ".env.local")
load_dotenv(_ROOT / ".env")
load_dotenv(Path.cwd() / ".env.local")
load_dotenv(Path.cwd() / ".env")
def _port(name: str, default: int) -> int:
    """Read a port number from the environment and validate it.

    Args:
        name: Environment variable to read.
        default: Port used when *name* is not set. It goes through the same
            validation as an explicit value.

    Returns:
        The parsed port number.

    Raises:
        ValueError: If the value is not an integer or lies outside 1-65535.
    """
    raw = os.environ.get(name, str(default))
    try:
        value = int(raw)
    except ValueError:
        # The int() traceback adds nothing beyond the message below.
        raise ValueError(
            f"Environment variable '{name}' must be an integer, got '{raw}'."
        ) from None
    if not 1 <= value <= 65535:
        raise ValueError(f"Environment variable '{name}' must be 1-65535, got {value}.")
    return value
def _require_env(name: str) -> str:
    """Fetch a mandatory environment variable, rejecting weak values.

    The strength checks are skipped entirely when any variable whose name
    starts with ``PYTEST`` is present in the environment.

    Args:
        name: Environment variable to read.

    Returns:
        The variable's value.

    Raises:
        ValueError: If the variable is unset or empty, equals a known
            insecure default (case-insensitive), or is ``DECNET_JWT_SECRET``
            shorter than 32 characters while ``DECNET_DEVELOPER`` is not
            "true".
    """
    secret = os.environ.get(name)
    if not secret:
        raise ValueError(
            f"Required environment variable '{name}' is not set. "
            f"Set it in .env.local or export it before starting DECNET."
        )

    # A PYTEST* variable in the environment bypasses the strength checks.
    for key in os.environ:
        if key.startswith("PYTEST"):
            return secret

    insecure_defaults = {"fallback-secret-key-change-me", "admin", "secret", "password", "changeme"}
    if secret.lower() in insecure_defaults:
        raise ValueError(
            f"Environment variable '{name}' is set to an insecure default ('{secret}'). "
            f"Choose a strong, unique value before starting DECNET."
        )

    too_short = name == "DECNET_JWT_SECRET" and len(secret) < 32
    if too_short and os.environ.get("DECNET_DEVELOPER", "False").lower() != "true":
        raise ValueError(
            f"DECNET_JWT_SECRET is too short ({len(secret)} bytes). "
            f"Use at least 32 characters to satisfy HS256 requirements (RFC 7518 §3.2)."
        )
    return secret
# System logging — all microservice daemons append here.
DECNET_SYSTEM_LOGS: str = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.log")

# Set to "true" to embed the profiler inside the API process.
# Leave unset (default) when the standalone `decnet profiler --daemon` is
# running — embedding both produces two workers sharing the same DB cursor,
# which causes events to be skipped or processed twice.
DECNET_EMBED_PROFILER: bool = os.environ.get("DECNET_EMBED_PROFILER", "").lower() == "true"

# Set to "true" to embed the MACVLAN sniffer inside the API process.
# Leave unset (default) when the standalone `decnet sniffer --daemon` is
# running (which `decnet deploy` always does). Embedding both produces two
# workers sniffing the same interface — duplicated events and wasted CPU.
DECNET_EMBED_SNIFFER: bool = os.environ.get("DECNET_EMBED_SNIFFER", "").lower() == "true"

# Set to "true" to mount the Pyinstrument ASGI middleware on the FastAPI app.
# Produces per-request HTML flamegraphs under ./profiles/. Off by default so
# production and normal dev runs pay zero profiling overhead.
DECNET_PROFILE_REQUESTS: bool = os.environ.get("DECNET_PROFILE_REQUESTS", "").lower() == "true"
DECNET_PROFILE_DIR: str = os.environ.get("DECNET_PROFILE_DIR", "profiles")

# API Options
DECNET_API_HOST: str = os.environ.get("DECNET_API_HOST", "127.0.0.1")
DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000)
# DECNET_JWT_SECRET is resolved lazily via module __getattr__ so that agent /
# updater / swarmctl subcommands (which never touch auth) can start without
# the master's JWT secret being present in the environment.
# Because a default is supplied, this lookup always yields a string here
# (never None) despite the `str | None` annotation.
DECNET_INGEST_LOG_FILE: str | None = os.environ.get("DECNET_INGEST_LOG_FILE", "/var/log/decnet/decnet.log")

# SWARM log pipeline — RFC 5425 syslog-over-TLS between worker forwarders
# and the master listener. Plaintext syslog across hosts is forbidden.
DECNET_SWARM_SYSLOG_PORT: int = _port("DECNET_SWARM_SYSLOG_PORT", 6514)
DECNET_SWARM_MASTER_HOST: str | None = os.environ.get("DECNET_SWARM_MASTER_HOST")

# Worker-side identity + swarmctl locator, seeded by the enroll bundle's
# /etc/decnet/decnet.ini ([agent] host-uuid / master-host / swarmctl-port).
# The agent heartbeat loop uses these to self-identify to the master.
DECNET_HOST_UUID: str | None = os.environ.get("DECNET_HOST_UUID")
DECNET_MASTER_HOST: str | None = os.environ.get("DECNET_MASTER_HOST")
DECNET_SWARMCTL_PORT: int = _port("DECNET_SWARMCTL_PORT", 8770)

# Ingester batching: how many log rows to accumulate per commit, and the
# max wait (ms) before flushing a partial batch. Larger batches reduce
# SQLite write-lock contention; the timeout keeps latency bounded during
# low-traffic periods.
# Parsed with a bare int(): a non-numeric value raises ValueError when this
# module is imported, and zero or negative values are accepted as-is.
DECNET_BATCH_SIZE: int = int(os.environ.get("DECNET_BATCH_SIZE", "100"))
DECNET_BATCH_MAX_WAIT_MS: int = int(os.environ.get("DECNET_BATCH_MAX_WAIT_MS", "250"))

# Web Dashboard Options
DECNET_WEB_HOST: str = os.environ.get("DECNET_WEB_HOST", "127.0.0.1")
DECNET_WEB_PORT: int = _port("DECNET_WEB_PORT", 8080)
# Both credentials fall back to "admin" when unset; nothing in this module
# rejects that default.
DECNET_ADMIN_USER: str = os.environ.get("DECNET_ADMIN_USER", "admin")
DECNET_ADMIN_PASSWORD: str = os.environ.get("DECNET_ADMIN_PASSWORD", "admin")
DECNET_DEVELOPER: bool = os.environ.get("DECNET_DEVELOPER", "False").lower() == "true"

# Host role — seeded by /etc/decnet/decnet.ini or exported directly.
# "master" = the central server (api, web, swarmctl, listener).
# "agent" = a worker node (agent, forwarder, updater). Workers gate their
# Typer CLI to hide master-only commands (see decnet/cli.py).
# The value is lower-cased but not checked against those two names here.
DECNET_MODE: str = os.environ.get("DECNET_MODE", "master").lower()
# When mode=agent, hide master-only Typer commands. Set to "false" for dual-
# role dev hosts where a single machine plays both sides.
DECNET_DISALLOW_MASTER: bool = (
os.environ.get("DECNET_DISALLOW_MASTER", "true").lower() == "true"
)

# Tracing — set to "true" to enable OpenTelemetry distributed tracing.
# Separate from DECNET_DEVELOPER so tracing can be toggled independently.
DECNET_DEVELOPER_TRACING: bool = os.environ.get("DECNET_DEVELOPER_TRACING", "").lower() == "true"
DECNET_OTEL_ENDPOINT: str = os.environ.get("DECNET_OTEL_ENDPOINT", "http://localhost:4317")

# Database Options
DECNET_DB_TYPE: str = os.environ.get("DECNET_DB_TYPE", "sqlite").lower()
DECNET_DB_URL: Optional[str] = os.environ.get("DECNET_DB_URL")
# MySQL component vars (used only when DECNET_DB_URL is not set)
DECNET_DB_HOST: str = os.environ.get("DECNET_DB_HOST", "localhost")
# The truthiness guard makes an empty DECNET_DB_PORT behave like "unset"
# (3306) instead of failing integer validation in _port().
DECNET_DB_PORT: int = _port("DECNET_DB_PORT", 3306) if os.environ.get("DECNET_DB_PORT") else 3306
DECNET_DB_NAME: str = os.environ.get("DECNET_DB_NAME", "decnet")
DECNET_DB_USER: str = os.environ.get("DECNET_DB_USER", "decnet")
DECNET_DB_PASSWORD: Optional[str] = os.environ.get("DECNET_DB_PASSWORD")

# CORS — comma-separated list of allowed origins for the web dashboard API.
# Defaults to the configured web host/port. Override with DECNET_CORS_ORIGINS if needed.
# Example: DECNET_CORS_ORIGINS=http://192.168.1.50:9090,https://dashboard.example.com
# Hosts in this set are replaced by "localhost" in the default origin below
# (the set also contains the loopback address 127.0.0.1, not only wildcards).
_WILDCARD_ADDRS = {"0.0.0.0", "127.0.0.1", "::"}  # nosec B104 — comparison only, not a bind
_web_hostname: str = "localhost" if DECNET_WEB_HOST in _WILDCARD_ADDRS else DECNET_WEB_HOST
_cors_default: str = f"http://{_web_hostname}:{DECNET_WEB_PORT}"
_cors_raw: str = os.environ.get("DECNET_CORS_ORIGINS", _cors_default)
# Entries are whitespace-trimmed; empty entries (e.g. from a trailing comma) are dropped.
DECNET_CORS_ORIGINS: list[str] = [o.strip() for o in _cors_raw.split(",") if o.strip()]
def __getattr__(name: str) -> str:
    """Resolve module attributes that are deliberately not set at import time.

    Only ``DECNET_JWT_SECRET`` is supported; it is read and validated on each
    access so that processes which never request it can start without it.

    Raises:
        AttributeError: For any other attribute name.
        ValueError: Propagated from the secret validation.
    """
    if name != "DECNET_JWT_SECRET":
        raise AttributeError(f"module 'decnet.env' has no attribute {name!r}")
    return _require_env("DECNET_JWT_SECRET")

View File

@@ -1,177 +0,0 @@
"""
Fleet builder — shared logic for constructing DeckyConfig lists.
Used by both the CLI and the web API router to build deckies from
flags or INI config. Lives here (not in cli.py) so that the web layer
and the mutation engine can import it without depending on the CLI.
"""
import random
from typing import Optional
from decnet.archetypes import Archetype, get_archetype
from decnet.config import DeckyConfig, random_hostname
from decnet.distros import all_distros, get_distro, random_distro
from decnet.models import IniConfig
from decnet.services.registry import all_services
def all_service_names() -> list[str]:
    """List the registered service names that are not fleet singletons, sorted."""
    per_decky = [
        svc_name
        for svc_name, service in all_services().items()
        if not service.fleet_singleton
    ]
    per_decky.sort()
    return per_decky
def resolve_distros(
    distros_explicit: list[str] | None,
    randomize_distros: bool,
    n: int,
    archetype: Archetype | None = None,
) -> list[str]:
    """Pick *n* distro slugs.

    Precedence: an explicit list (cycled), then random picks, then the
    archetype's preferred distros (cycled), then every known distro (cycled).

    Args:
        distros_explicit: Slugs to cycle through; wins over everything else.
        randomize_distros: Draw each slug independently at random.
        n: Number of slugs to return.
        archetype: Source of preferred distros when nothing above applies.

    Returns:
        A list of exactly *n* slugs.
    """
    if not distros_explicit and randomize_distros:
        return [random_distro().slug for _ in range(n)]

    if distros_explicit:
        pool = distros_explicit
    elif archetype:
        pool = archetype.preferred_distros
    else:
        pool = list(all_distros().keys())

    # Round-robin over the chosen pool.
    return [pool[position % len(pool)] for position in range(n)]
def build_deckies(
    n: int,
    ips: list[str],
    services_explicit: list[str] | None,
    randomize_services: bool,
    distros_explicit: list[str] | None = None,
    randomize_distros: bool = False,
    archetype: Archetype | None = None,
    mutate_interval: Optional[int] = None,
) -> list[DeckyConfig]:
    """Create one DeckyConfig per address in *ips* from CLI-style options.

    Deckies are named ``decky-01``, ``decky-02``, ... in the order of *ips*.
    Services come from *services_explicit*, else the archetype, else a random
    draw of one to three services that avoids repeating an earlier draw
    (giving up after 21 tries).

    Args:
        n: Number of distro slugs resolved up front; *ips* is indexed against
            that list, so it must not be longer than *n*.
        ips: Address for each decky.
        services_explicit: Services applied to every decky.
        randomize_services: Allow the random service draw.
        distros_explicit: Forwarded to ``resolve_distros``.
        randomize_distros: Forwarded to ``resolve_distros``.
        archetype: Supplies services, slug and nmap OS when given.
        mutate_interval: Stored unchanged on every decky.

    Raises:
        ValueError: If no service source is available.
    """
    slugs = resolve_distros(distros_explicit, randomize_distros, n, archetype)
    seen_combos: set[frozenset] = set()
    fleet = []

    for index, address in enumerate(ips):
        profile = get_distro(slugs[index])
        host = random_hostname(profile.slug)

        if services_explicit:
            services = services_explicit
        elif archetype:
            services = list(archetype.services)
        elif not randomize_services:
            raise ValueError("Provide services_explicit, archetype, or randomize_services=True.")
        else:
            pool = all_service_names()
            # Up to 21 draws; the last one is kept even if it repeats.
            for _ in range(21):
                size = random.randint(1, min(3, len(pool)))  # nosec B311
                combo = frozenset(random.sample(pool, size))  # nosec B311
                if combo not in seen_combos:
                    break
            services = list(combo)
            seen_combos.add(combo)

        fleet.append(
            DeckyConfig(
                name=f"decky-{index + 1:02d}",
                ip=address,
                services=services,
                distro=profile.slug,
                base_image=profile.image,
                build_base=profile.build_base,
                hostname=host,
                archetype=archetype.slug if archetype else None,
                nmap_os=archetype.nmap_os if archetype else "linux",
                mutate_interval=mutate_interval,
            )
        )
    return fleet
def build_deckies_from_ini(
    ini: IniConfig,
    subnet_cidr: str,
    gateway: str,
    host_ip: str,
    randomize: bool,
    cli_mutate_interval: int | None = None,
) -> list[DeckyConfig]:
    """Turn the decky sections of an IniConfig into DeckyConfig objects.

    Deckies without an explicit IP receive the next free host address of
    *subnet_cidr*. The network and broadcast addresses, the gateway, the host
    itself and every explicitly assigned IP are never handed out.

    Args:
        ini: Parsed INI configuration.
        subnet_cidr: Subnet to allocate addresses from (host bits tolerated).
        gateway: Gateway address, excluded from allocation.
        host_ip: This host's address, excluded from allocation.
        randomize: Accepted for interface compatibility. A decky with neither
            services nor an archetype gets random services whatever its value.
        cli_mutate_interval: When not None, overrides the per-decky and
            INI-wide mutate interval.

    Raises:
        ValueError: If the subnet runs out of free addresses or a decky names
            an unknown service.
    """
    import time
    from ipaddress import IPv4Address, IPv4Network

    stamped_at = time.time()

    pinned: set[IPv4Address] = {
        IPv4Address(entry.ip) for entry in ini.deckies if entry.ip
    }
    network = IPv4Network(subnet_cidr, strict=False)
    unavailable = {
        network.network_address,
        network.broadcast_address,
        IPv4Address(gateway),
        IPv4Address(host_ip),
    } | pinned
    # Lazy stream of assignable addresses, consumed in subnet order.
    free_ips = (str(host) for host in network.hosts() if host not in unavailable)

    built: list[DeckyConfig] = []
    for spec in ini.deckies:
        arch: Archetype | None = get_archetype(spec.archetype) if spec.archetype else None

        if arch:
            distro_choices = arch.preferred_distros
        else:
            distro_choices = list(all_distros().keys())
        # Rotate through the candidates by position in the output list.
        profile = get_distro(distro_choices[len(built) % len(distro_choices)])
        host_name = random_hostname(profile.slug)

        address = spec.ip
        if not address:
            address = next(free_ips, None)
        if address is None:
            raise ValueError(f"Not enough free IPs in {subnet_cidr} while assigning IP for '{spec.name}'.")

        if spec.services:
            registered = set(all_service_names())
            unknown = [svc for svc in spec.services if svc not in registered]
            if unknown:
                raise ValueError(
                    f"Unknown service(s) in [{spec.name}]: {unknown}. "
                    f"Available: {all_service_names()}"
                )
            services = spec.services
        elif arch:
            services = list(arch.services)
        else:
            # Neither services nor archetype: draw one to three at random.
            pool = all_service_names()
            how_many = random.randint(1, min(3, len(pool)))  # nosec B311
            services = random.sample(pool, how_many)  # nosec B311

        os_family = spec.nmap_os or (arch.nmap_os if arch else "linux")

        # Precedence: CLI override, then the decky's own value, then the INI-wide one.
        if cli_mutate_interval is not None:
            interval = cli_mutate_interval
        elif spec.mutate_interval is not None:
            interval = spec.mutate_interval
        else:
            interval = ini.mutate_interval

        built.append(DeckyConfig(
            name=spec.name,
            ip=address,
            services=services,
            distro=profile.slug,
            base_image=profile.image,
            build_base=profile.build_base,
            hostname=host_name,
            archetype=arch.slug if arch else None,
            service_config=spec.service_config,
            nmap_os=os_family,
            mutate_interval=interval,
            last_mutated=stamped_at,
        ))
    return built

View File

@@ -6,6 +6,7 @@ Format:
net=192.168.1.0/24
gw=192.168.1.1
interface=wlp6s0
log_target=192.168.1.5:5140 # optional
[hostname-1]
ip=192.168.1.82 # optional
@@ -41,8 +42,37 @@ Format:
"""
import configparser
from dataclasses import dataclass, field
from pathlib import Path
from decnet.models import IniConfig, DeckySpec, CustomServiceSpec, validate_ini_string # noqa: F401
@dataclass
class DeckySpec:
name: str
ip: str | None = None
services: list[str] | None = None
archetype: str | None = None
service_config: dict[str, dict] = field(default_factory=dict)
nmap_os: str | None = None # explicit OS family override (linux/windows/bsd/embedded/cisco)
@dataclass
class CustomServiceSpec:
"""Spec for a user-defined (bring-your-own) service."""
name: str # service slug, e.g. "myservice" (section is "custom-myservice")
image: str # Docker image to use
exec_cmd: str # command to run inside the container
ports: list[int] = field(default_factory=list)
@dataclass
class IniConfig:
subnet: str | None = None
gateway: str | None = None
interface: str | None = None
log_target: str | None = None
deckies: list[DeckySpec] = field(default_factory=list)
custom_services: list[CustomServiceSpec] = field(default_factory=list)
def load_ini(path: str | Path) -> IniConfig:
@@ -51,21 +81,7 @@ def load_ini(path: str | Path) -> IniConfig:
read = cp.read(str(path))
if not read:
raise FileNotFoundError(f"Config file not found: {path}")
return _parse_configparser(cp)
def load_ini_from_string(content: str) -> IniConfig:
"""Parse a DECNET INI string and return an IniConfig."""
# Normalize line endings (CRLF → LF, bare CR → LF) so the validator
# and configparser both see the same line boundaries.
content = content.replace('\r\n', '\n').replace('\r', '\n')
validate_ini_string(content)
cp = configparser.ConfigParser(strict=False)
cp.read_string(content)
return _parse_configparser(cp)
def _parse_configparser(cp: configparser.ConfigParser) -> IniConfig:
cfg = IniConfig()
if cp.has_section("general"):
@@ -73,24 +89,14 @@ def _parse_configparser(cp: configparser.ConfigParser) -> IniConfig:
cfg.subnet = g.get("net")
cfg.gateway = g.get("gw")
cfg.interface = g.get("interface")
from decnet.services.registry import all_services
known_services = set(all_services().keys())
cfg.log_target = g.get("log_target") or g.get("log-target")
# First pass: collect decky sections and custom service definitions
for section in cp.sections():
if section == "general":
continue
# A service sub-section is identified if the section name has at least one dot
# AND the last segment is a known service name.
# e.g. "decky-01.ssh" -> sub-section
# e.g. "decky.webmail" -> decky section (if "webmail" is not a service)
if "." in section:
_, _, last_segment = section.rpartition(".")
if last_segment in known_services:
continue # sub-section handled in second pass
continue # subsections handled in second pass
if section.startswith("custom-"):
# Bring-your-own service definition
s = cp[section]
@@ -109,30 +115,17 @@ def _parse_configparser(cp: configparser.ConfigParser) -> IniConfig:
services = [sv.strip() for sv in svc_raw.split(",")] if svc_raw else None
archetype = s.get("archetype")
nmap_os = s.get("nmap_os") or s.get("nmap-os") or None
mi_raw = s.get("mutate_interval") or s.get("mutate-interval")
mutate_interval = None
if mi_raw:
try:
mutate_interval = int(mi_raw)
except ValueError:
raise ValueError(f"[{section}] mutate_interval= must be an integer, got '{mi_raw}'")
amount_raw = s.get("amount", "1")
try:
amount = int(amount_raw)
if amount < 1:
raise ValueError
if amount > 100:
raise ValueError(f"[{section}] amount={amount} exceeds maximum allowed (100).")
except ValueError as e:
if "exceeds maximum" in str(e):
raise e
except ValueError:
raise ValueError(f"[{section}] amount= must be a positive integer, got '{amount_raw}'")
if amount == 1:
cfg.deckies.append(DeckySpec(
name=section, ip=ip, services=services, archetype=archetype, nmap_os=nmap_os, mutate_interval=mutate_interval,
name=section, ip=ip, services=services, archetype=archetype, nmap_os=nmap_os,
))
else:
# Expand into N deckies; explicit ip is ignored (can't share one IP)
@@ -148,7 +141,6 @@ def _parse_configparser(cp: configparser.ConfigParser) -> IniConfig:
services=services,
archetype=archetype,
nmap_os=nmap_os,
mutate_interval=mutate_interval,
))
# Second pass: collect per-service subsections [decky-name.service]
@@ -157,11 +149,7 @@ def _parse_configparser(cp: configparser.ConfigParser) -> IniConfig:
for section in cp.sections():
if "." not in section:
continue
decky_name, dot, svc_name = section.rpartition(".")
if svc_name not in known_services:
continue # not a service sub-section
decky_name, _, svc_name = section.partition(".")
svc_cfg = {k: v for k, v in cp[section].items()}
if decky_name in decky_map:
# Direct match — single decky

View File

@@ -1,92 +0,0 @@
"""
DECNET application logging helpers.
Usage:
from decnet.logging import get_logger
log = get_logger("engine") # APP-NAME in RFC 5424 output becomes "engine"
The returned logger propagates to the root logger (configured in config.py with
Rfc5424Formatter), so level control via DECNET_DEVELOPER still applies globally.
When ``DECNET_DEVELOPER_TRACING`` is active, every LogRecord is enriched with
``otel_trace_id`` and ``otel_span_id`` from the current OTEL span context.
This lets you correlate log lines with Jaeger traces — click a log entry and
jump straight to the span that produced it.
"""
from __future__ import annotations
import logging
class _ComponentFilter(logging.Filter):
    """Tag every record it sees with a fixed ``decnet_component`` value.

    The attribute is meant to let the formatter emit the component as the
    RFC 5424 APP-NAME instead of a hardcoded "decnet". The filter never
    drops a record.
    """

    def __init__(self, component: str) -> None:
        logging.Filter.__init__(self)
        self.component = component

    def filter(self, record: logging.LogRecord) -> bool:
        setattr(record, "decnet_component", self.component)
        return True
class _TraceContextFilter(logging.Filter):
    """Stamp ``otel_trace_id`` and ``otel_span_id`` onto a LogRecord.

    The values are taken from the active OTEL span context and rendered as
    32- and 16-digit lowercase hex. When no span is active, or OpenTelemetry
    cannot be imported or queried, both fields are set to ``"0"`` (cheap
    string comparison downstream, no None-checks needed). The filter never
    drops a record.
    """

    def filter(self, record: logging.LogRecord) -> bool:
        trace_id = span_id = "0"
        try:
            from opentelemetry import trace

            ctx = trace.get_current_span().get_span_context()
            if ctx and ctx.trace_id:
                trace_id = format(ctx.trace_id, "032x")
                span_id = format(ctx.span_id, "016x")
        except Exception:
            # Enrichment is best-effort: a logging call must never fail
            # because tracing is unavailable or misbehaving.
            trace_id = span_id = "0"
        record.otel_trace_id = trace_id  # type: ignore[attr-defined]
        record.otel_span_id = span_id  # type: ignore[attr-defined]
        return True


_trace_filter_installed: bool = False


def enable_trace_context() -> None:
    """Enrich every ``decnet`` log record with the current OTEL trace context.

    Called once from ``decnet.telemetry.setup_tracing()`` after the
    TracerProvider is initialised. Safe to call multiple times (idempotent).

    The filter is attached to the ``decnet`` logger as before. A filter on a
    logger is only consulted for records logged directly on that logger, not
    for records from its descendants (``decnet.<component>``). So a
    LogRecord factory is also installed that applies the same enrichment to
    every record whose logger name is ``decnet`` or starts with ``decnet.``.
    The previously installed factory is chained, not replaced.
    """
    global _trace_filter_installed
    if _trace_filter_installed:
        return

    trace_filter = _TraceContextFilter()
    logging.getLogger("decnet").addFilter(trace_filter)

    previous_factory = logging.getLogRecordFactory()

    def _decnet_record_factory(*args, **kwargs) -> logging.LogRecord:
        record = previous_factory(*args, **kwargs)
        if record.name == "decnet" or record.name.startswith("decnet."):
            trace_filter.filter(record)
        return record

    logging.setLogRecordFactory(_decnet_record_factory)
    _trace_filter_installed = True
def get_logger(component: str) -> logging.Logger:
    """Return the ``decnet.<component>`` logger, tagged with its component name.

    Valid components: cli, engine, api, mutator, collector.

    The logger propagates normally, so output and level control stay with the
    handlers configured further up the hierarchy. A component filter is
    attached only if the logger does not already carry one, so repeated calls
    for the same component are safe.
    """
    named = logging.getLogger(f"decnet.{component}")
    for existing in named.filters:
        if isinstance(existing, _ComponentFilter):
            return named
    named.addFilter(_ComponentFilter(component))
    return named

View File

@@ -1,3 +1,4 @@
from __future__ import annotations
"""
Rotating file handler for DECNET syslog output.
@@ -6,44 +7,34 @@ Path is controlled by the DECNET_LOG_FILE environment variable
(default: /var/log/decnet/decnet.log).
"""
from __future__ import annotations
import logging
import logging.handlers
import os
from pathlib import Path
from decnet.logging.inode_aware_handler import InodeAwareRotatingFileHandler
from decnet.privdrop import chown_to_invoking_user, chown_tree_to_invoking_user
from decnet.telemetry import traced as _traced
_LOG_FILE_ENV = "DECNET_LOG_FILE"
_DEFAULT_LOG_FILE = "/var/log/decnet/decnet.log"
_MAX_BYTES = 10 * 1024 * 1024 # 10 MB
_BACKUP_COUNT = 5
_handler: InodeAwareRotatingFileHandler | None = None
_handler: logging.handlers.RotatingFileHandler | None = None
_logger: logging.Logger | None = None
@_traced("logging.init_file_handler")
def _init_file_handler() -> logging.Logger:
"""One-time initialisation of the rotating file handler."""
def _get_logger() -> logging.Logger:
global _handler, _logger
if _logger is not None:
return _logger
log_path = Path(os.environ.get(_LOG_FILE_ENV, _DEFAULT_LOG_FILE))
log_path.parent.mkdir(parents=True, exist_ok=True)
# When running under sudo, hand the parent dir back to the invoking user
# so a subsequent non-root `decnet api` can also write to it.
chown_tree_to_invoking_user(log_path.parent)
_handler = InodeAwareRotatingFileHandler(
_handler = logging.handlers.RotatingFileHandler(
log_path,
maxBytes=_MAX_BYTES,
backupCount=_BACKUP_COUNT,
encoding="utf-8",
)
chown_to_invoking_user(log_path)
_handler.setFormatter(logging.Formatter("%(message)s"))
_logger = logging.getLogger("decnet.syslog")
@@ -54,19 +45,14 @@ def _init_file_handler() -> logging.Logger:
return _logger
def _get_logger() -> logging.Logger:
if _logger is not None:
return _logger
return _init_file_handler()
def write_syslog(line: str) -> None:
"""Write a single RFC 5424 syslog line to the rotating log file."""
try:
_get_logger().info(line)
except Exception: # nosec B110
except Exception:
pass
def get_log_path() -> Path:
"""Return the configured log file path (for tests/inspection)."""
return Path(os.environ.get(_LOG_FILE_ENV, _DEFAULT_LOG_FILE))

View File

@@ -11,8 +11,6 @@ shared utilities for validating and parsing the log_target string.
import socket
from decnet.telemetry import traced as _traced
def parse_log_target(log_target: str) -> tuple[str, int]:
"""
@@ -25,7 +23,6 @@ def parse_log_target(log_target: str) -> tuple[str, int]:
return parts[0], int(parts[1])
@_traced("logging.probe_log_target")
def probe_log_target(log_target: str, timeout: float = 2.0) -> bool:
"""
Return True if the log target is reachable (TCP connect succeeds).

View File

@@ -1,60 +0,0 @@
"""
RotatingFileHandler that detects external deletion or rotation.
Stdlib ``RotatingFileHandler`` holds an open file descriptor for the
lifetime of the handler. If the target file is deleted (``rm``) or
rotated out (``logrotate`` without ``copytruncate``), the handler keeps
writing to the now-orphaned inode until its own size-based rotation
finally triggers — silently losing every line in between.
Stdlib ``WatchedFileHandler`` solves exactly this problem but doesn't
rotate by size. This subclass combines both: before each emit we stat
the configured path and compare its inode/device to the currently open
file; on mismatch we close and reopen.
Cheap: one ``os.stat`` per log record. Matches the pattern used by
``decnet/collector/worker.py:_reopen_if_needed``.
"""
from __future__ import annotations
import logging
import logging.handlers
import os
class InodeAwareRotatingFileHandler(logging.handlers.RotatingFileHandler):
    """Size-rotating file handler that notices when its file is replaced.

    Before each record is written, the file currently at ``baseFilename`` is
    compared with the open stream by inode and device. If they differ, or the
    file is gone, the handler reopens the path so later records are not
    written to an orphaned file.
    """

    def _should_reopen(self) -> bool:
        """Tell whether the open stream no longer matches the file on disk."""
        stream = self.stream
        if stream is None:
            return True
        try:
            on_disk = os.stat(self.baseFilename)
        except FileNotFoundError:
            # Deleted or rotated away.
            return True
        except OSError:
            # Cannot inspect the path; keep writing to what we have.
            return False
        try:
            opened = os.fstat(stream.fileno())
        except OSError:
            return True
        return (on_disk.st_ino, on_disk.st_dev) != (opened.st_ino, opened.st_dev)

    def emit(self, record: logging.LogRecord) -> None:
        """Write *record*, reopening the target file first when needed."""
        if not self._should_reopen():
            super().emit(record)
            return

        try:
            if self.stream is not None:
                self.close()
        except Exception:  # nosec B110
            pass

        try:
            self.stream = self._open()
        except OSError:
            # A logging handler MUST NOT crash its caller. If the file cannot
            # be reopened (e.g. it is root-owned after `sudo decnet deploy`
            # and this process is non-root), defer to the stdlib error path,
            # which just prints a traceback to stderr.
            self.handleError(record)
            return
        super().emit(record)

View File

@@ -1,3 +1,4 @@
from __future__ import annotations
"""
RFC 5424 syslog formatter for DECNET.
@@ -5,18 +6,16 @@ Produces fully-compliant syslog messages:
<PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG
Facility: local0 (16)
PEN for structured data: relay@55555
PEN for structured data: decnet@55555
"""
from __future__ import annotations
from datetime import datetime, timezone
from typing import Any
FACILITY_LOCAL0 = 16
NILVALUE = "-"
_SD_ID = "relay@55555"
_SD_ID = "decnet@55555"
SEVERITY_INFO = 6
SEVERITY_WARNING = 4

View File

@@ -1,123 +0,0 @@
"""
DECNET Domain Models.
Centralized repository for all Pydantic specifications used throughout the project.
This file ensures that core domain logic has no dependencies on the web or database layers.
"""
from typing import Optional, List, Dict, Literal, Annotated, Any
from pydantic import BaseModel, ConfigDict, Field as PydanticField, field_validator, BeforeValidator
import configparser
# --- INI Specification Models ---
def validate_ini_string(v: Any) -> str:
    """Check that *v* is a structurally valid, non-empty INI document.

    Args:
        v: Candidate INI content.

    Returns:
        *v* unchanged when it passes every check.

    Raises:
        ValueError: If *v* is not a string, is longer than 512 * 1024
            characters, is blank, cannot be parsed by ``configparser``, or
            contains no section other than ``DEFAULT``. When the parser
            rejected the content, its exception is attached as ``__cause__``.
    """
    if not isinstance(v, str):
        # This remains an internal type mismatch (caught by Pydantic usually)
        raise ValueError("INI content must be a string")

    # Size cap to prevent DoS/OOM; measured in characters via len().
    if len(v) > 512 * 1024:
        raise ValueError("INI content is too large (max 512KB)")

    if not v.strip():
        # Using exact phrasing expected by tests
        raise ValueError("INI content is empty")

    parser = configparser.ConfigParser(interpolation=None, allow_no_value=True, strict=False)
    try:
        parser.read_string(v)
    except configparser.Error as e:
        # Content before any [section] header is reported as "no sections".
        if "no section headers" in str(e).lower():
            raise ValueError("Invalid INI format: no sections found") from e
        raise ValueError(f"Invalid INI format: {str(e)}") from e

    if not parser.sections():
        raise ValueError("The provided INI content must contain at least one section (no sections found)")
    return v
# Reusable type that enforces INI structure during initialization.
# Removed min_length=1 to make empty strings schema-compliant yet semantically invalid (mapped to 409).
IniContent = Annotated[str, BeforeValidator(validate_ini_string)]
class DeckySpec(BaseModel):
    """One decky as declared in the INI file.

    Strict validation is enabled and unknown keys are rejected.
    """

    model_config = ConfigDict(strict=True, extra="forbid")

    # Identifier: at most 128 chars drawn from letters, digits, '-', '_', '.'.
    name: str = PydanticField(..., max_length=128, pattern=r"^[A-Za-z0-9\-_.]+$")

    # Everything below is optional in the INI.
    ip: Optional[str] = None
    services: Optional[List[str]] = None
    archetype: Optional[str] = None

    # Per-service settings; a fresh empty dict is created per instance.
    service_config: Dict[str, Dict] = PydanticField(default_factory=dict)

    nmap_os: Optional[str] = None

    # When provided, must be at least 1.
    mutate_interval: Optional[int] = PydanticField(None, ge=1)
class CustomServiceSpec(BaseModel):
    """A user-supplied (bring-your-own) service definition.

    Strict validation is enabled and unknown keys are rejected.
    """

    model_config = ConfigDict(strict=True, extra="forbid")

    # All three are required strings.
    name: str
    image: str
    exec_cmd: str

    # Ports for the service; defaults to a fresh empty list.
    ports: List[int] = PydanticField(default_factory=list)
class IniConfig(BaseModel):
    """The complete structured representation of a DECNET INI file.

    Strict validation is enabled and unknown keys are rejected. A config
    must always carry at least one decky, including when ``deckies`` is
    omitted entirely.
    """

    model_config = ConfigDict(strict=True, extra="forbid")

    subnet: Optional[str] = None
    gateway: Optional[str] = None
    interface: Optional[str] = None

    # When provided, must be at least 1.
    mutate_interval: Optional[int] = PydanticField(None, ge=1)

    # Pydantic skips validation of default values unless asked to, so without
    # validate_default=True an omitted ``deckies`` would silently become an
    # empty list and bypass both min_length and the validator below.
    deckies: List[DeckySpec] = PydanticField(
        default_factory=list,
        min_length=1,
        validate_default=True,
    )
    custom_services: List[CustomServiceSpec] = PydanticField(default_factory=list)

    @field_validator("deckies")
    @classmethod
    def at_least_one_decky(cls, v: List[DeckySpec]) -> List[DeckySpec]:
        """Ensure that an INI deployment always contains at least one machine.

        Raises:
            ValueError: If the list of deckies is empty.
        """
        if not v:
            raise ValueError("INI must contain at least one decky section")
        return v
# --- Runtime Configuration Models ---
class DeckyConfig(BaseModel):
    """Operational (runtime) configuration of one deployed decky container.

    Strict validation is enabled and unknown keys are rejected.
    """

    model_config = ConfigDict(strict=True, extra="forbid")

    name: str
    ip: str

    # Required, and must hold at least one entry.
    services: list[str] = PydanticField(..., min_length=1)

    # slug from distros.DISTROS, e.g. "debian", "ubuntu22"
    distro: str
    # Docker image for the base/IP-holder container
    base_image: str
    # apt-compatible image for service Dockerfiles
    build_base: str = "debian:bookworm-slim"
    hostname: str

    # archetype slug if spawned from an archetype profile
    archetype: str | None = None
    service_config: dict[str, dict] = PydanticField(default_factory=dict)

    # OS family for TCP/IP stack spoofing (see os_fingerprint.py)
    nmap_os: str = "linux"

    # automatic rotation interval in minutes
    mutate_interval: int | None = None
    # timestamp of last mutation
    last_mutated: float = 0.0
    # timestamp of most recent interaction
    last_login_attempt: float = 0.0

    # SWARM: the SwarmHost.uuid that runs this decky. None in unihost mode
    # so existing state files deserialize unchanged.
    host_uuid: str | None = None

    @field_validator("services")
    @classmethod
    def services_not_empty(cls, v: list[str]) -> list[str]:
        """Reject an empty service list with a domain-specific message."""
        if not v:
            raise ValueError("A decky must have at least one service.")
        return v
class DecnetConfig(BaseModel):
    """Top-level configuration describing a whole DECNET fleet deployment.

    Unlike the other models in this module, no ``model_config`` is declared
    here, so Pydantic's defaults apply.
    """

    # Deployment topology: exactly one of these two literals.
    mode: Literal["unihost", "swarm"]

    interface: str
    subnet: str
    gateway: str

    # Required, and must hold at least one decky.
    deckies: list[DeckyConfig] = PydanticField(..., min_length=1)

    # host path where the collector writes the log file
    log_file: str | None = None
    # use IPvlan L2 instead of MACVLAN (WiFi-friendly)
    ipvlan: bool = False
    # global automatic rotation interval in minutes
    mutate_interval: int | None = 30

View File

@@ -1,3 +0,0 @@
from decnet.mutator.engine import mutate_all, mutate_decky, run_watch_loop
__all__ = ["mutate_all", "mutate_decky", "run_watch_loop"]

View File

@@ -1,147 +0,0 @@
"""
Mutation Engine for DECNET.
Handles dynamic rotation of exposed honeypot services over time.
"""
import random
import time
from typing import Optional
from rich.console import Console
from decnet.archetypes import get_archetype
from decnet.fleet import all_service_names
from decnet.composer import write_compose
from decnet.config import DeckyConfig, DecnetConfig
from decnet.engine import _compose_with_retry
from decnet.logging import get_logger
from decnet.telemetry import traced as _traced
from pathlib import Path
import anyio
import asyncio
from decnet.web.db.repository import BaseRepository
log = get_logger("mutator")
console = Console()
@_traced("mutator.mutate_decky")
async def mutate_decky(decky_name: str, repo: BaseRepository) -> bool:
    """Re-roll the exposed services of a single decky and redeploy it.

    The stored "deployment" state is loaded from ``repo``, a new random set
    of one to three services is drawn from the decky's archetype pool (or
    from every known service when there is no usable archetype), the state
    is saved back, the compose file is rewritten, and compose is brought up
    in a worker thread.

    Args:
        decky_name: Name of the decky to mutate.
        repo: Repository used to read and persist the deployment state.

    Returns:
        True when the mutation was applied and compose came up; False when
        there is no deployment, the decky is unknown, no services are
        available, or the compose step raised.
    """
    log.debug("mutate_decky: start decky=%s", decky_name)

    stored = await repo.get_state("deployment")
    if stored is None:
        log.error("mutate_decky: no active deployment found in database")
        console.print("[red]No active deployment found in database.[/]")
        return False

    config = DecnetConfig(**stored["config"])
    compose_path = Path(stored["compose_path"])

    # Locate the first decky carrying the requested name.
    decky: Optional[DeckyConfig] = None
    for candidate in config.deckies:
        if candidate.name == decky_name:
            decky = candidate
            break
    if decky is None:
        console.print(f"[red]Decky '{decky_name}' not found in state.[/]")
        return False

    # Prefer the archetype's own service list; fall back to the full catalogue
    # when there is no archetype or the lookup raises ValueError.
    svc_pool = None
    if decky.archetype:
        try:
            svc_pool = list(get_archetype(decky.archetype).services)
        except ValueError:
            svc_pool = None
    if svc_pool is None:
        svc_pool = all_service_names()

    if not svc_pool:
        console.print(f"[yellow]No services available for mutating '{decky_name}'.[/]")
        return False

    # Draw until the selection differs from the current one, giving up after
    # 21 draws so a pool that can only yield the current set cannot loop forever.
    current_services = set(decky.services)
    chosen = current_services
    draws = 0
    while chosen == current_services and draws <= 20:
        count = random.randint(1, min(3, len(svc_pool)))  # nosec B311
        chosen = set(random.sample(svc_pool, count))  # nosec B311
        draws += 1

    decky.services = list(chosen)
    decky.last_mutated = time.time()

    # Save to DB
    new_state = {"config": config.model_dump(), "compose_path": str(compose_path)}
    await repo.set_state("deployment", new_state)

    # Still writes files for Docker to use
    write_compose(config, compose_path)

    log.info("mutation applied decky=%s services=%s", decky_name, ",".join(decky.services))
    console.print(f"[cyan]Mutating '{decky_name}' to services: {', '.join(decky.services)}[/]")

    try:
        # The compose helper blocks, so run it off the event loop.
        await anyio.to_thread.run_sync(_compose_with_retry, "up", "-d", "--remove-orphans", compose_path)
    except Exception as e:
        log.error("mutation failed decky=%s error=%s", decky_name, e)
        console.print(f"[red]Failed to mutate '{decky_name}': {e}[/]")
        return False
    return True
@_traced("mutator.mutate_all")
async def mutate_all(repo: BaseRepository, force: bool = False) -> None:
    """Mutate every decky whose rotation interval has elapsed.

    Args:
        repo: Repository holding the "deployment" state.
        force: When True, every decky is mutated regardless of schedule
            (including deckies with no interval configured).
    """
    log.debug("mutate_all: start force=%s", force)

    stored = await repo.get_state("deployment")
    if stored is None:
        log.error("mutate_all: no active deployment found")
        console.print("[red]No active deployment found.[/]")
        return

    config = DecnetConfig(**stored["config"])
    now = time.time()
    mutated_count = 0

    for decky in config.deckies:
        # Per-decky interval wins; otherwise use the fleet-wide one.
        interval_mins = decky.mutate_interval or config.mutate_interval

        if force:
            due = True
        elif interval_mins is None:
            # No schedule configured for this decky.
            continue
        else:
            due = (now - decky.last_mutated) >= (interval_mins * 60)

        if not due:
            continue
        if await mutate_decky(decky.name, repo=repo):
            mutated_count += 1

    if force or mutated_count != 0:
        log.info("mutate_all: complete mutated_count=%d", mutated_count)
    else:
        log.debug("mutate_all: no deckies due for mutation")
        console.print("[dim]No deckies are due for mutation.[/]")
@_traced("mutator.watch_loop")
async def run_watch_loop(repo: BaseRepository, poll_interval_secs: int = 10) -> None:
    """Poll forever, mutating due deckies on every pass.

    Args:
        repo: Repository passed through to ``mutate_all``.
        poll_interval_secs: Seconds to sleep between passes.

    A ``KeyboardInterrupt`` ends the loop quietly after logging a stop message.
    """
    log.info("mutator watch loop started poll_interval_secs=%d", poll_interval_secs)
    console.print(f"[green]DECNET Mutator Watcher started (polling every {poll_interval_secs}s).[/]")
    try:
        while True:
            # Scheduled pass only: never force a mutation from the watcher.
            await mutate_all(repo, force=False)
            await asyncio.sleep(poll_interval_secs)
    except KeyboardInterrupt:
        log.info("mutator watch loop stopped")
        console.print("\n[dim]Mutator watcher stopped.[/]")

View File

@@ -8,8 +8,11 @@ Handles:
- IP allocation (sequential, skipping reserved addresses)
"""
import ipaddress
import os
import subprocess # nosec B404
import shutil
import socket
import subprocess
from ipaddress import IPv4Address, IPv4Interface, IPv4Network
import docker
@@ -24,7 +27,7 @@ HOST_IPVLAN_IFACE = "decnet_ipvlan0"
# ---------------------------------------------------------------------------
def _run(cmd: list[str], check: bool = True) -> subprocess.CompletedProcess:
return subprocess.run(cmd, capture_output=True, text=True, check=check) # nosec B603 B404
return subprocess.run(cmd, capture_output=True, text=True, check=check)
def detect_interface() -> str:
@@ -126,57 +129,22 @@ def allocate_ips(
# Docker MACVLAN network
# ---------------------------------------------------------------------------
def _ensure_network(
def create_macvlan_network(
client: docker.DockerClient,
*,
driver: str,
interface: str,
subnet: str,
gateway: str,
ip_range: str,
extra_options: dict | None = None,
) -> None:
"""Create the decnet docker network with ``driver``, replacing any
existing network of the same name that was built with a different driver.
Why the replace-on-driver-mismatch: macvlan and ipvlan slaves can't
coexist on the same parent interface. If an earlier run left behind a
macvlan-driver network and we're now asked for ipvlan (or vice versa),
short-circuiting on name alone leaves Docker attaching new containers
to the old driver and the host NIC ends up EBUSY on the next port
create. So: when driver disagrees, disconnect everything and DROP it.
"""
options = {"parent": interface}
if extra_options:
options.update(extra_options)
for net in client.networks.list(names=[MACVLAN_NETWORK_NAME]):
if net.attrs.get("Driver") == driver:
# Same driver — but if the IPAM pool drifted (different subnet,
# gateway, or ip-range than this deploy asks for), reusing it
# hands out addresses from the old pool and we race the real LAN.
# Compare and rebuild on mismatch.
pools = (net.attrs.get("IPAM") or {}).get("Config") or []
cur = pools[0] if pools else {}
if (
cur.get("Subnet") == subnet
and cur.get("Gateway") == gateway
and cur.get("IPRange") == ip_range
):
return # right driver AND matching pool, leave it alone
# Driver mismatch OR IPAM drift — tear it down. Disconnect any live
# containers first so `remove()` doesn't refuse with ErrNetworkInUse.
for cid in (net.attrs.get("Containers") or {}):
try:
net.disconnect(cid, force=True)
except docker.errors.APIError:
pass
net.remove()
"""Create the MACVLAN Docker network. No-op if it already exists."""
existing = [n.name for n in client.networks.list()]
if MACVLAN_NETWORK_NAME in existing:
return
client.networks.create(
name=MACVLAN_NETWORK_NAME,
driver=driver,
options=options,
driver="macvlan",
options={"parent": interface},
ipam=docker.types.IPAMConfig(
driver="default",
pool_configs=[
@@ -190,21 +158,6 @@ def _ensure_network(
)
def create_macvlan_network(
client: docker.DockerClient,
interface: str,
subnet: str,
gateway: str,
ip_range: str,
) -> None:
"""Create the MACVLAN Docker network, replacing an ipvlan-driver one of
the same name if necessary (parent-NIC can't host both drivers)."""
_ensure_network(
client, driver="macvlan", interface=interface,
subnet=subnet, gateway=gateway, ip_range=ip_range,
)
def create_ipvlan_network(
client: docker.DockerClient,
interface: str,
@@ -212,12 +165,25 @@ def create_ipvlan_network(
gateway: str,
ip_range: str,
) -> None:
"""Create an IPvlan L2 Docker network, replacing a macvlan-driver one of
the same name if necessary (parent-NIC can't host both drivers)."""
_ensure_network(
client, driver="ipvlan", interface=interface,
subnet=subnet, gateway=gateway, ip_range=ip_range,
extra_options={"ipvlan_mode": "l2"},
"""Create an IPvlan L2 Docker network. No-op if it already exists."""
existing = [n.name for n in client.networks.list()]
if MACVLAN_NETWORK_NAME in existing:
return
client.networks.create(
name=MACVLAN_NETWORK_NAME,
driver="ipvlan",
options={"parent": interface, "ipvlan_mode": "l2"},
ipam=docker.types.IPAMConfig(
driver="default",
pool_configs=[
docker.types.IPAMPool(
subnet=subnet,
gateway=gateway,
iprange=ip_range,
)
],
),
)
@@ -241,14 +207,10 @@ def _require_root() -> None:
def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str) -> None:
"""
Create a macvlan interface on the host so the deployer can reach deckies.
Idempotent — skips steps that are already done. Drops a stale ipvlan
host-helper first: the two drivers can share a parent NIC on paper but
leaving the opposite helper in place is just cruft after a driver swap.
Idempotent — skips steps that are already done.
"""
_require_root()
_run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
# Check if interface already exists
result = _run(["ip", "link", "show", HOST_MACVLAN_IFACE], check=False)
if result.returncode != 0:
@@ -268,14 +230,10 @@ def teardown_host_macvlan(decky_ip_range: str) -> None:
def setup_host_ipvlan(interface: str, host_ipvlan_ip: str, decky_ip_range: str) -> None:
"""
Create an IPvlan interface on the host so the deployer can reach deckies.
Idempotent — skips steps that are already done. Drops a stale macvlan
host-helper first so a prior macvlan deploy doesn't leave its slave
dangling on the parent NIC after the driver swap.
Idempotent — skips steps that are already done.
"""
_require_root()
_run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
result = _run(["ip", "link", "show", HOST_IPVLAN_IFACE], check=False)
if result.returncode != 0:
_run(["ip", "link", "add", HOST_IPVLAN_IFACE, "link", interface, "type", "ipvlan", "mode", "l2"])

View File

@@ -5,31 +5,17 @@ Maps an nmap OS family slug to a dict of Linux kernel sysctls that, when applied
to a container's network namespace, make its TCP/IP stack behaviour resemble the
claimed OS as closely as possible within the Linux kernel's constraints.
All sysctls listed here are network-namespace-scoped and safe to set per-container
without --privileged (beyond the NET_ADMIN capability already granted).
Primary discriminator leveraged by nmap: net.ipv4.ip_default_ttl (TTL)
Linux → 64
Windows → 128
BSD (FreeBSD/macOS)→ 64 (different TCP options, but same TTL as Linux)
Embedded / network → 255
Secondary discriminators (nmap OPS / WIN / ECN / T2T6 probe groups):
Secondary tuning (TCP behaviour):
net.ipv4.tcp_syn_retries SYN retransmits before giving up
net.ipv4.tcp_timestamps TCP timestamp option (OPS probes); Windows = off
net.ipv4.tcp_window_scaling Window scale option; embedded/Cisco typically off
net.ipv4.tcp_sack Selective ACK option; absent on most embedded stacks
net.ipv4.tcp_ecn ECN negotiation; Linux offers (2), Windows off (0)
net.ipv4.ip_no_pmtu_disc DF bit in ICMP replies (IE probes); embedded on
net.ipv4.tcp_fin_timeout FIN_WAIT_2 seconds (T2T6 timing); Windows shorter
ICMP tuning (nmap IE / U1 probe groups):
net.ipv4.icmp_ratelimit Min ms between ICMP error replies; Windows = 0 (none)
net.ipv4.icmp_ratemask Bitmask of ICMP types subject to rate limiting
Note: net.core.rmem_default is a global (non-namespaced) sysctl and cannot be
set per-container without --privileged; TCP window size is already correct for
Windows (64240) from the kernel's default tcp_rmem settings.
set per-container without --privileged; it is intentionally excluded.
"""
from __future__ import annotations
@@ -38,69 +24,27 @@ OS_SYSCTLS: dict[str, dict[str, str]] = {
"linux": {
"net.ipv4.ip_default_ttl": "64",
"net.ipv4.tcp_syn_retries": "6",
"net.ipv4.tcp_timestamps": "1",
"net.ipv4.tcp_window_scaling": "1",
"net.ipv4.tcp_sack": "1",
"net.ipv4.tcp_ecn": "2",
"net.ipv4.ip_no_pmtu_disc": "0",
"net.ipv4.tcp_fin_timeout": "60",
"net.ipv4.icmp_ratelimit": "1000",
"net.ipv4.icmp_ratemask": "6168",
},
"windows": {
"net.ipv4.ip_default_ttl": "128",
"net.ipv4.tcp_syn_retries": "2",
"net.ipv4.tcp_timestamps": "0",
"net.ipv4.tcp_window_scaling": "1",
"net.ipv4.tcp_sack": "1",
"net.ipv4.tcp_ecn": "0",
"net.ipv4.ip_no_pmtu_disc": "0",
"net.ipv4.tcp_fin_timeout": "30",
"net.ipv4.icmp_ratelimit": "0",
"net.ipv4.icmp_ratemask": "0",
},
"bsd": {
"net.ipv4.ip_default_ttl": "64",
"net.ipv4.tcp_syn_retries": "6",
"net.ipv4.tcp_timestamps": "1",
"net.ipv4.tcp_window_scaling": "1",
"net.ipv4.tcp_sack": "1",
"net.ipv4.tcp_ecn": "0",
"net.ipv4.ip_no_pmtu_disc": "0",
"net.ipv4.tcp_fin_timeout": "60",
"net.ipv4.icmp_ratelimit": "250",
"net.ipv4.icmp_ratemask": "6168",
},
"embedded": {
"net.ipv4.ip_default_ttl": "255",
"net.ipv4.tcp_syn_retries": "3",
"net.ipv4.tcp_timestamps": "0",
"net.ipv4.tcp_window_scaling": "0",
"net.ipv4.tcp_sack": "0",
"net.ipv4.tcp_ecn": "0",
"net.ipv4.ip_no_pmtu_disc": "1",
"net.ipv4.tcp_fin_timeout": "15",
"net.ipv4.icmp_ratelimit": "0",
"net.ipv4.icmp_ratemask": "0",
},
"cisco": {
"net.ipv4.ip_default_ttl": "255",
"net.ipv4.tcp_syn_retries": "2",
"net.ipv4.tcp_timestamps": "0",
"net.ipv4.tcp_window_scaling": "0",
"net.ipv4.tcp_sack": "0",
"net.ipv4.tcp_ecn": "0",
"net.ipv4.ip_no_pmtu_disc": "1",
"net.ipv4.tcp_fin_timeout": "15",
"net.ipv4.icmp_ratelimit": "0",
"net.ipv4.icmp_ratemask": "0",
},
}
_DEFAULT_OS = "linux"
_REQUIRED_SYSCTLS: frozenset[str] = frozenset(OS_SYSCTLS["linux"].keys())
def get_os_sysctls(nmap_os: str) -> dict[str, str]:
"""Return the sysctl dict for *nmap_os*. Falls back to Linux on unknown slugs."""
@@ -110,4 +54,3 @@ def get_os_sysctls(nmap_os: str) -> dict[str, str]:
def all_os_families() -> list[str]:
"""Return all registered nmap OS family slugs."""
return list(OS_SYSCTLS.keys())

View File

@@ -1,67 +0,0 @@
"""
Helpers for dropping root ownership on files created during privileged
operations (e.g. `sudo decnet deploy` needs root for MACVLAN, but its log
files should be owned by the invoking user so a subsequent non-root
`decnet api` can append to them).
When sudo invokes a process, it sets SUDO_UID / SUDO_GID in the
environment to the original user's IDs. We use those to chown files
back after creation.
"""
from __future__ import annotations
import os
from pathlib import Path
from typing import Optional
def _sudo_ids() -> Optional[tuple[int, int]]:
    """Read the sudo caller's numeric IDs from the environment.

    Returns:
        ``(uid, gid)`` parsed from ``SUDO_UID`` / ``SUDO_GID``, or None when
        either variable is unset, empty, or not an integer.
    """
    uid_text = os.environ.get("SUDO_UID")
    gid_text = os.environ.get("SUDO_GID")
    if uid_text and gid_text:
        try:
            return int(uid_text), int(gid_text)
        except ValueError:
            # Malformed value: treat it the same as "not set".
            pass
    return None
def chown_to_invoking_user(path: str | os.PathLike[str]) -> None:
    """Hand ownership of *path* back to the user who ran sudo, if possible.

    Nothing happens (and nothing is raised) when:
      * the effective UID is not 0,
      * the sudo IDs are unavailable,
      * *path* does not exist,
      * the ``chown`` call fails with ``OSError`` (silently ignored).
    """
    if os.geteuid() != 0:
        return

    owner = _sudo_ids()
    if owner is None:
        return

    target = Path(path)
    if target.exists():
        try:
            os.chown(target, *owner)
        except OSError:
            # Best-effort; a failed chown is not fatal to logging.
            pass
def chown_tree_to_invoking_user(root: str | os.PathLike[str]) -> None:
    """Run :func:`chown_to_invoking_user` on *root* and everything below it.

    Intended for directory trees freshly created as root (for example via
    ``mkdir(parents=True)``). Does nothing unless the effective UID is 0,
    the sudo IDs are available, and *root* exists.
    """
    if os.geteuid() != 0:
        return
    if _sudo_ids() is None:
        return

    root_path = Path(root)
    if root_path.exists():
        # Root first, then every descendant found by the recursive glob.
        chown_to_invoking_user(root_path)
        for entry in root_path.rglob("*"):
            chown_to_invoking_user(entry)

View File

@@ -1,13 +0,0 @@
"""
DECNET-PROBER — standalone active network probing service.
Runs as a detached host-level process (no container). Sends crafted TLS
probes to discover C2 frameworks and other attacker infrastructure via
JARM fingerprinting. Results are written as RFC 5424 syslog + JSON to the
same log file the collector uses, so the existing ingestion pipeline picks
them up automatically.
"""
from decnet.prober.worker import prober_worker
__all__ = ["prober_worker"]

View File

@@ -1,252 +0,0 @@
"""
HASSHServer — SSH server fingerprinting via KEX_INIT algorithm ordering.
Connects to an SSH server, completes the version exchange, captures the
server's SSH_MSG_KEXINIT message, and hashes the server-to-client algorithm
fields (kex, encryption, MAC, compression) into a 32-character MD5 digest.
This is the *server* variant of HASSH (HASSHServer). It fingerprints what
the server *offers*, which identifies the SSH implementation (OpenSSH,
Paramiko, libssh, Cobalt Strike SSH, etc.).
Stdlib only (socket, struct, hashlib) plus decnet.telemetry for tracing (zero-cost when disabled).
"""
from __future__ import annotations
import hashlib
import socket
import struct
from typing import Any
from decnet.telemetry import traced as _traced
# SSH protocol constants
# Message-type byte expected at the start of the server's first packet.
_SSH_MSG_KEXINIT = 20
# Length of the cookie that follows the type byte in a KEX_INIT payload.
_KEX_INIT_COOKIE_LEN = 16
# NOTE(review): the parser below hard-codes its ten field names and does not
# appear to reference this constant.
_KEX_INIT_NAME_LISTS = 10 # 10 name-list fields in KEX_INIT
# Blend in as a normal OpenSSH client: this is the version line we send.
_CLIENT_BANNER = b"SSH-2.0-OpenSSH_9.6\r\n"
# Max bytes to read for server banner
_MAX_BANNER_LEN = 256
# Max bytes for a single SSH packet (KEX_INIT is typically < 2KB); larger
# declared packet lengths are rejected before any read is attempted.
_MAX_PACKET_LEN = 35000
# ─── SSH connection + KEX_INIT capture ──────────────────────────────────────
@_traced("prober.hassh_ssh_connect")
def _ssh_connect(
    host: str,
    port: int,
    timeout: float,
) -> tuple[str, bytes] | None:
    """Open a TCP connection, swap version lines, and grab the server KEX_INIT.

    Args:
        host: Target address.
        port: Target TCP port.
        timeout: Seconds used for both the connect and every socket operation.

    Returns:
        ``(server_banner, kex_init_payload)``, where the payload begins with
        the SSH_MSG_KEXINIT type byte; None on any socket error, a banner
        that does not start with ``SSH-``, or a first packet that is not
        KEX_INIT.
    """
    conn = None
    try:
        conn = socket.create_connection((host, port), timeout=timeout)
        conn.settimeout(timeout)

        # Step 1: the server speaks first with its version line.
        server_banner = _read_banner(conn)
        if server_banner is None or not server_banner.startswith("SSH-"):
            return None

        # Step 2: answer with our own version line.
        conn.sendall(_CLIENT_BANNER)

        # Step 3: the first binary packet should be the server's KEX_INIT.
        first_packet = _read_ssh_packet(conn)
        if not first_packet or first_packet[0] != _SSH_MSG_KEXINIT:
            return None

        return server_banner, first_packet
    except OSError:
        # socket.timeout, TimeoutError and ConnectionError are all OSError
        # subclasses, so this single clause covers every case.
        return None
    finally:
        if conn is not None:
            try:
                conn.close()
            except OSError:
                pass
def _read_banner(sock: socket.socket) -> str | None:
    """Read one line (the SSH version banner) from *sock*.

    Bytes are pulled one at a time so nothing past the newline is consumed.
    Reading stops at the first ``\\n`` or once ``_MAX_BANNER_LEN`` bytes have
    been collected, whichever comes first.

    Returns:
        The decoded line without trailing CR/LF characters, or None if the
        socket errors out or the peer closes before a line is complete.
    """
    received = bytearray()
    while len(received) < _MAX_BANNER_LEN:
        try:
            octet = sock.recv(1)
        except OSError:
            # Covers timeouts too (they are OSError subclasses).
            return None
        if not octet:
            # Peer closed the connection mid-banner.
            return None
        received += octet
        if received.endswith(b"\n"):
            break

    try:
        text = received.decode("utf-8", errors="replace")
        return text.rstrip("\r\n")
    except Exception:
        return None
def _read_ssh_packet(sock: socket.socket) -> bytes | None:
    """Read one unencrypted SSH binary packet and return just its payload.

    Wire layout:
        uint32  packet_length   (excludes this field and any MAC)
        byte    padding_length
        byte[]  payload         (packet_length - padding_length - 1 bytes)
        byte[]  padding

    Returns:
        The payload bytes, or None if the read fails, the declared length is
        outside ``2 .. _MAX_PACKET_LEN``, or the padding length leaves no
        room for a payload.
    """
    length_field = _recv_exact(sock, 4)
    if length_field is None:
        return None

    (packet_length,) = struct.unpack("!I", length_field)
    if not 2 <= packet_length <= _MAX_PACKET_LEN:
        return None

    body = _recv_exact(sock, packet_length)
    if body is None:
        return None

    # body[0] is the padding length; the payload follows immediately.
    payload_length = packet_length - body[0] - 1
    if not 1 <= payload_length <= len(body) - 1:
        return None

    return body[1 : 1 + payload_length]
def _recv_exact(sock: socket.socket, n: int) -> bytes | None:
    """Keep receiving until exactly *n* bytes have arrived.

    Returns:
        The collected bytes (empty when ``n`` is not positive), or None if a
        socket error occurs or the peer closes before *n* bytes are read.
    """
    collected = bytearray()
    outstanding = n
    while outstanding > 0:
        try:
            piece = sock.recv(outstanding)
        except OSError:
            # Timeouts are OSError subclasses and land here as well.
            return None
        if not piece:
            # Empty read means the peer closed the connection early.
            return None
        collected += piece
        outstanding -= len(piece)
    return bytes(collected)
# ─── KEX_INIT parsing ──────────────────────────────────────────────────────
def _parse_kex_init(payload: bytes) -> dict[str, str] | None:
    """Decode the ten name-list fields of an SSH_MSG_KEXINIT payload.

    Expected layout:
        byte      SSH_MSG_KEXINIT (20)
        byte[16]  cookie
        10 x name-list, each:
            uint32  length
            byte[]  comma-separated algorithm names
        bool      first_kex_packet_follows
        uint32    reserved

    Only the name-lists are read; the type byte is not checked here and the
    trailing fields are ignored.

    Returns:
        A dict keyed by kex_algorithms, server_host_key_algorithms,
        encryption_client_to_server, encryption_server_to_client,
        mac_client_to_server, mac_server_to_client,
        compression_client_to_server, compression_server_to_client,
        languages_client_to_server and languages_server_to_client (in that
        order), or None when the payload is too short or a declared length
        runs past its end.
    """
    total = len(payload)
    if total < 1 + _KEX_INIT_COOKIE_LEN + 4:
        return None

    ordered_keys = (
        "kex_algorithms",
        "server_host_key_algorithms",
        "encryption_client_to_server",
        "encryption_server_to_client",
        "mac_client_to_server",
        "mac_server_to_client",
        "compression_client_to_server",
        "compression_server_to_client",
        "languages_client_to_server",
        "languages_server_to_client",
    )

    # Start just past the type byte and the cookie.
    cursor = 1 + _KEX_INIT_COOKIE_LEN
    parsed: dict[str, str] = {}
    for key in ordered_keys:
        if cursor + 4 > total:
            return None
        (size,) = struct.unpack_from("!I", payload, cursor)
        cursor += 4

        end = cursor + size
        if end > total:
            return None
        parsed[key] = payload[cursor:end].decode("utf-8", errors="replace")
        cursor = end
    return parsed
# ─── HASSH computation ──────────────────────────────────────────────────────
def _compute_hassh(kex: str, enc: str, mac: str, comp: str) -> str:
    """Hash the four algorithm lists into a HASSHServer fingerprint.

    The inputs are joined as ``kex;enc;mac;comp``, UTF-8 encoded, and run
    through MD5 (flagged as not used for security).

    Returns:
        The 32-character lowercase hexadecimal digest.
    """
    joined = "{};{};{};{}".format(kex, enc, mac, comp)
    digest = hashlib.md5(joined.encode("utf-8"), usedforsecurity=False)
    return digest.hexdigest()
# ─── Public API ─────────────────────────────────────────────────────────────
@_traced("prober.hassh_server")
def hassh_server(
    host: str,
    port: int,
    timeout: float = 5.0,
) -> dict[str, Any] | None:
    """Fingerprint the SSH server at ``host:port`` with HASSHServer.

    Args:
        host: Target address.
        port: Target TCP port.
        timeout: Socket timeout in seconds.

    Returns:
        A dict with the hash (``hassh_server``), the server ``banner`` and
        the four server-to-client algorithm lists that were hashed, or None
        when the handshake could not be captured or parsed.
    """
    handshake = _ssh_connect(host, port, timeout)
    if handshake is None:
        return None
    banner, payload = handshake

    fields = _parse_kex_init(payload)
    if fields is None:
        return None

    # HASSHServer only looks at what the server offers towards the client.
    kex, enc, mac, comp = (
        fields[key]
        for key in (
            "kex_algorithms",
            "encryption_server_to_client",
            "mac_server_to_client",
            "compression_server_to_client",
        )
    )

    fingerprint = _compute_hassh(kex, enc, mac, comp)
    return {
        "hassh_server": fingerprint,
        "banner": banner,
        "kex_algorithms": kex,
        "encryption_s2c": enc,
        "mac_s2c": mac,
        "compression_s2c": comp,
    }

View File

@@ -1,506 +0,0 @@
"""
JARM TLS fingerprinting — pure stdlib implementation.
JARM sends 10 crafted TLS ClientHello packets to a target, each varying
TLS version, cipher suite order, extensions, and ALPN values. The
ServerHello responses are parsed and hashed to produce a 62-character
fingerprint that identifies the TLS server implementation.
Reference: https://github.com/salesforce/jarm
Only DECNET import is decnet.telemetry for tracing (zero-cost when disabled).
"""
from __future__ import annotations
import hashlib
import socket
import struct
import time
from typing import Any
from decnet.telemetry import traced as _traced
# ─── Constants ────────────────────────────────────────────────────────────────
# All-zero 62-character string, the same length as a JARM fingerprint.
JARM_EMPTY_HASH = "0" * 62
_INTER_PROBE_DELAY = 0.1 # seconds between probes to avoid IDS triggers
# TLS version bytes (two-byte major/minor pairs)
_TLS_1_0 = b"\x03\x01"
_TLS_1_1 = b"\x03\x02"
_TLS_1_2 = b"\x03\x03"
# Intentionally identical to _TLS_1_2.
_TLS_1_3 = b"\x03\x03" # TLS 1.3 uses 0x0303 in record layer
# TLS record content type, followed by the handshake message types
_CONTENT_HANDSHAKE = 0x16
_HANDSHAKE_CLIENT_HELLO = 0x01
_HANDSHAKE_SERVER_HELLO = 0x02
# Extension types (the 16-bit identifiers written by the _ext_* builders)
_EXT_SERVER_NAME = 0x0000
_EXT_EC_POINT_FORMATS = 0x000B
_EXT_SUPPORTED_GROUPS = 0x000A
_EXT_SESSION_TICKET = 0x0023
_EXT_ENCRYPT_THEN_MAC = 0x0016
_EXT_EXTENDED_MASTER_SECRET = 0x0017
_EXT_SIGNATURE_ALGORITHMS = 0x000D
_EXT_SUPPORTED_VERSIONS = 0x002B
_EXT_PSK_KEY_EXCHANGE_MODES = 0x002D
_EXT_KEY_SHARE = 0x0033
_EXT_ALPN = 0x0010
_EXT_PADDING = 0x0015
# ─── Cipher suite lists per JARM spec ────────────────────────────────────────
# Forward cipher order (standard)
_CIPHERS_FORWARD = [
0x0016, 0x0033, 0x0067, 0xC09E, 0xC0A2, 0x009E, 0x0039, 0x006B,
0xC09F, 0xC0A3, 0x009F, 0x0045, 0x00BE, 0x0088, 0x00C4, 0x009A,
0xC008, 0xC009, 0xC023, 0xC0AC, 0xC0AE, 0xC02B, 0xC00A, 0xC024,
0xC0AD, 0xC0AF, 0xC02C, 0xC072, 0xC073, 0xCCA8, 0x1301, 0x1302,
0x1303, 0xC013, 0xC014, 0xC02F, 0x009C, 0xC02E, 0x002F, 0x0035,
0x000A, 0x0005, 0x0004,
]
# Reverse cipher order
_CIPHERS_REVERSE = list(reversed(_CIPHERS_FORWARD))
# TLS 1.3-only ciphers
_CIPHERS_TLS13 = [0x1301, 0x1302, 0x1303]
# Middle-out cipher order (interleaved from center)
def _middle_out(lst: list[int]) -> list[int]:
    """Reorder *lst* starting at its middle and alternating outwards.

    The element at index ``len(lst) // 2`` comes first, then for each step
    the element that many places to the right (if any) followed by the one
    that many places to the left. Example: ``[1, 2, 3, 4, 5]`` becomes
    ``[3, 4, 2, 5, 1]``.
    """
    centre = len(lst) // 2
    ordered: list[int] = []
    for step in range(centre + 1):
        right = centre + step
        if right < len(lst):
            ordered.append(lst[right])
        # At step 0 left and right coincide, so only emit the left-hand
        # element for later steps.
        if step:
            ordered.append(lst[centre - step])
    return ordered
_CIPHERS_MIDDLE_OUT = _middle_out(_CIPHERS_FORWARD)
# Rare/uncommon extensions cipher list
_CIPHERS_RARE = [
0x0016, 0x0033, 0xC011, 0xC012, 0x0067, 0xC09E, 0xC0A2, 0x009E,
0x0039, 0x006B, 0xC09F, 0xC0A3, 0x009F, 0x0045, 0x00BE, 0x0088,
0x00C4, 0x009A, 0xC008, 0xC009, 0xC023, 0xC0AC, 0xC0AE, 0xC02B,
0xC00A, 0xC024, 0xC0AD, 0xC0AF, 0xC02C, 0xC072, 0xC073, 0xCCA8,
0x1301, 0x1302, 0x1303, 0xC013, 0xC014, 0xC02F, 0x009C, 0xC02E,
0x002F, 0x0035, 0x000A, 0x0005, 0x0004,
]
# ─── Probe definitions ────────────────────────────────────────────────────────
# Each probe: (tls_version, cipher_list, tls13_support, alpn, extensions_style)
# tls_version: record-layer version bytes
# cipher_list: which cipher suite ordering to use
# tls13_support: whether to include TLS 1.3 extensions (supported_versions, key_share, psk)
# alpn: ALPN protocol string or None
# extensions_style: "standard", "rare", or "no_extensions"
_PROBE_CONFIGS: list[dict[str, Any]] = [
# 0: TLS 1.2 forward
{"version": _TLS_1_2, "ciphers": _CIPHERS_FORWARD, "tls13": False, "alpn": None, "style": "standard"},
# 1: TLS 1.2 reverse
{"version": _TLS_1_2, "ciphers": _CIPHERS_REVERSE, "tls13": False, "alpn": None, "style": "standard"},
# 2: TLS 1.1 forward
{"version": _TLS_1_1, "ciphers": _CIPHERS_FORWARD, "tls13": False, "alpn": None, "style": "standard"},
# 3: TLS 1.3 forward
{"version": _TLS_1_2, "ciphers": _CIPHERS_FORWARD, "tls13": True, "alpn": "h2", "style": "standard"},
# 4: TLS 1.3 reverse
{"version": _TLS_1_2, "ciphers": _CIPHERS_REVERSE, "tls13": True, "alpn": "h2", "style": "standard"},
# 5: TLS 1.3 invalid (advertise 1.3 support but no key_share)
{"version": _TLS_1_2, "ciphers": _CIPHERS_FORWARD, "tls13": "no_key_share", "alpn": None, "style": "standard"},
# 6: TLS 1.3 middle-out
{"version": _TLS_1_2, "ciphers": _CIPHERS_MIDDLE_OUT, "tls13": True, "alpn": None, "style": "standard"},
# 7: TLS 1.0 forward
{"version": _TLS_1_0, "ciphers": _CIPHERS_FORWARD, "tls13": False, "alpn": None, "style": "standard"},
# 8: TLS 1.2 middle-out
{"version": _TLS_1_2, "ciphers": _CIPHERS_MIDDLE_OUT, "tls13": False, "alpn": None, "style": "standard"},
# 9: TLS 1.2 with rare extensions
{"version": _TLS_1_2, "ciphers": _CIPHERS_RARE, "tls13": False, "alpn": "http/1.1", "style": "rare"},
]
# ─── Extension builders ──────────────────────────────────────────────────────
def _ext(ext_type: int, data: bytes) -> bytes:
    """Frame *data* as one TLS extension.

    The result is a big-endian 16-bit type, a big-endian 16-bit length of
    *data*, and then *data* itself.
    """
    header = struct.pack("!HH", ext_type, len(data))
    return header + data
def _ext_sni(host: str) -> bytes:
    """Build the server_name extension carrying *host* (ASCII-encoded)."""
    name = host.encode("ascii")
    # Layout: list length (2 bytes) covering one entry, which is
    # name type 0 (1 byte) + name length (2 bytes) + the name itself.
    entry_header = struct.pack("!HBH", len(name) + 3, 0, len(name))
    return _ext(_EXT_SERVER_NAME, entry_header + name)
def _ext_supported_groups() -> bytes:
    """Build the supported_groups extension with a fixed list of six groups."""
    groups = (
        0x0017,  # secp256r1
        0x0018,  # secp384r1
        0x0019,  # secp521r1
        0x001D,  # x25519
        0x0100,  # ffdhe2048
        0x0101,  # ffdhe3072
    )
    # 16-bit byte-length prefix followed by each group as a 16-bit value.
    body = struct.pack(f"!H{len(groups)}H", 2 * len(groups), *groups)
    return _ext(_EXT_SUPPORTED_GROUPS, body)
def _ext_ec_point_formats() -> bytes:
    """Build the ec_point_formats extension advertising format 0 only."""
    point_formats = b"\x00"  # uncompressed only
    # One-byte length prefix followed by the format list.
    body = struct.pack("B", len(point_formats)) + point_formats
    return _ext(_EXT_EC_POINT_FORMATS, body)
def _ext_signature_algorithms() -> bytes:
    """Build the signature_algorithms extension with a fixed list of schemes."""
    schemes = (
        # RSA PKCS1 SHA256/384/512
        0x0401, 0x0501, 0x0601,
        # RSA PKCS1 SHA1
        0x0201,
        # ECDSA SHA256/384/512
        0x0403, 0x0503, 0x0603,
        # ECDSA SHA1
        0x0203,
        # RSA-PSS SHA256/384/512
        0x0804, 0x0805, 0x0806,
    )
    # 16-bit byte-length prefix followed by each scheme as a 16-bit value.
    body = struct.pack(f"!H{len(schemes)}H", 2 * len(schemes), *schemes)
    return _ext(_EXT_SIGNATURE_ALGORITHMS, body)
def _ext_supported_versions_13() -> bytes:
    """Build the supported_versions extension offering TLS 1.3 then TLS 1.2."""
    offered = (0x0304, 0x0303)  # TLS 1.3, 1.2
    # One-byte byte-length prefix followed by each version as a 16-bit value.
    body = struct.pack(f"!B{len(offered)}H", 2 * len(offered), *offered)
    return _ext(_EXT_SUPPORTED_VERSIONS, body)
def _ext_psk_key_exchange_modes() -> bytes:
    """Build the psk_key_exchange_modes extension listing the single mode psk_dhe_ke."""
    modes = b"\x01"  # psk_dhe_ke
    # 1-byte list length followed by the mode bytes.
    return _ext(_EXT_PSK_KEY_EXCHANGE_MODES, bytes([len(modes)]) + modes)
def _ext_key_share() -> bytes:
    """
    Build the key_share extension with a single x25519 entry.

    The 32-byte public value is random. An all-zero value (what this
    builder used to send) produces an all-zero ECDH shared secret, which
    RFC 8446 section 7.4.2 requires the peer to abort on, so a compliant
    server may answer a TLS 1.3 probe with an alert instead of a
    ServerHello.

    Returns:
        The encoded extension (type + length + client_shares list).
    """
    import os  # local import: only this builder needs it

    public_key = os.urandom(32)
    # KeyShareEntry: group(2) = x25519 + key length(2) + key bytes
    entry = struct.pack("!HH", 0x001D, len(public_key)) + public_key
    # client_shares list is the entry prefixed by its 2-byte length.
    data = struct.pack("!H", len(entry)) + entry
    return _ext(_EXT_KEY_SHARE, data)
def _ext_alpn(protocol: str) -> bytes:
    """Build the ALPN extension advertising exactly one protocol name."""
    name = protocol.encode("ascii")
    # ProtocolName: 1-byte length + name.
    protocol_entry = struct.pack("B", len(name)) + name
    # ProtocolNameList: 2-byte total length + the single entry.
    protocol_list = struct.pack("!H", len(protocol_entry)) + protocol_entry
    return _ext(_EXT_ALPN, protocol_list)
def _ext_session_ticket() -> bytes:
    """Build a session_ticket extension with an empty payload."""
    empty_payload = b""
    return _ext(_EXT_SESSION_TICKET, empty_payload)
def _ext_encrypt_then_mac() -> bytes:
    """Build an encrypt_then_mac extension with an empty payload."""
    empty_payload = b""
    return _ext(_EXT_ENCRYPT_THEN_MAC, empty_payload)
def _ext_extended_master_secret() -> bytes:
    """Build an extended_master_secret extension with an empty payload."""
    empty_payload = b""
    return _ext(_EXT_EXTENDED_MASTER_SECRET, empty_payload)
def _ext_padding(target_length: int, current_length: int) -> bytes:
    """
    Build a zero-filled padding extension that grows *current_length* to
    *target_length*.

    Returns an empty byte string when there is not even room for the
    4-byte extension header.
    """
    header_size = 4  # extension type (2) + extension length (2)
    filler = target_length - current_length - header_size
    if filler >= 0:
        return _ext(_EXT_PADDING, b"\x00" * filler)
    return b""
# ─── ClientHello builder ─────────────────────────────────────────────────────
def _build_client_hello(probe_index: int, host: str = "localhost") -> bytes:
    """
    Assemble the ClientHello for one entry of ``_PROBE_CONFIGS``.

    Args:
        probe_index: index into ``_PROBE_CONFIGS`` (0-9).
        host: hostname placed in the SNI extension.

    Returns:
        A complete TLS record (record header + handshake header +
        ClientHello body) ready to be written to a socket.

    The config's "version", "ciphers", "tls13" and "alpn" entries are
    used; its "style" entry is not consulted here.
    """
    probe = _PROBE_CONFIGS[probe_index]
    legacy_version: bytes = probe["version"]
    suites: list[int] = probe["ciphers"]
    tls13_mode = probe["tls13"]
    alpn_protocol: str | None = probe["alpn"]

    # Random and session id are both 32 zero bytes.
    client_random = b"\x00" * 32
    session_id = b"\x00" * 32

    # Cipher suites: 2-byte length (in bytes) + 2 bytes per suite.
    suite_block = struct.pack(f"!H{len(suites)}H", 2 * len(suites), *suites)

    # Compression methods: one method, null.
    compression_block = b"\x01\x00"

    # Extensions, in the exact order they go on the wire.
    ext_chunks = [
        _ext_sni(host),
        _ext_supported_groups(),
        _ext_ec_point_formats(),
        _ext_session_ticket(),
        _ext_encrypt_then_mac(),
        _ext_extended_master_secret(),
        _ext_signature_algorithms(),
    ]
    # "tls13" is True, False, or the string "no_key_share"; the string is
    # truthy, hence the explicit equality test.
    full_tls13 = tls13_mode == True  # noqa: E712
    if full_tls13 or tls13_mode == "no_key_share":
        ext_chunks.append(_ext_supported_versions_13())
        ext_chunks.append(_ext_psk_key_exchange_modes())
        if full_tls13:
            # "no_key_share" probes intentionally omit key_share.
            ext_chunks.append(_ext_key_share())
    if alpn_protocol:
        ext_chunks.append(_ext_alpn(alpn_protocol))
    ext_blob = b"".join(ext_chunks)
    ext_block = struct.pack("!H", len(ext_blob)) + ext_blob

    # ClientHello body.
    body = b"".join((
        legacy_version,                       # client_version (2)
        client_random,                        # random (32)
        struct.pack("B", len(session_id)),    # session_id length
        session_id,                           # session_id
        suite_block,                          # cipher_suites
        compression_block,                    # compression_methods
        ext_block,                            # extensions
    ))

    # Handshake header: type(1) + 24-bit length.
    body_len_24 = struct.pack("!I", len(body))[1:]
    handshake = struct.pack("B", _HANDSHAKE_CLIENT_HELLO) + body_len_24 + body

    # TLS record header: type(1) + version(2) + length(2).
    record_header = (
        struct.pack("B", _CONTENT_HANDSHAKE)
        + _TLS_1_0
        + struct.pack("!H", len(handshake))
    )
    return record_header + handshake
# ─── ServerHello parser ──────────────────────────────────────────────────────
def _parse_server_hello(data: bytes) -> str:
    """
    Summarize a ServerHello as one JARM component string.

    Returns "cipher|version|extensions": the cipher as four hex digits,
    the version via ``_version_to_str``, and the extension types as
    four-hex-digit values joined by "-" in wire order. Returns "|||"
    whenever *data* is not a parseable ServerHello, including when any
    unexpected exception occurs during parsing.
    """
    failure = "|||"
    try:
        # TLS record header: content type(1) + version(2) + length(2).
        if len(data) < 6 or data[0] != _CONTENT_HANDSHAKE:
            return failure
        (record_len,) = struct.unpack_from("!H", data, 3)
        handshake = data[5: 5 + record_len]

        # Handshake header: message type(1) + 24-bit length.
        if len(handshake) < 4 or handshake[0] != _HANDSHAKE_SERVER_HELLO:
            return failure
        body_len = int.from_bytes(handshake[1:4], "big")
        body = handshake[4: 4 + body_len]
        body_size = len(body)
        if body_size < 34:
            return failure

        (server_version,) = struct.unpack_from("!H", body, 0)
        cursor = 2 + 32  # past server_version and the 32-byte random

        # Session ID: 1-byte length + value.
        if cursor >= body_size:
            return failure
        cursor += 1 + body[cursor]

        # Cipher suite.
        if cursor + 2 > body_size:
            return failure
        (cipher,) = struct.unpack_from("!H", body, cursor)
        cursor += 2

        # Compression method (value not needed).
        if cursor >= body_size:
            return failure
        cursor += 1

        # Extensions: record every type; supported_versions overrides the
        # legacy version field (this is how TLS 1.3 is detected).
        negotiated = server_version
        seen_types: list[str] = []
        if cursor + 2 <= body_size:
            (ext_total,) = struct.unpack_from("!H", body, cursor)
            cursor += 2
            ext_limit = min(cursor + ext_total, body_size)
            while cursor + 4 <= ext_limit:
                ext_type, ext_len = struct.unpack_from("!HH", body, cursor)
                seen_types.append(f"{ext_type:04x}")
                if ext_type == _EXT_SUPPORTED_VERSIONS and ext_len >= 2:
                    # Raises (and so yields "|||") if the payload is truncated.
                    (negotiated,) = struct.unpack_from("!H", body, cursor + 4)
                cursor += 4 + ext_len

        extensions_str = "-".join(seen_types)
        return f"{cipher:04x}|{_version_to_str(negotiated)}|{extensions_str}"
    except Exception:
        return failure
def _version_to_str(version: int) -> str:
    """Map a 16-bit TLS version to its short name, or to 4 hex digits if unknown."""
    fallback = f"{version:04x}"
    known = {
        0x0300: "ssl30",
        0x0301: "tls10",
        0x0302: "tls11",
        0x0303: "tls12",
        0x0304: "tls13",
    }
    return known.get(version, fallback)
# ─── Probe sender ────────────────────────────────────────────────────────────
@_traced("prober.jarm_send_probe")
def _send_probe(host: str, port: int, hello: bytes, timeout: float = 5.0) -> bytes | None:
    """
    Send one ClientHello over a fresh TCP connection and collect the reply.

    Reading stops once the first complete TLS record has arrived or the
    peer closes the connection.

    Returns:
        The bytes received, or None if nothing was received or any socket
        error (including a timeout) occurred.
    """
    try:
        with socket.create_connection((host, port), timeout=timeout) as conn:
            conn.sendall(hello)
            conn.settimeout(timeout)
            received = b""
            while True:
                piece = conn.recv(1484)
                if not piece:
                    break
                received += piece
                # Only the first TLS record (the ServerHello) is needed.
                if len(received) >= 5:
                    (record_len,) = struct.unpack_from("!H", received, 3)
                    if len(received) >= 5 + record_len:
                        break
            return received or None
    except OSError:
        # socket.error is OSError and socket.timeout is a subclass of it.
        return None
# ─── JARM hash computation ───────────────────────────────────────────────────
def _compute_jarm(responses: list[str]) -> str:
    """
    Combine per-probe response strings into the final fingerprint.

    Each response is a "cipher|version|extensions" string from
    ``_parse_server_hello`` (or "|||" for a failed probe).

    The result is the concatenation of:
      * three characters per probe: the first two hex digits of the cipher
        plus a one-character version code ("000" for a failed probe), and
      * the first 32 hex digits of the SHA-256 of the probes' extension
        strings joined with ",".

    With the usual 10 probes this is 30 + 32 = 62 characters.

    Args:
        responses: one parsed response string per probe, in probe order.

    Returns:
        The fingerprint string, or ``JARM_EMPTY_HASH`` when every probe
        failed (or *responses* is empty).
    """
    if all(r == "|||" for r in responses):
        return JARM_EMPTY_HASH

    # Version name -> single character; unknown versions map to "0".
    version_chars = {
        "tls13": "d", "tls12": "c", "tls11": "b",
        "tls10": "a", "ssl30": "0",
    }

    fuzzy_parts: list[str] = []
    ext_parts: list[str] = []
    for response in responses:
        parts = response.split("|")
        if len(parts) >= 3 and parts[0] != "":
            cipher, version, extensions = parts[0], parts[1], parts[2]
            ver_char = version_chars.get(version, "0")
            fuzzy_parts.append(f"{cipher[0:2]}{ver_char}")
            ext_parts.append(extensions)
        else:
            # Failed probe: placeholder in both halves keeps positions aligned.
            fuzzy_parts.append("000")
            ext_parts.append("")

    ext_str = ",".join(ext_parts)
    ext_hash = hashlib.sha256(ext_str.encode()).hexdigest()[:32]
    return "".join(fuzzy_parts) + ext_hash
# ─── Public API ──────────────────────────────────────────────────────────────
@_traced("prober.jarm_hash")
def jarm_hash(host: str, port: int, timeout: float = 5.0) -> str:
    """
    Fingerprint a TLS server by sending 10 crafted ClientHellos.

    Probes are sent one at a time on separate connections, pausing
    ``_INTER_PROBE_DELAY`` seconds between consecutive probes.

    Args:
        host: target IP or hostname (also used as the SNI value)
        port: target port
        timeout: per-probe TCP timeout in seconds

    Returns:
        The hash produced by ``_compute_jarm``; that is ``JARM_EMPTY_HASH``
        when no probe yielded a parseable ServerHello.
    """
    probe_count = 10
    final_index = probe_count - 1
    responses: list[str] = []
    for index in range(probe_count):
        packet = _build_client_hello(index, host=host)
        reply = _send_probe(host, port, packet, timeout=timeout)
        responses.append("|||" if reply is None else _parse_server_hello(reply))
        # No need to wait after the last probe.
        if index < final_index:
            time.sleep(_INTER_PROBE_DELAY)
    return _compute_jarm(responses)

View File

@@ -1,227 +0,0 @@
"""
TCP/IP stack fingerprinting via SYN-ACK analysis.
Sends a crafted TCP SYN packet to a target host:port, captures the
SYN-ACK response, and extracts OS/tool-identifying characteristics:
TTL, window size, DF bit, MSS, window scale, SACK support, timestamps,
and TCP options ordering.
Uses scapy for packet crafting and parsing. Requires root/CAP_NET_RAW.
"""
from __future__ import annotations
import hashlib
import random
from typing import Any
from decnet.telemetry import traced as _traced
# Lazy-import scapy to avoid breaking non-root usage of HASSH/JARM.
# The actual import happens inside functions that need it.
# ─── TCP option short codes ─────────────────────────────────────────────────
# Maps a scapy TCP option name to the one-letter code used in the
# "options_order" fingerprint field. Names missing from this table are
# rendered as "?" by _extract_options_order().
_OPT_CODES: dict[str, str] = {
    "MSS": "M",
    "WScale": "W",
    # Both SACK-related option names collapse to the same code.
    "SAckOK": "S",
    "SAck": "S",
    "Timestamp": "T",
    "NOP": "N",
    "EOL": "E",
    # Both alternate-checksum option names collapse to the same code.
    "AltChkSum": "A",
    "AltChkSumOpt": "A",
    "UTO": "U",
}
# ─── Packet construction ───────────────────────────────────────────────────
@_traced("prober.tcpfp_send_syn")
def _send_syn(
    host: str,
    port: int,
    timeout: float,
) -> Any | None:
    """
    Send one crafted TCP SYN to host:port and wait for the reply.

    Returns the reply packet only if it carries a TCP layer whose flags
    are exactly SYN+ACK; in that case a RST is sent afterwards to tear
    down the half-open connection. Returns None on timeout, on an
    OSError/PermissionError from scapy, or for any other kind of reply.
    """
    from scapy.all import IP, TCP, conf, sr1

    # Suppress scapy's noisy output (note: this sets scapy's global config).
    conf.verb = 0

    ephemeral_port = random.randint(49152, 65535)  # nosec B311 — ephemeral port, not crypto
    syn_options = [
        ("MSS", 1460),
        ("NOP", None),
        ("WScale", 7),
        ("NOP", None),
        ("NOP", None),
        ("Timestamp", (0, 0)),
        ("SAckOK", b""),
        ("EOL", None),
    ]
    syn = IP(dst=host) / TCP(
        sport=ephemeral_port,
        dport=port,
        flags="S",
        options=syn_options,
    )

    try:
        reply = sr1(syn, timeout=timeout, verbose=0)
    except (OSError, PermissionError):
        return None

    if reply is None or not reply.haslayer(TCP):
        return None
    if reply[TCP].flags != 0x12:  # anything other than SYN-ACK
        return None

    # Send RST to clean up half-open connection
    _send_rst(host, port, ephemeral_port, reply)
    return reply
def _send_rst(
    host: str,
    dport: int,
    sport: int,
    resp: Any,
) -> None:
    """
    Best-effort RST for the half-open connection created by the SYN probe.

    The RST's sequence number is taken from the ack field of *resp*.
    Every exception is swallowed: failing to clean up must not fail the
    probe.
    """
    try:
        from scapy.all import IP, TCP, send

        reset = IP(dst=host) / TCP(
            sport=sport,
            dport=dport,
            flags="R",
            seq=resp.ack,
        )
        send(reset, verbose=0)
    except Exception:  # nosec B110 — best-effort RST cleanup
        return
# ─── Response parsing ───────────────────────────────────────────────────────
def _parse_synack(resp: Any) -> dict[str, Any]:
    """
    Pull the fingerprint fields out of a scapy SYN-ACK packet.

    Returns a dict with ttl, window_size, df_bit, ip_id, mss,
    window_scale, sack_ok, timestamp and options_order. Options absent
    from the packet keep their defaults: mss 0, window_scale -1,
    sack_ok 0, timestamp 0.
    """
    from scapy.all import IP, TCP

    ip_hdr = resp[IP]
    tcp_hdr = resp[TCP]

    # Defaults for options the peer did not send.
    seen: dict[str, Any] = {
        "mss": 0,
        "window_scale": -1,
        "sack_ok": 0,
        "timestamp": 0,
    }
    order = _extract_options_order(tcp_hdr.options)

    for name, value in tcp_hdr.options:
        if name == "MSS":
            seen["mss"] = value
        elif name == "WScale":
            seen["window_scale"] = value
        elif name in ("SAckOK", "SAck"):
            seen["sack_ok"] = 1
        elif name == "Timestamp":
            # Only presence is recorded, not the timestamp values.
            seen["timestamp"] = 1

    return {
        "ttl": ip_hdr.ttl,
        "window_size": tcp_hdr.window,
        "df_bit": 1 if (ip_hdr.flags & 0x2) else 0,  # DF = bit 1
        "ip_id": ip_hdr.id,
        "mss": seen["mss"],
        "window_scale": seen["window_scale"],
        "sack_ok": seen["sack_ok"],
        "timestamp": seen["timestamp"],
        "options_order": order,
    }
def _extract_options_order(options: list[tuple[str, Any]]) -> str:
    """
    Render scapy TCP option tuples as comma-separated short codes.

    E.g. [("MSS", 1460), ("NOP", None), ("WScale", 7)] → "M,N,W".
    Option names without an entry in ``_OPT_CODES`` become "?".
    """
    return ",".join(_OPT_CODES.get(name, "?") for name, _value in options)
# ─── Fingerprint computation ───────────────────────────────────────────────
def _compute_fingerprint(fields: dict[str, Any]) -> tuple[str, str]:
    """
    Build the canonical fingerprint string and its truncated SHA-256.

    The raw string is the colon-joined values of ttl, window_size, df_bit,
    mss, window_scale, sack_ok, timestamp and options_order, in that
    order (ip_id is deliberately not part of it).

    Returns:
        (raw_string, first 32 hex digits of SHA-256(raw_string)).
    """
    ordered_keys = (
        "ttl", "window_size", "df_bit", "mss",
        "window_scale", "sack_ok", "timestamp", "options_order",
    )
    raw = ":".join(f"{fields[key]}" for key in ordered_keys)
    digest = hashlib.sha256(raw.encode("utf-8")).hexdigest()
    return raw, digest[:32]
# ─── Public API ─────────────────────────────────────────────────────────────
@_traced("prober.tcp_fingerprint")
def tcp_fingerprint(
    host: str,
    port: int,
    timeout: float = 5.0,
) -> dict[str, Any] | None:
    """
    Fingerprint the TCP/IP stack behind host:port from its SYN-ACK.

    Returns:
        A dict holding "tcpfp_hash", "tcpfp_raw" and every field produced
        by ``_parse_synack``, or None when no SYN-ACK was received.

    Requires root/CAP_NET_RAW.
    """
    synack = _send_syn(host, port, timeout)
    if synack is None:
        return None

    fields = _parse_synack(synack)
    raw, digest = _compute_fingerprint(fields)

    result: dict[str, Any] = {"tcpfp_hash": digest, "tcpfp_raw": raw}
    result.update(fields)
    return result

View File

@@ -1,478 +0,0 @@
"""
DECNET-PROBER standalone worker.
Runs as a detached host-level process. Discovers attacker IPs by tailing the
collector's JSON log file, then fingerprints them via multiple active probes:
- JARM (TLS server fingerprinting)
- HASSHServer (SSH server fingerprinting)
- TCP/IP stack fingerprinting (OS/tool identification)
Results are written as RFC 5424 syslog + JSON to the same log files.
Target discovery is fully automatic — every unique attacker IP seen in the
log stream gets probed. No manual target list required.
Tech debt: writing directly to the collector's log files couples the
prober to the collector's file format. A future refactor should introduce
a shared log-sink abstraction.
"""
from __future__ import annotations
import asyncio
import json
import re
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from decnet.logging import get_logger
from decnet.prober.hassh import hassh_server
from decnet.prober.jarm import JARM_EMPTY_HASH, jarm_hash
from decnet.prober.tcpfp import tcp_fingerprint
from decnet.telemetry import traced as _traced
logger = get_logger("prober")
# ─── Default ports per probe type ───────────────────────────────────────────
# JARM: common C2 callback / TLS server ports.
# Used by prober_worker() when its `ports` argument is None or empty.
DEFAULT_PROBE_PORTS: list[int] = [
    443, 8443, 8080, 4443, 50050, 2222, 993, 995, 8888, 9001,
]
# HASSHServer: common SSH server ports.
# Used by prober_worker() when its `ssh_ports` argument is None or empty.
DEFAULT_SSH_PORTS: list[int] = [22, 2222, 22222, 2022]
# TCP/IP stack: probe on ports commonly open on attacker machines.
# Wide spread gives the best chance of a SYN-ACK for TTL/fingerprint extraction.
# Used by prober_worker() when its `tcpfp_ports` argument is None or empty.
DEFAULT_TCPFP_PORTS: list[int] = [22, 80, 443, 8080, 8443, 445, 3389]
# ─── RFC 5424 formatting (inline, mirrors templates/*/decnet_logging.py) ─────
# Syslog facility number; _syslog_line() computes PRI as facility * 8 + severity.
_FACILITY_LOCAL0 = 16
# SD-ID of the structured-data element; the parser regex _SD_BLOCK_RE
# hard-codes the same literal, so the two must be changed together.
_SD_ID = "relay@55555"
# Syslog severity levels used by the prober.
_SEVERITY_INFO = 6
_SEVERITY_WARNING = 4
# Field length limits. Only _MAX_MSGID is applied by _syslog_line();
# _MAX_HOSTNAME and _MAX_APPNAME are not referenced in the visible code of
# this module.
_MAX_HOSTNAME = 255
_MAX_APPNAME = 48
_MAX_MSGID = 32
def _sd_escape(value: str) -> str:
    """Backslash-escape the three characters (\\, ", ]) reserved inside an SD param value."""
    escapes = str.maketrans({
        "\\": "\\\\",
        '"': '\\"',
        "]": "\\]",
    })
    return value.translate(escapes)
def _sd_element(fields: dict[str, Any]) -> str:
    """
    Render *fields* as a single structured-data element.

    Returns "-" (the nil value) when there are no fields; otherwise
    ``[<SD-ID> k1="v1" k2="v2" ...]`` with every value stringified and
    escaped.
    """
    if not fields:
        return "-"
    rendered = [
        f'{key}="{_sd_escape(str(value))}"' for key, value in fields.items()
    ]
    return f"[{_SD_ID} {' '.join(rendered)}]"
def _syslog_line(
    event_type: str,
    severity: int = _SEVERITY_INFO,
    msg: str | None = None,
    **fields: Any,
) -> str:
    """
    Format one RFC 5424 line for a prober event.

    Args:
        event_type: used as the MSGID, truncated to ``_MAX_MSGID``
            characters ("-" when empty).
        severity: syslog severity combined with facility local0 in PRI.
        msg: optional free-text message appended after the structured data.
        **fields: key/value pairs rendered as the structured-data element.

    Returns:
        The line without a trailing newline. Hostname is always
        "decnet-prober", app-name "prober", PROCID "-", and the timestamp
        is the current UTC time in ISO 8601 form.
    """
    priority = _FACILITY_LOCAL0 * 8 + severity
    timestamp = datetime.now(timezone.utc).isoformat()
    msgid = (event_type or "-")[:_MAX_MSGID]
    structured = _sd_element(fields)
    trailer = f" {msg}" if msg else ""
    return (
        f"<{priority}>1 {timestamp} decnet-prober prober - "
        f"{msgid} {structured}{trailer}"
    )
# ─── RFC 5424 parser (subset of collector's, for JSON generation) ─────────────
_RFC5424_RE = re.compile(
r"^<\d+>1 "
r"(\S+) " # 1: TIMESTAMP
r"(\S+) " # 2: HOSTNAME
r"(\S+) " # 3: APP-NAME
r"- " # PROCID
r"(\S+) " # 4: MSGID (event_type)
r"(.+)$", # 5: SD + MSG
)
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "ip", "target_ip")
def _parse_to_json(line: str) -> dict[str, Any] | None:
m = _RFC5424_RE.match(line)
if not m:
return None
ts_raw, decky, service, event_type, sd_rest = m.groups()
fields: dict[str, str] = {}
msg = ""
if sd_rest.startswith("["):
block = _SD_BLOCK_RE.search(sd_rest)
if block:
for k, v in _PARAM_RE.findall(block.group(1)):
fields[k] = v.replace('\\"', '"').replace("\\\\", "\\").replace("\\]", "]")
msg_match = re.search(r'\]\s+(.+)$', sd_rest)
if msg_match:
msg = msg_match.group(1).strip()
attacker_ip = "Unknown"
for fname in _IP_FIELDS:
if fname in fields:
attacker_ip = fields[fname]
break
try:
ts_formatted = datetime.fromisoformat(ts_raw).strftime("%Y-%m-%d %H:%M:%S")
except ValueError:
ts_formatted = ts_raw
return {
"timestamp": ts_formatted,
"decky": decky,
"service": service,
"event_type": event_type,
"attacker_ip": attacker_ip,
"fields": fields,
"msg": msg,
"raw_line": line,
}
# ─── Log writer ──────────────────────────────────────────────────────────────
def _write_event(
    log_path: Path,
    json_path: Path,
    event_type: str,
    severity: int = _SEVERITY_INFO,
    msg: str | None = None,
    **fields: Any,
) -> None:
    """
    Append one prober event to both log files.

    The RFC 5424 line always goes to *log_path*. The same line is then
    parsed back with ``_parse_to_json``; if that yields a record, it is
    appended to *json_path* as one JSON object per line.
    """
    line = _syslog_line(event_type, severity=severity, msg=msg, **fields)
    with open(log_path, "a", encoding="utf-8") as syslog_file:
        syslog_file.write(f"{line}\n")
        syslog_file.flush()

    record = _parse_to_json(line)
    if not record:
        return
    with open(json_path, "a", encoding="utf-8") as json_file:
        json_file.write(f"{json.dumps(record)}\n")
        json_file.flush()
# ─── Target discovery from log stream ────────────────────────────────────────
@_traced("prober.discover_attackers")
def _discover_attackers(json_path: Path, position: int) -> tuple[set[str], int]:
    """
    Read JSON log lines added since *position* and collect attacker IPs.

    The file is read in binary mode so that *position* is a true byte
    offset and can be compared with the file size (text-mode ``tell()``
    values are opaque and carry no such guarantee).

    Args:
        json_path: path of the JSON-lines log file.
        position: byte offset reached by the previous call (0 to start).

    Returns:
        (new_ips, new_position). ``new_ips`` holds every non-empty
        "attacker_ip" other than "Unknown" from records whose "service" is
        not "prober". ``new_position`` is the offset just past the last
        complete line consumed; a trailing partial line is left for the
        next call. If the file is missing, *position* is returned as is;
        if the file is now shorter than *position* it is treated as
        rotated and re-read from the start.
    """
    new_ips: set[str] = set()

    try:
        size = json_path.stat().st_size
    except (FileNotFoundError, NotADirectoryError):
        return new_ips, position

    if size < position:
        position = 0  # file rotated
    if size == position:
        return new_ips, position

    with open(json_path, "rb") as f:
        f.seek(position)
        for raw_line in f:
            if not raw_line.endswith(b"\n"):
                break  # partial line: the writer has not finished it yet
            # The line is complete, so it is consumed whatever it contains.
            position += len(raw_line)

            text = raw_line.decode("utf-8", errors="replace").strip()
            try:
                record = json.loads(text)
            except json.JSONDecodeError:
                continue
            if not isinstance(record, dict):
                continue  # valid JSON but not an event object

            # Skip our own events
            if record.get("service") == "prober":
                continue

            ip = record.get("attacker_ip", "Unknown")
            if isinstance(ip, str) and ip and ip != "Unknown":
                new_ips.add(ip)

    return new_ips, position
# ─── Probe cycle ─────────────────────────────────────────────────────────────
@_traced("prober.probe_cycle")
def _probe_cycle(
    targets: set[str],
    probed: dict[str, dict[str, set[int]]],
    jarm_ports: list[int],
    ssh_ports: list[int],
    tcpfp_ports: list[int],
    log_path: Path,
    json_path: Path,
    timeout: float = 5.0,
) -> None:
    """
    Run the JARM, HASSH and TCP/IP fingerprint phases against every target.

    Targets are visited in sorted order; for each one the three phases run
    in sequence.

    Args:
        targets: set of attacker IPs to probe
        probed: IP -> {probe_type -> ports already probed}; updated in place
        jarm_ports: TLS ports for JARM fingerprinting
        ssh_ports: SSH ports for HASSHServer fingerprinting
        tcpfp_ports: ports for TCP/IP stack fingerprinting
        log_path: RFC 5424 log file
        json_path: JSON log file
        timeout: per-probe TCP timeout
    """
    phases = (
        (_jarm_phase, jarm_ports),     # Phase 1: JARM (TLS fingerprinting)
        (_hassh_phase, ssh_ports),     # Phase 2: HASSHServer (SSH fingerprinting)
        (_tcpfp_phase, tcpfp_ports),   # Phase 3: TCP/IP stack fingerprinting
    )
    for ip in sorted(targets):
        per_ip_state = probed.setdefault(ip, {})
        for run_phase, phase_ports in phases:
            run_phase(ip, per_ip_state, phase_ports, log_path, json_path, timeout)
@_traced("prober.jarm_phase")
def _jarm_phase(
    ip: str,
    ip_probed: dict[str, set[int]],
    ports: list[int],
    log_path: Path,
    json_path: Path,
    timeout: float,
) -> None:
    """
    JARM-fingerprint *ip* on each TLS port not yet attempted.

    A port is recorded under ``ip_probed["jarm"]`` once attempted, whether
    the probe succeeded, returned the empty hash, or raised, so it is
    never retried. A non-empty hash is logged as "jarm_fingerprint"; an
    exception is logged as "prober_error".
    """
    attempted = ip_probed.setdefault("jarm", set())
    for port in ports:
        if port in attempted:
            continue
        port_text = str(port)
        try:
            fingerprint = jarm_hash(ip, port, timeout=timeout)
            attempted.add(port)
            if fingerprint != JARM_EMPTY_HASH:
                _write_event(
                    log_path, json_path,
                    "jarm_fingerprint",
                    target_ip=ip,
                    target_port=port_text,
                    jarm_hash=fingerprint,
                    msg=f"JARM {ip}:{port} = {fingerprint}",
                )
                logger.info("prober: JARM %s:%d = %s", ip, port, fingerprint)
        except Exception as exc:
            attempted.add(port)
            _write_event(
                log_path, json_path,
                "prober_error",
                severity=_SEVERITY_WARNING,
                target_ip=ip,
                target_port=port_text,
                error=str(exc),
                msg=f"JARM probe failed for {ip}:{port}: {exc}",
            )
            logger.warning("prober: JARM probe failed %s:%d: %s", ip, port, exc)
@_traced("prober.hassh_phase")
def _hassh_phase(
    ip: str,
    ip_probed: dict[str, set[int]],
    ports: list[int],
    log_path: Path,
    json_path: Path,
    timeout: float,
) -> None:
    """
    HASSHServer-fingerprint *ip* on each SSH port not yet attempted.

    A port is recorded under ``ip_probed["hassh"]`` once attempted,
    whatever the outcome, so it is never retried. A result is logged as
    "hassh_fingerprint"; a None result is skipped silently; an exception
    is logged as "prober_error".
    """
    attempted = ip_probed.setdefault("hassh", set())
    for port in ports:
        if port in attempted:
            continue
        port_text = str(port)
        try:
            info = hassh_server(ip, port, timeout=timeout)
            attempted.add(port)
            if info is not None:
                _write_event(
                    log_path, json_path,
                    "hassh_fingerprint",
                    target_ip=ip,
                    target_port=port_text,
                    hassh_server_hash=info["hassh_server"],
                    ssh_banner=info["banner"],
                    kex_algorithms=info["kex_algorithms"],
                    encryption_s2c=info["encryption_s2c"],
                    mac_s2c=info["mac_s2c"],
                    compression_s2c=info["compression_s2c"],
                    msg=f"HASSH {ip}:{port} = {info['hassh_server']}",
                )
                logger.info("prober: HASSH %s:%d = %s", ip, port, info["hassh_server"])
        except Exception as exc:
            attempted.add(port)
            _write_event(
                log_path, json_path,
                "prober_error",
                severity=_SEVERITY_WARNING,
                target_ip=ip,
                target_port=port_text,
                error=str(exc),
                msg=f"HASSH probe failed for {ip}:{port}: {exc}",
            )
            logger.warning("prober: HASSH probe failed %s:%d: %s", ip, port, exc)
@_traced("prober.tcpfp_phase")
def _tcpfp_phase(
    ip: str,
    ip_probed: dict[str, set[int]],
    ports: list[int],
    log_path: Path,
    json_path: Path,
    timeout: float,
) -> None:
    """
    TCP/IP-stack-fingerprint *ip* on each port not yet attempted.

    A port is recorded under ``ip_probed["tcpfp"]`` once attempted,
    whatever the outcome, so it is never retried. A result is logged as
    "tcpfp_fingerprint" with all numeric fields stringified; a None result
    is skipped silently; an exception is logged as "prober_error".
    """
    # Result fields logged as str(value), in this order.
    stringified = (
        "ttl", "window_size", "df_bit", "mss",
        "window_scale", "sack_ok", "timestamp",
    )
    attempted = ip_probed.setdefault("tcpfp", set())
    for port in ports:
        if port in attempted:
            continue
        port_text = str(port)
        try:
            info = tcp_fingerprint(ip, port, timeout=timeout)
            attempted.add(port)
            if info is not None:
                _write_event(
                    log_path, json_path,
                    "tcpfp_fingerprint",
                    target_ip=ip,
                    target_port=port_text,
                    tcpfp_hash=info["tcpfp_hash"],
                    tcpfp_raw=info["tcpfp_raw"],
                    **{key: str(info[key]) for key in stringified},
                    options_order=info["options_order"],
                    msg=f"TCPFP {ip}:{port} = {info['tcpfp_hash']}",
                )
                logger.info("prober: TCPFP %s:%d = %s", ip, port, info["tcpfp_hash"])
        except Exception as exc:
            attempted.add(port)
            _write_event(
                log_path, json_path,
                "prober_error",
                severity=_SEVERITY_WARNING,
                target_ip=ip,
                target_port=port_text,
                error=str(exc),
                msg=f"TCPFP probe failed for {ip}:{port}: {exc}",
            )
            logger.warning("prober: TCPFP probe failed %s:%d: %s", ip, port, exc)
# ─── Main worker ─────────────────────────────────────────────────────────────
@_traced("prober.worker")
async def prober_worker(
    log_file: str,
    interval: int = 300,
    timeout: float = 5.0,
    ports: list[int] | None = None,
    ssh_ports: list[int] | None = None,
    tcpfp_ports: list[int] | None = None,
) -> None:
    """
    Run the standalone prober loop forever.

    Every cycle tails the JSON log for new attacker IPs, then runs the
    JARM, HASSH and TCP/IP fingerprint phases for all known attackers
    (ports already attempted are skipped by the phases), then sleeps.
    Blocking work is pushed to a thread via ``asyncio.to_thread``.

    Args:
        log_file: RFC 5424 log path; the JSON log is the same path with a
            ".json" suffix. The parent directory is created if missing.
        interval: seconds to sleep between probe cycles
        timeout: per-probe TCP timeout
        ports: JARM TLS ports (DEFAULT_PROBE_PORTS when None or empty)
        ssh_ports: HASSH SSH ports (DEFAULT_SSH_PORTS when None or empty)
        tcpfp_ports: TCP fingerprint ports (DEFAULT_TCPFP_PORTS when None
            or empty)
    """
    jarm_ports = ports or DEFAULT_PROBE_PORTS
    hassh_ports = ssh_ports or DEFAULT_SSH_PORTS
    tcp_ports = tcpfp_ports or DEFAULT_TCPFP_PORTS

    def _csv(values: list[int]) -> str:
        return ",".join(str(p) for p in values)

    all_ports_str = (
        f"jarm={_csv(jarm_ports)} "
        f"ssh={_csv(hassh_ports)} "
        f"tcpfp={_csv(tcp_ports)}"
    )

    log_path = Path(log_file)
    json_path = log_path.with_suffix(".json")
    log_path.parent.mkdir(parents=True, exist_ok=True)

    logger.info(
        "prober started interval=%ds %s log=%s",
        interval, all_ports_str, log_path,
    )
    _write_event(
        log_path, json_path,
        "prober_startup",
        interval=str(interval),
        probe_ports=all_ports_str,
        msg=f"DECNET-PROBER started, interval {interval}s, {all_ports_str}",
    )

    known_attackers: set[str] = set()
    probed: dict[str, dict[str, set[int]]] = {}  # IP -> {type -> ports}
    log_position: int = 0

    while True:
        # Discover new attacker IPs from the log stream
        new_ips, log_position = await asyncio.to_thread(
            _discover_attackers, json_path, log_position,
        )
        fresh = new_ips - known_attackers
        if fresh:
            known_attackers.update(fresh)
            logger.info(
                "prober: discovered %d new attacker(s), total=%d",
                len(fresh), len(known_attackers),
            )

        if known_attackers:
            await asyncio.to_thread(
                _probe_cycle, known_attackers, probed,
                jarm_ports, hassh_ports, tcp_ports,
                log_path, json_path, timeout,
            )

        await asyncio.sleep(interval)

View File

@@ -1,5 +0,0 @@
"""DECNET profiler — standalone attacker profile builder worker."""
from decnet.profiler.worker import attacker_profile_worker
# Public API of the package: only the worker entry point is re-exported.
__all__ = ["attacker_profile_worker"]

View File

@@ -1,602 +0,0 @@
"""
Behavioral and timing analysis for DECNET attacker profiles.
Consumes the chronological `LogEvent` stream already built by
`decnet.correlation.engine.CorrelationEngine` and derives per-IP metrics:
- Inter-event timing statistics (mean / median / stdev / min / max)
- Coefficient-of-variation (jitter metric)
- Beaconing vs. interactive vs. scanning vs. brute_force vs. slow_scan
classification
- Tool attribution against known C2 frameworks (Cobalt Strike, Sliver,
Havoc, Mythic) using default beacon/jitter profiles — returns a list,
since multiple tools can be in use simultaneously
- Header-based tool detection (Nmap NSE, Gophish, Nikto, sqlmap, etc.)
from HTTP request events
- Recon → exfil phase sequencing (latency between the last recon event
and the first exfil-like event)
- OS / TCP fingerprint + retransmit rollup from sniffer-emitted events,
with TTL-based fallback when p0f returns no match
Pure-Python; no external dependencies. All functions are safe to call from
both sync and async contexts.
"""
from __future__ import annotations
import json
import re
import statistics
from collections import Counter
from typing import Any
from decnet.correlation.parser import LogEvent
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer
# ─── Event-type taxonomy ────────────────────────────────────────────────────
# Sniffer-emitted packet events that feed into fingerprint rollup.
_SNIFFER_SYN_EVENT: str = "tcp_syn_fingerprint"
_SNIFFER_FLOW_EVENT: str = "tcp_flow_timing"
# Prober-emitted active-probe result (SYN-ACK fingerprint of attacker machine).
# Same string as the event type the prober writes for TCP fingerprints.
_PROBER_TCPFP_EVENT: str = "tcpfp_fingerprint"
# Canonical initial TTL for each coarse OS bucket. Used to derive hop
# distance when only the observed TTL is available (prober path).
# Keys are the bucket names returned by _os_from_ttl().
_INITIAL_TTL: dict[str, int] = {
    "linux": 64,
    "windows": 128,
    "embedded": 255,
}
# Events that signal "recon" phase (scans, probes, auth attempts).
_RECON_EVENT_TYPES: frozenset[str] = frozenset({
    "scan", "connection", "banner", "probe",
    "login_attempt", "auth", "auth_failure",
})
# Events that signal "exfil" / action-on-objective phase.
_EXFIL_EVENT_TYPES: frozenset[str] = frozenset({
    "download", "upload", "file_transfer", "data_exfil",
    "command", "exec", "query", "shell_input",
})
# Fields carrying payload byte counts (for "large payload" detection).
_PAYLOAD_SIZE_FIELDS: tuple[str, ...] = ("bytes", "size", "content_length")
# ─── C2 tool attribution signatures (beacon timing) ─────────────────────────
#
# Each entry lists the default beacon cadence profile of a popular C2.
# A profile *matches* an attacker when:
#   - mean inter-event time is within ±`interval_tolerance_s` seconds of
#     `interval_s`, AND
#   - jitter (cv = stdev / mean) is within ±`jitter_tolerance` of `jitter_cv`
#
# Multiple matches are all returned (attacker may run multiple implants).
# Note that the cobalt_strike and sliver windows overlap (same 60 s interval,
# cv 0.15-0.25 vs 0.22-0.38), so one session can match both.
_TOOL_SIGNATURES: tuple[dict[str, Any], ...] = (
    {
        "name": "cobalt_strike",
        "interval_s": 60.0,
        "interval_tolerance_s": 8.0,
        "jitter_cv": 0.20,
        "jitter_tolerance": 0.05,
    },
    {
        "name": "sliver",
        "interval_s": 60.0,
        "interval_tolerance_s": 10.0,
        "jitter_cv": 0.30,
        "jitter_tolerance": 0.08,
    },
    {
        "name": "havoc",
        "interval_s": 45.0,
        "interval_tolerance_s": 8.0,
        "jitter_cv": 0.10,
        "jitter_tolerance": 0.03,
    },
    {
        "name": "mythic",
        "interval_s": 30.0,
        "interval_tolerance_s": 6.0,
        "jitter_cv": 0.15,
        "jitter_tolerance": 0.03,
    },
)
# ─── Header-based tool signatures ───────────────────────────────────────────
#
# Scanned against HTTP `request` events. `pattern` is a case-insensitive
# substring (or a regex anchored with ^ if it starts with that character).
# `header` is matched case-insensitively against the event's headers dict.
# Each entry: "name" is the tool label reported, "header" the (lower-case)
# HTTP header to inspect, "pattern" the text to look for in its value.
_HEADER_TOOL_SIGNATURES: tuple[dict[str, str], ...] = (
    {"name": "nmap", "header": "user-agent", "pattern": "Nmap Scripting Engine"},
    # The only entry keyed on a header other than User-Agent.
    {"name": "gophish", "header": "x-mailer", "pattern": "gophish"},
    {"name": "nikto", "header": "user-agent", "pattern": "Nikto"},
    {"name": "sqlmap", "header": "user-agent", "pattern": "sqlmap"},
    {"name": "nuclei", "header": "user-agent", "pattern": "Nuclei"},
    {"name": "masscan", "header": "user-agent", "pattern": "masscan"},
    {"name": "zgrab", "header": "user-agent", "pattern": "zgrab"},
    {"name": "metasploit", "header": "user-agent", "pattern": "Metasploit"},
    # The only pattern starting with "^", i.e. the anchored-regex form.
    {"name": "curl", "header": "user-agent", "pattern": "^curl/"},
    {"name": "python_requests", "header": "user-agent", "pattern": "python-requests"},
    {"name": "gobuster", "header": "user-agent", "pattern": "gobuster"},
    {"name": "dirbuster", "header": "user-agent", "pattern": "DirBuster"},
    {"name": "hydra", "header": "user-agent", "pattern": "hydra"},
    {"name": "wfuzz", "header": "user-agent", "pattern": "Wfuzz"},
)
# ─── TTL → coarse OS bucket (fallback when p0f returns nothing) ─────────────
def _os_from_ttl(ttl_str: str | None) -> str | None:
"""Derive a coarse OS guess from observed TTL when p0f has no match."""
if not ttl_str:
return None
try:
ttl = int(ttl_str)
except (TypeError, ValueError):
return None
if 55 <= ttl <= 70:
return "linux"
if 115 <= ttl <= 135:
return "windows"
if 235 <= ttl <= 255:
return "embedded"
return None
# ─── Timing stats ───────────────────────────────────────────────────────────
@_traced("profiler.timing_stats")
def timing_stats(events: list[LogEvent]) -> dict[str, Any]:
    """
    Compute inter-arrival-time (IAT) statistics for *events*.

    The events are sorted by timestamp first, so input order does not
    matter and every interval is non-negative.

    Returns a dict with, in this order:
        event_count, duration_s, mean_iat_s, median_iat_s, stdev_iat_s,
        min_iat_s, max_iat_s, cv

    ``duration_s`` is the span from first to last event. With fewer than
    two events all IAT fields and ``cv`` are None. ``stdev_iat_s`` is the
    population standard deviation (0.0 when there is a single interval).
    ``cv`` is stdev / mean, or None when the mean is 0. Seconds are
    rounded to 3 decimals and ``cv`` to 4.
    """
    stats: dict[str, Any] = {
        "event_count": len(events),
        "duration_s": 0.0,
        "mean_iat_s": None,
        "median_iat_s": None,
        "stdev_iat_s": None,
        "min_iat_s": None,
        "max_iat_s": None,
        "cv": None,
    }
    if not events:
        return stats

    ordered = sorted(events, key=lambda e: e.timestamp)
    span = ordered[-1].timestamp - ordered[0].timestamp
    stats["duration_s"] = round(span.total_seconds(), 3)
    if len(ordered) < 2:
        return stats

    # Consecutive gaps; sorting above guarantees each one is >= 0.
    iats = [
        (later.timestamp - earlier.timestamp).total_seconds()
        for earlier, later in zip(ordered, ordered[1:])
    ]

    mean = statistics.fmean(iats)
    median = statistics.median(iats)
    stdev = statistics.pstdev(iats) if len(iats) > 1 else 0.0
    # All events at the same instant give mean == 0: cv is undefined.
    cv = (stdev / mean) if mean > 0 else None

    stats.update(
        mean_iat_s=round(mean, 3),
        median_iat_s=round(median, 3),
        stdev_iat_s=round(stdev, 3),
        min_iat_s=round(min(iats), 3),
        max_iat_s=round(max(iats), 3),
        cv=round(cv, 4) if cv is not None else None,
    )
    return stats
# ─── Behavior classification ────────────────────────────────────────────────
@_traced("profiler.classify_behavior")
def classify_behavior(stats: dict[str, Any], services_count: int) -> str:
    """
    Map timing statistics onto a coarse behaviour label.

    Possible labels:
        unknown | slow_scan | scanning | brute_force | beaconing |
        interactive | mixed

    *stats* is read for ``event_count`` (n), ``mean_iat_s`` (mean) and ``cv``.
    Rules are tried top to bottom and the first match wins:

      * ``unknown``     - n < 3, or no mean IAT available
      * ``slow_scan``   - >= 2 services, mean >= 10 s, n >= 4
      * ``scanning``    - (>= 3 services and mean < 10 s) or
                          (>= 2 services and mean < 2 s)
      * ``brute_force`` - exactly 1 service, n >= 8, mean < 5 s, CV < 0.6
      * ``beaconing``   - CV < 0.35, mean >= 5 s, n >= 4
      * ``interactive`` - CV >= 0.5, mean < 5 s, n >= 6
      * ``mixed``       - anything else

    Rules that depend on CV never match when CV is None.
    """
    n = stats.get("event_count") or 0
    mean = stats.get("mean_iat_s")
    cv = stats.get("cv")

    if n < 3 or mean is None:
        return "unknown"

    has_cv = cv is not None
    broad_sweep = services_count >= 3 and mean < 10.0
    rapid_multi = services_count >= 2 and mean < 2.0

    # Ordered rule table. slow_scan sits ahead of scanning so that slow
    # multi-service sessions are not bucketed as a fast sweep.
    rules = (
        ("slow_scan", services_count >= 2 and mean >= 10.0 and n >= 4),
        ("scanning", broad_sweep or rapid_multi),
        (
            "brute_force",
            services_count == 1 and n >= 8 and mean < 5.0 and has_cv and cv < 0.6,
        ),
        ("beaconing", has_cv and cv < 0.35 and mean >= 5.0 and n >= 4),
        ("interactive", has_cv and cv >= 0.5 and mean < 5.0 and n >= 6),
    )
    for label, matched in rules:
        if matched:
            return label
    return "mixed"
# ─── C2 tool attribution (beacon timing) ────────────────────────────────────
def guess_tools(mean_iat_s: float | None, cv: float | None) -> list[str]:
    """
    Return the names of every C2 beacon profile that fits the given timing.

    Each entry of ``_TOOL_SIGNATURES`` is compared on two axes: the mean
    inter-arrival time must lie within ``interval_tolerance_s`` of
    ``interval_s``, and the coefficient of variation within
    ``jitter_tolerance`` of ``jitter_cv``. All fitting names are returned in
    signature order, since one attacker may run several implants.

    Returns an empty list when either input is None or nothing fits.
    """
    if mean_iat_s is None or cv is None:
        return []

    return [
        sig["name"]
        for sig in _TOOL_SIGNATURES
        if not abs(mean_iat_s - sig["interval_s"]) > sig["interval_tolerance_s"]
        and not abs(cv - sig["jitter_cv"]) > sig["jitter_tolerance"]
    ]
# Keep the old name as an alias so callers that expected a single string still
# work, but treat it as deprecated. Returns the sole hit, or None when there
# are zero or several hits.
def guess_tool(mean_iat_s: float | None, cv: float | None) -> str | None:
    """
    Deprecated single-result wrapper: use guess_tools() instead.

    Returns the tool name only when exactly one signature matches; zero
    matches or several matches both yield None.
    """
    matches = guess_tools(mean_iat_s, cv)
    return matches[0] if len(matches) == 1 else None
# ─── Header-based tool detection ────────────────────────────────────────────
@_traced("profiler.detect_tools_from_headers")
def detect_tools_from_headers(events: list[LogEvent]) -> list[str]:
    """
    Scan HTTP ``request`` events for tool-identifying headers.

    For each request event the ``headers`` field is decoded into a dict, its
    keys are lower-cased, and every entry of ``_HEADER_TOOL_SIGNATURES`` is
    checked against the header it names. A signature pattern starting with
    ``^`` is applied as a case-insensitive regex anchored at the start of the
    value; any other pattern is a case-insensitive substring test.

    Events whose ``headers`` value cannot be decoded into a dict are skipped
    rather than aborting the scan. This includes strings that are valid JSON
    but not an object, such as ``"[1, 2]"`` or ``"42"``.

    Returns a deduplicated list of matched tool names in detection order.
    """
    # Only needed for the legacy repr fallback below.
    import ast as _ast

    found: list[str] = []
    seen: set[str] = set()

    for e in events:
        if e.event_type != "request":
            continue

        raw_headers = e.fields.get("headers")
        if not raw_headers:
            continue

        # headers may arrive as a JSON string, a Python-repr string (legacy),
        # or a dict already (in-memory / test paths).
        headers: object = raw_headers
        if isinstance(raw_headers, str):
            try:
                headers = json.loads(raw_headers)
            except ValueError:  # json.JSONDecodeError is a ValueError subclass
                # Backward-compat: events written before the JSON-encode fix
                # were serialized as Python repr via str(dict). ast.literal_eval
                # handles that safely (no arbitrary code execution).
                try:
                    headers = _ast.literal_eval(raw_headers)
                except Exception:  # nosec B112 — skip unparseable header values
                    continue

        # Both decode paths can yield a non-mapping (list, number, string);
        # those carry no header names, so skip instead of raising on .items().
        if not isinstance(headers, dict):
            continue

        # Normalise header keys to lowercase for matching. str() keeps
        # non-string keys (possible via the repr fallback) from raising.
        lc_headers: dict[str, str] = {
            str(k).lower(): str(v) for k, v in headers.items()
        }

        for sig in _HEADER_TOOL_SIGNATURES:
            name = sig["name"]
            if name in seen:
                continue
            value = lc_headers.get(sig["header"])
            if value is None:
                continue

            pattern = sig["pattern"]
            if pattern.startswith("^"):
                matched = re.match(pattern, value, re.IGNORECASE) is not None
            else:
                matched = pattern.lower() in value.lower()

            if matched:
                found.append(name)
                seen.add(name)

    return found
# ─── Phase sequencing ───────────────────────────────────────────────────────
@_traced("profiler.phase_sequence")
def phase_sequence(events: list[LogEvent]) -> dict[str, Any]:
    """
    Summarise the recon -> exfil transition within *events*.

    Events are walked in timestamp order. The returned dict contains:
        recon_end_ts       : ISO timestamp of the last event whose type is in
                             ``_RECON_EVENT_TYPES`` (None if there is none)
        exfil_start_ts     : ISO timestamp of the first event whose type is in
                             ``_EXFIL_EVENT_TYPES`` (None if there is none)
        exfil_latency_s    : seconds from recon end to exfil start; None unless
                             both exist and exfil does not precede recon end
        large_payload_count: number of events with a ``_PAYLOAD_SIZE_FIELDS``
                             value of at least 1 MiB (each event counts once)
    """
    bulk_threshold = 1_048_576  # 1 MiB

    def _reports_bulk_payload(fields: Any) -> bool:
        # True as soon as one size field parses to an int >= the threshold;
        # missing or non-numeric fields are ignored.
        for size_field in _PAYLOAD_SIZE_FIELDS:
            raw = fields.get(size_field)
            if raw is None:
                continue
            try:
                size = int(raw)
            except (TypeError, ValueError):
                continue
            if size >= bulk_threshold:
                return True
        return False

    last_recon = None
    first_exfil = None
    bulk_events = 0

    for event in sorted(events, key=lambda x: x.timestamp):
        kind = event.event_type
        if kind in _RECON_EVENT_TYPES:
            last_recon = event.timestamp
        elif kind in _EXFIL_EVENT_TYPES and first_exfil is None:
            first_exfil = event.timestamp

        if _reports_bulk_payload(event.fields):
            bulk_events += 1

    latency: float | None = None
    both_seen = last_recon is not None and first_exfil is not None
    if both_seen and first_exfil >= last_recon:
        latency = round((first_exfil - last_recon).total_seconds(), 3)

    return {
        "recon_end_ts": last_recon.isoformat() if last_recon else None,
        "exfil_start_ts": first_exfil.isoformat() if first_exfil else None,
        "exfil_latency_s": latency,
        "large_payload_count": bulk_events,
    }
# ─── Sniffer rollup (OS fingerprint + retransmits) ──────────────────────────
@_traced("profiler.sniffer_rollup")
def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
    """
    Condense sniffer and prober TCP events into one per-attacker summary.

    Three event types are consumed (all others are ignored):
      * ``_SNIFFER_SYN_EVENT``  - passive SYN fingerprint: OS label, TTL,
        reported hop distance, TCP option snapshot
      * ``_SNIFFER_FLOW_EVENT`` - flow timing: retransmit counter
      * ``_PROBER_TCPFP_EVENT`` - active probe reply: TTL (hop distance is
        derived from it) and a TCP option snapshot under different field names

    OS guess: the most frequent non-empty, non-"unknown" ``os_guess`` label;
    if no label was seen, ``_os_from_ttl`` of the most frequent TTL value.
    Hop distance: median of the positive hop samples, truncated to int.
    TCP fingerprint: the snapshot from the last fingerprint-bearing event.

    Returns a dict with os_guess, hop_distance, tcp_fingerprint (``{}`` when
    none was seen) and retransmit_count.
    """
    labels: list[str] = []
    ttls: list[str] = []
    hop_samples: list[int] = []
    fingerprint: dict[str, Any] | None = None
    retransmit_total = 0

    for event in events:
        kind = event.event_type
        fields = event.fields

        if kind == _SNIFFER_SYN_EVENT:
            label = fields.get("os_guess")
            if label and label != "unknown":
                labels.append(label)

            # Raw TTL is kept for the fallback OS derivation.
            ttl = fields.get("ttl") or fields.get("initial_ttl")
            if ttl:
                ttls.append(ttl)

            # Only valid, strictly positive hop distances are sampled.
            reported_hops = fields.get("hop_distance")
            if reported_hops:
                try:
                    hop_count = int(reported_hops)
                except (TypeError, ValueError):
                    hop_count = 0
                if hop_count > 0:
                    hop_samples.append(hop_count)

            # Latest snapshot wins.
            fingerprint = {
                "window": _int_or_none(fields.get("window")),
                "wscale": _int_or_none(fields.get("wscale")),
                "mss": _int_or_none(fields.get("mss")),
                "options_sig": fields.get("options_sig", ""),
                "has_sack": fields.get("has_sack") == "true",
                "has_timestamps": fields.get("has_timestamps") == "true",
            }

        elif kind == _SNIFFER_FLOW_EVENT:
            try:
                retransmit_total += int(fields.get("retransmits", "0"))
            except (TypeError, ValueError):
                pass

        elif kind == _PROBER_TCPFP_EVENT:
            # Active-probe result; field names differ from the passive sniffer.
            ttl = fields.get("ttl")
            if ttl:
                ttls.append(ttl)

                # Hop distance = canonical initial TTL of the guessed OS family
                # minus the observed TTL.
                family = _os_from_ttl(ttl)
                initial_ttl = _INITIAL_TTL.get(family) if family else None
                if initial_ttl:
                    try:
                        hop_count = initial_ttl - int(ttl)
                    except (TypeError, ValueError):
                        hop_count = 0
                    if hop_count > 0:
                        hop_samples.append(hop_count)

            # Prober uses window_size/window_scale/options_order instead of
            # the sniffer's window/wscale/options_sig.
            fingerprint = {
                "window": _int_or_none(fields.get("window_size")),
                "wscale": _int_or_none(fields.get("window_scale")),
                "mss": _int_or_none(fields.get("mss")),
                "options_sig": fields.get("options_order", ""),
                "has_sack": fields.get("sack_ok") == "1",
                "has_timestamps": fields.get("timestamp") == "1",
            }

    # Modal label first, modal-TTL bucket as the fallback.
    os_guess: str | None
    if labels:
        os_guess = Counter(labels).most_common(1)[0][0]
    elif ttls:
        os_guess = _os_from_ttl(Counter(ttls).most_common(1)[0][0])
    else:
        os_guess = None

    # Median is robust to the occasional odd TTL.
    hop_distance = int(statistics.median(hop_samples)) if hop_samples else None

    return {
        "os_guess": os_guess,
        "hop_distance": hop_distance,
        "tcp_fingerprint": fingerprint or {},
        "retransmit_count": retransmit_total,
    }
def _int_or_none(v: Any) -> int | None:
    """Return ``int(v)``, or None when *v* is None, empty, or not convertible."""
    if v is not None and v != "":
        try:
            return int(v)
        except (TypeError, ValueError):
            pass
    return None
# ─── Composite: build the full AttackerBehavior record ──────────────────────
@_traced("profiler.build_behavior_record")
def build_behavior_record(events: list[LogEvent]) -> dict[str, Any]:
    """
    Assemble the row persisted in the ``attacker_behavior`` table.

    Combines timing statistics, the behaviour class, the sniffer rollup, the
    phase sequence and tool guesses for one attacker's *events*. JSON-typed
    columns (tcp_fingerprint, tool_guesses, timing_stats, phase_sequence) are
    encoded to strings here, so the repository layer receives ready-to-store
    values.
    """
    # Timing is computed over *all* events, unfiltered: a C2 beacon often
    # reuses the same event_type on every check-in, and filtering would throw
    # that signal away.
    stats = timing_stats(events)
    behavior = classify_behavior(stats, len({e.service for e in events}))
    rollup = sniffer_rollup(events)
    phase = phase_sequence(events)

    # Tool guesses: beacon-timing matches first, then header detections,
    # de-duplicated while preserving that order.
    timing_hits = guess_tools(stats.get("mean_iat_s"), stats.get("cv"))
    header_hits = detect_tools_from_headers(events)
    all_tools: list[str] = list(dict.fromkeys([*timing_hits, *header_hits]))

    # A TCP-level "nmap" OS label is promoted to the front of the tool list;
    # it can fire even when no HTTP headers were captured.
    os_guess = rollup["os_guess"]
    if os_guess == "nmap" and "nmap" not in all_tools:
        all_tools.insert(0, "nmap")

    # Interval/jitter are surfaced only for flows classified as beaconing;
    # for any other class both stay None.
    is_beacon = behavior == "beaconing"
    beacon_interval_s: float | None = stats.get("mean_iat_s") if is_beacon else None
    beacon_cv = stats.get("cv") if is_beacon else None
    beacon_jitter_pct: float | None = (
        round(beacon_cv * 100, 2) if beacon_cv is not None else None
    )

    # Emit a summary span describing the outcome.
    with _get_tracer("profiler").start_as_current_span(
        "profiler.behavior_summary"
    ) as summary_span:
        summary_span.set_attribute("behavior_class", behavior)
        summary_span.set_attribute("os_guess", os_guess or "unknown")
        summary_span.set_attribute("tool_count", len(all_tools))
        summary_span.set_attribute("event_count", stats.get("event_count", 0))
        if all_tools:
            summary_span.set_attribute("tools", ",".join(all_tools))

    return {
        "os_guess": os_guess,
        "hop_distance": rollup["hop_distance"],
        "tcp_fingerprint": json.dumps(rollup["tcp_fingerprint"]),
        "retransmit_count": rollup["retransmit_count"],
        "behavior_class": behavior,
        "beacon_interval_s": beacon_interval_s,
        "beacon_jitter_pct": beacon_jitter_pct,
        "tool_guesses": json.dumps(all_tools),
        "timing_stats": json.dumps(stats),
        "phase_sequence": json.dumps(phase),
    }

View File

@@ -1,215 +0,0 @@
"""
Attacker profile builder — incremental background worker.
Maintains a persistent CorrelationEngine and a log-ID cursor across cycles.
On cold start (first cycle or process restart), performs one full build from
all stored logs. Subsequent cycles fetch only new logs via the cursor,
ingest them into the existing engine, and rebuild profiles for affected IPs
only.
Complexity per cycle: O(new_logs + affected_ips) instead of O(total_logs²).
"""
from __future__ import annotations
import asyncio
import json
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
from decnet.correlation.engine import CorrelationEngine
from decnet.correlation.parser import LogEvent
from decnet.logging import get_logger
from decnet.profiler.behavioral import build_behavior_record
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer
from decnet.web.db.repository import BaseRepository
logger = get_logger("attacker_worker")
_BATCH_SIZE = 500
_STATE_KEY = "attacker_worker_cursor"
# Event types that indicate active command/query execution (not just connection/scan)
_COMMAND_EVENT_TYPES = frozenset({
"command", "exec", "query", "input", "shell_input",
"execute", "run", "sql_query", "redis_command",
})
# Fields that carry the executed command/query text
_COMMAND_FIELDS = ("command", "query", "input", "line", "sql", "cmd")
@dataclass
class _WorkerState:
    """Mutable state the attacker worker carries from one cycle to the next."""

    # Correlation engine that log lines are ingested into; each new state
    # object gets its own freshly constructed engine.
    engine: CorrelationEngine = field(default_factory=CorrelationEngine)
    # ID of the most recent log row consumed; passed to the repository as the
    # "fetch logs after this id" cursor. 0 means nothing consumed yet.
    last_log_id: int = 0
    # Set to True once a saved cursor has been restored or an update cycle has
    # finished reading its batches.
    initialized: bool = False
async def attacker_profile_worker(repo: BaseRepository, *, interval: int = 30) -> None:
    """
    Run the incremental attacker-profile update forever.

    Designed to be scheduled as an asyncio Task. The loop sleeps *interval*
    seconds, runs one incremental update, and repeats. A failing cycle is
    logged and the loop keeps going.

    On startup a previously persisted cursor (stored under ``_STATE_KEY``) is
    restored so already-processed log rows are not fetched again.
    """
    logger.info("attacker profile worker started interval=%ds", interval)
    state = _WorkerState()

    # NOTE(review): restoring the cursor leaves the fresh engine empty, so
    # profiles rebuilt after a restart appear to see only post-restart events.
    # Confirm this is intended (the module docstring describes a full rebuild
    # on process restart).
    persisted = await repo.get_state(_STATE_KEY)
    if persisted:
        state.last_log_id = persisted.get("last_log_id", 0)
        state.initialized = True
        logger.info("attacker worker: resumed from cursor last_log_id=%d", state.last_log_id)

    while True:
        # Sleep first: the initial update happens one interval after startup.
        await asyncio.sleep(interval)
        try:
            await _incremental_update(repo, state)
        except Exception as exc:
            # Best-effort worker: one bad cycle must not kill the task.
            logger.error("attacker worker: update failed: %s", exc)
@_traced("profiler.incremental_update")
async def _incremental_update(repo: BaseRepository, state: _WorkerState) -> None:
    """
    Ingest all log rows newer than the cursor and refresh affected profiles.

    Rows are fetched in batches of ``_BATCH_SIZE``, fed to the correlation
    engine, and the cursor in *state* advances row by row. Profiles are then
    rebuilt only for attacker IPs that appeared in the new rows, and the
    cursor is persisted under ``_STATE_KEY`` afterwards.
    """
    cold_start = not state.initialized
    touched_ips: set[str] = set()

    while rows := await repo.get_logs_after_id(state.last_log_id, limit=_BATCH_SIZE):
        for row in rows:
            event = state.engine.ingest(row["raw_line"])
            if event and event.attacker_ip:
                touched_ips.add(event.attacker_ip)
            state.last_log_id = row["id"]

        await asyncio.sleep(0)  # yield to event loop after each batch

        # A short batch means the log table is drained.
        if len(rows) < _BATCH_SIZE:
            break

    state.initialized = True

    if touched_ips:
        await _update_profiles(repo, state, touched_ips)

    # The cursor is persisted whether or not any profile changed.
    await repo.set_state(_STATE_KEY, {"last_log_id": state.last_log_id})

    if not touched_ips:
        return

    if cold_start:
        logger.info("attacker worker: cold start rebuilt %d profiles", len(touched_ips))
    else:
        logger.info("attacker worker: updated %d profiles (incremental)", len(touched_ips))
@_traced("profiler.update_profiles")
async def _update_profiles(
    repo: BaseRepository,
    state: _WorkerState,
    ips: set[str],
) -> None:
    """
    Rebuild and upsert the attacker profile for each address in *ips*.

    For every IP with events in the engine, the core attacker record is
    upserted first, then the behavioural record is upserted. A failure in the
    behavioural step is recorded on the span and logged, and processing
    continues with the next IP.
    """
    traversals_by_ip = {
        trav.attacker_ip: trav for trav in state.engine.traversals(min_deckies=2)
    }
    bounties_by_ip = await repo.get_bounties_for_ips(ips)
    tracer = _get_tracer("profiler")

    for ip in ips:
        ip_events = state.engine._events.get(ip, [])
        if not ip_events:
            continue

        with tracer.start_as_current_span("profiler.process_ip") as span:
            span.set_attribute("attacker_ip", ip)
            span.set_attribute("event_count", len(ip_events))

            traversal = traversals_by_ip.get(ip)
            ip_bounties = bounties_by_ip.get(ip, [])
            ip_commands = _extract_commands_from_events(ip_events)

            attacker_uuid = await repo.upsert_attacker(
                _build_record(ip, ip_events, traversal, ip_bounties, ip_commands)
            )

            span.set_attribute("is_traversal", traversal is not None)
            span.set_attribute("bounty_count", len(ip_bounties))
            span.set_attribute("command_count", len(ip_commands))

            # Behavioral / fingerprint rollup lives in a sibling table so
            # failures here never block the core attacker profile upsert.
            try:
                await repo.upsert_attacker_behavior(
                    attacker_uuid, build_behavior_record(ip_events)
                )
            except Exception as exc:
                span.record_exception(exc)
                logger.error("attacker worker: behavior upsert failed for %s: %s", ip, exc)
def _build_record(
    ip: str,
    events: list[LogEvent],
    traversal: Any,
    bounties: list[dict[str, Any]],
    commands: list[dict[str, Any]],
) -> dict[str, Any]:
    """
    Build the attacker row for *ip* from its events and related data.

    *events* must be non-empty (first/last seen are the min/max timestamps).
    When *traversal* is truthy its ``deckies`` and ``path`` are used;
    otherwise deckies are listed in first-contact order and the path is None.
    List-valued columns (services, deckies, fingerprints, commands) are
    JSON-encoded; ``updated_at`` is the current UTC time.
    """
    stamps = [e.timestamp for e in events]
    service_names = sorted({e.service for e in events})
    decky_names = {e.decky for e in events}

    if traversal:
        decky_order = traversal.deckies
        path = traversal.path
    else:
        decky_order = _first_contact_deckies(events)
        path = None

    # One pass over bounties: collect fingerprints, count credentials.
    fingerprints = []
    credential_count = 0
    for bounty in bounties:
        kind = bounty.get("bounty_type")
        if kind == "fingerprint":
            fingerprints.append(bounty)
        elif kind == "credential":
            credential_count += 1

    return {
        "ip": ip,
        "first_seen": min(stamps),
        "last_seen": max(stamps),
        "event_count": len(events),
        "service_count": len(service_names),
        "decky_count": len(decky_names),
        "services": json.dumps(service_names),
        "deckies": json.dumps(decky_order),
        "traversal_path": path,
        "is_traversal": traversal is not None,
        "bounty_count": len(bounties),
        "credential_count": credential_count,
        "fingerprints": json.dumps(fingerprints),
        "commands": json.dumps(commands),
        "updated_at": datetime.now(timezone.utc),
    }
def _first_contact_deckies(events: list[LogEvent]) -> list[str]:
    """
    Return unique deckies in first-contact order (for non-traversal attackers).

    Events are ordered by timestamp and each decky is listed once, at the
    position of its earliest event. Returns an empty list for no events.
    """
    chronological = sorted(events, key=lambda x: x.timestamp)
    # dict.fromkeys keeps first-insertion order and dedupes in O(n), replacing
    # the quadratic "x not in list" scan.
    return list(dict.fromkeys(e.decky for e in chronological))
def _extract_commands_from_events(events: list[LogEvent]) -> list[dict[str, Any]]:
    """
    Collect the commands an attacker executed, one entry per matching event.

    Only events whose type is in ``_COMMAND_EVENT_TYPES`` are considered. The
    command text is the first non-empty value among ``_COMMAND_FIELDS`` in the
    event's ``fields`` dict; events without one are skipped. Each entry holds
    service, decky, command and an ISO-formatted timestamp.
    """
    extracted: list[dict[str, Any]] = []

    for event in events:
        if event.event_type not in _COMMAND_EVENT_TYPES:
            continue

        # First truthy field wins, in _COMMAND_FIELDS priority order.
        text = next(
            (
                str(value)
                for key in _COMMAND_FIELDS
                if (value := event.fields.get(key))
            ),
            None,
        )
        if not text:
            continue

        extracted.append({
            "service": event.service,
            "decky": event.decky,
            "command": text,
            "timestamp": event.timestamp.isoformat(),
        })

    return extracted

View File

@@ -13,7 +13,6 @@ class BaseService(ABC):
name: str # unique slug, e.g. "ssh", "smb"
ports: list[int] # ports this service listens on inside the container
default_image: str # Docker image tag, or "build" if a Dockerfile is needed
fleet_singleton: bool = False # True = runs once fleet-wide, not per-decky
@abstractmethod
def compose_fragment(

View File

@@ -1,35 +1,26 @@
from pathlib import Path
from decnet.services.base import BaseService
class ConpotService(BaseService):
"""ICS/SCADA honeypot covering Modbus (502), SNMP (161 UDP), and HTTP (80).
Uses a custom build context wrapping the official honeynet/conpot image
to fix Modbus binding to port 502.
Uses the official honeynet/conpot image which ships a default ICS profile
that emulates a Siemens S7-200 PLC.
"""
name = "conpot"
ports = [502, 161, 80]
default_image = "build"
default_image = "honeynet/conpot"
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
env = {
"CONPOT_TEMPLATE": "default",
"NODE_NAME": decky_name,
}
if log_target:
env["LOG_TARGET"] = log_target
return {
"build": {
"context": str(self.dockerfile_context()),
"args": {"BASE_IMAGE": "honeynet/conpot:latest"},
},
"image": "honeynet/conpot",
"container_name": f"{decky_name}-conpot",
"restart": "unless-stopped",
"environment": env,
"environment": {
"CONPOT_TEMPLATE": "default",
},
}
def dockerfile_context(self):
return Path(__file__).parent.parent / "templates" / "conpot"
return None

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "docker_api"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "docker_api"
class DockerAPIService(BaseService):

View File

@@ -2,7 +2,7 @@ from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "elasticsearch"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "elasticsearch"
class ElasticsearchService(BaseService):

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "ftp"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "ftp"
class FTPService(BaseService):

View File

@@ -2,7 +2,7 @@ import json
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "http"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "http"
class HTTPService(BaseService):

View File

@@ -1,59 +0,0 @@
import json
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "https"
class HTTPSService(BaseService):
    """HTTPS decoy on port 443, built from the ``https`` template directory.

    Optional ``service_cfg`` keys (each injected only when present):
        server_header, response_code, fake_app, extra_headers, custom_body,
        files, tls_cert, tls_key, tls_cn
    """

    name = "https"
    ports = [443]
    default_image = "build"

    def compose_fragment(
        self,
        decky_name: str,
        log_target: str | None = None,
        service_cfg: dict | None = None,
    ) -> dict:
        """Return the docker-compose service fragment for this decky."""
        cfg = service_cfg or {}
        env: dict = {"NODE_NAME": decky_name}
        fragment: dict = {
            "build": {"context": str(TEMPLATES_DIR)},
            "container_name": f"{decky_name}-https",
            "restart": "unless-stopped",
            "environment": env,
        }

        if log_target:
            env["LOG_TARGET"] = log_target

        def _headers_value(val):
            # Dicts are JSON-encoded; anything else is passed through as-is.
            return json.dumps(val) if isinstance(val, dict) else val

        # Optional persona overrides: (cfg key, env var, converter or None).
        persona = (
            ("server_header", "SERVER_HEADER", None),
            ("response_code", "RESPONSE_CODE", str),
            ("fake_app", "FAKE_APP", None),
            ("extra_headers", "EXTRA_HEADERS", _headers_value),
            ("custom_body", "CUSTOM_BODY", None),
        )
        for cfg_key, env_key, convert in persona:
            if cfg_key in cfg:
                raw = cfg[cfg_key]
                env[env_key] = convert(raw) if convert else raw

        # Static files: mount the host directory read-only into the container.
        if "files" in cfg:
            host_dir = str(Path(cfg["files"]).resolve())
            env["FILES_DIR"] = "/opt/html_files"
            fragment.setdefault("volumes", []).append(f"{host_dir}:/opt/html_files:ro")

        for cfg_key, env_key in (
            ("tls_cert", "TLS_CERT"),
            ("tls_key", "TLS_KEY"),
            ("tls_cn", "TLS_CN"),
        ):
            if cfg_key in cfg:
                env[env_key] = cfg[cfg_key]

        return fragment

    def dockerfile_context(self) -> Path | None:
        """Return the build-context directory for this service's image."""
        return TEMPLATES_DIR

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "imap"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "imap"
class IMAPService(BaseService):

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "k8s"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "k8s"
class KubernetesAPIService(BaseService):

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "ldap"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "ldap"
class LDAPService(BaseService):

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "llmnr"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "llmnr"
class LLMNRService(BaseService):

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mongodb"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "mongodb"
class MongoDBService(BaseService):

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mqtt"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "mqtt"
class MQTTService(BaseService):

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mssql"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "mssql"
class MSSQLService(BaseService):

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mysql"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "mysql"
class MySQLService(BaseService):

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "pop3"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "pop3"
class POP3Service(BaseService):

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "postgres"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "postgres"
class PostgresService(BaseService):

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "rdp"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "rdp"
class RDPService(BaseService):

View File

@@ -0,0 +1,46 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "real_ssh"
class RealSSHService(BaseService):
    """
    Real, fully interactive OpenSSH server with no honeypot emulation.

    Intended for the deaddeck (entry-point machine): attackers get a genuine
    shell, and the credentials are deliberately weak to invite exploitation.

    service_cfg keys:
        password    Root password (default: "admin")
        hostname    Override container hostname
    """

    name = "real_ssh"
    ports = [22]
    default_image = "build"

    def compose_fragment(
        self,
        decky_name: str,
        log_target: str | None = None,
        service_cfg: dict | None = None,
    ) -> dict:
        """Return the docker-compose service fragment for this decky."""
        options = service_cfg or {}

        environment: dict = {
            "SSH_ROOT_PASSWORD": options.get("password", "admin"),
        }
        # Hostname override is injected only when explicitly configured.
        if "hostname" in options:
            environment["SSH_HOSTNAME"] = options["hostname"]

        fragment = {
            "build": {"context": str(TEMPLATES_DIR)},
            "container_name": f"{decky_name}-real-ssh",
            "restart": "unless-stopped",
            "cap_add": ["NET_BIND_SERVICE"],
            "environment": environment,
        }
        return fragment

    def dockerfile_context(self) -> Path:
        """Return the build-context directory for this service's image."""
        return TEMPLATES_DIR

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "redis"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "redis"
class RedisService(BaseService):

View File

@@ -26,8 +26,6 @@ def _load_plugins() -> None:
continue
importlib.import_module(f"decnet.services.{module_info.name}")
for cls in BaseService.__subclasses__():
if not cls.__module__.startswith("decnet.services."):
continue
instance = cls()
_registry[instance.name] = instance
_loaded = True

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "sip"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "sip"
class SIPService(BaseService):

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "smb"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "smb"
class SMBService(BaseService):

View File

@@ -2,7 +2,7 @@ from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "smtp"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "smtp"
class SMTPService(BaseService):

View File

@@ -1,43 +0,0 @@
from pathlib import Path
from decnet.services.base import BaseService
# Reuses the same template as the smtp service — only difference is
# SMTP_OPEN_RELAY=1 in the environment, which enables the open relay persona.
_TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "smtp"
class SMTPRelayService(BaseService):
    """SMTP open relay bait — accepts any RCPT TO and delivers messages.

    Always sets ``SMTP_OPEN_RELAY=1`` in the container environment.

    service_cfg keys (each injected only when present):
        banner    exported as SMTP_BANNER
        mta       exported as SMTP_MTA
    """

    name = "smtp_relay"
    ports = [25, 587]
    default_image = "build"

    def compose_fragment(
        self,
        decky_name: str,
        log_target: str | None = None,
        service_cfg: dict | None = None,
    ) -> dict:
        """Return the docker-compose service fragment for this decky."""
        options = service_cfg or {}

        environment: dict = {
            "NODE_NAME": decky_name,
            "SMTP_OPEN_RELAY": "1",
        }
        if log_target:
            environment["LOG_TARGET"] = log_target

        # Optional overrides, copied through verbatim.
        for cfg_key, env_key in (("banner", "SMTP_BANNER"), ("mta", "SMTP_MTA")):
            if cfg_key in options:
                environment[env_key] = options[cfg_key]

        return {
            "build": {"context": str(_TEMPLATES_DIR)},
            "container_name": f"{decky_name}-smtp_relay",
            "restart": "unless-stopped",
            "cap_add": ["NET_BIND_SERVICE"],
            "environment": environment,
        }

    def dockerfile_context(self) -> Path:
        """Return the build-context directory for this service's image."""
        return _TEMPLATES_DIR
View File

@@ -1,41 +0,0 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "sniffer"
class SnifferService(BaseService):
    """
    Passive network sniffer deployed alongside deckies on the MACVLAN.

    Captures TLS handshakes in promiscuous mode and extracts JA3/JA3S hashes
    plus connection metadata. The container is granted the NET_RAW and
    NET_ADMIN capabilities. It exposes no inbound ports and is marked as a
    fleet singleton.
    """

    name = "sniffer"
    ports: list[int] = []
    default_image = "build"
    fleet_singleton = True

    def compose_fragment(
        self,
        decky_name: str,
        log_target: str | None = None,
        service_cfg: dict | None = None,
    ) -> dict:
        """Return the docker-compose service fragment; *service_cfg* is unused."""
        environment: dict = {"NODE_NAME": decky_name}
        if log_target:
            environment["LOG_TARGET"] = log_target

        return {
            "build": {"context": str(TEMPLATES_DIR)},
            "container_name": f"{decky_name}-sniffer",
            "restart": "unless-stopped",
            "cap_add": ["NET_RAW", "NET_ADMIN"],
            "environment": environment,
        }

    def dockerfile_context(self) -> Path | None:
        """Return the build-context directory for this service's image."""
        return TEMPLATES_DIR

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "snmp"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "snmp"
class SNMPService(BaseService):

View File

@@ -1,26 +1,12 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "ssh"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "cowrie"
class SSHService(BaseService):
"""
Interactive OpenSSH server for general-purpose deckies.
Replaced Cowrie emulation with a real sshd so fingerprinting tools and
experienced attackers cannot trivially identify the honeypot. Auth events,
sudo activity, and interactive commands are all forwarded to stdout as
RFC 5424 via the rsyslog bridge baked into the image.
service_cfg keys:
password Root password (default: "admin")
hostname Override container hostname
"""
name = "ssh"
ports = [22]
ports = [22, 2222]
default_image = "build"
def compose_fragment(
@@ -31,29 +17,35 @@ class SSHService(BaseService):
) -> dict:
cfg = service_cfg or {}
env: dict = {
"SSH_ROOT_PASSWORD": cfg.get("password", "admin"),
# NODE_NAME is the authoritative decky identifier for log
# attribution — matches the host path used for the artifacts
# bind mount below. The container hostname (optionally overridden
# via SSH_HOSTNAME) is cosmetic and may differ to keep the
# decoy looking heterogeneous.
"NODE_NAME": decky_name,
"COWRIE_HOSTNAME": decky_name,
"COWRIE_HONEYPOT_LISTEN_ENDPOINTS": "tcp:22:interface=0.0.0.0 tcp:2222:interface=0.0.0.0",
"COWRIE_SSH_LISTEN_ENDPOINTS": "tcp:22:interface=0.0.0.0 tcp:2222:interface=0.0.0.0",
}
if "hostname" in cfg:
env["SSH_HOSTNAME"] = cfg["hostname"]
if log_target:
host, port = log_target.rsplit(":", 1)
env["COWRIE_OUTPUT_TCP_ENABLED"] = "true"
env["COWRIE_OUTPUT_TCP_HOST"] = host
env["COWRIE_OUTPUT_TCP_PORT"] = port
# Optional persona overrides
if "kernel_version" in cfg:
env["COWRIE_HONEYPOT_KERNEL_VERSION"] = cfg["kernel_version"]
if "kernel_build_string" in cfg:
env["COWRIE_HONEYPOT_KERNEL_BUILD_STRING"] = cfg["kernel_build_string"]
if "hardware_platform" in cfg:
env["COWRIE_HONEYPOT_HARDWARE_PLATFORM"] = cfg["hardware_platform"]
if "ssh_banner" in cfg:
env["COWRIE_SSH_VERSION"] = cfg["ssh_banner"]
if "users" in cfg:
env["COWRIE_USERDB_ENTRIES"] = cfg["users"]
# File-catcher quarantine: bind-mount a per-decky host dir so attacker
# drops (scp/sftp/wget) are mirrored out-of-band for forensic analysis.
# The in-container path masquerades as systemd-coredump so `mount`/`df`
# from inside the container looks benign.
quarantine_host = f"/var/lib/decnet/artifacts/{decky_name}/ssh"
return {
"build": {"context": str(TEMPLATES_DIR)},
"container_name": f"{decky_name}-ssh",
"restart": "unless-stopped",
"cap_add": ["NET_BIND_SERVICE"],
"environment": env,
"volumes": [f"{quarantine_host}:/var/lib/systemd/coredump:rw"],
}
def dockerfile_context(self) -> Path:

View File

@@ -1,47 +1,31 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "telnet"
class TelnetService(BaseService):
"""
Real telnetd using busybox telnetd + rsyslog logging pipeline.
Replaced Cowrie emulation (which also started an SSH daemon on port 22)
with a real busybox telnetd so only port 23 is exposed and auth events
are logged as RFC 5424 via the same rsyslog bridge used by the SSH service.
service_cfg keys:
password Root password (default: "admin")
hostname Override container hostname
"""
name = "telnet"
ports = [23]
default_image = "build"
default_image = "cowrie/cowrie"
def compose_fragment(
self,
decky_name: str,
log_target: str | None = None,
service_cfg: dict | None = None,
) -> dict:
cfg = service_cfg or {}
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
env: dict = {
"TELNET_ROOT_PASSWORD": cfg.get("password", "admin"),
"COWRIE_HONEYPOT_HOSTNAME": decky_name,
"COWRIE_TELNET_ENABLED": "true",
"COWRIE_TELNET_LISTEN_ENDPOINTS": "tcp:23:interface=0.0.0.0",
# Disable SSH so this container is telnet-only
"COWRIE_SSH_ENABLED": "false",
}
if "hostname" in cfg:
env["TELNET_HOSTNAME"] = cfg["hostname"]
if log_target:
host, port = log_target.rsplit(":", 1)
env["COWRIE_OUTPUT_TCP_ENABLED"] = "true"
env["COWRIE_OUTPUT_TCP_HOST"] = host
env["COWRIE_OUTPUT_TCP_PORT"] = port
return {
"build": {"context": str(TEMPLATES_DIR)},
"image": "cowrie/cowrie",
"container_name": f"{decky_name}-telnet",
"restart": "unless-stopped",
"cap_add": ["NET_BIND_SERVICE"],
"environment": env,
}
def dockerfile_context(self) -> Path:
return TEMPLATES_DIR
def dockerfile_context(self):
return None

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "tftp"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "tftp"
class TFTPService(BaseService):

View File

@@ -1,7 +1,7 @@
from pathlib import Path
from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "vnc"
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "vnc"
class VNCService(BaseService):

View File

@@ -1,11 +0,0 @@
"""
Fleet-wide MACVLAN sniffer microservice.
Runs as a single host-side background task (not per-decky) that sniffs
all TLS traffic on the MACVLAN interface, extracts fingerprints, and
feeds events into the existing log pipeline.
"""
from decnet.sniffer.worker import sniffer_worker
__all__ = ["sniffer_worker"]

Some files were not shown because too many files have changed in this diff Show More