Compare commits
242 Commits
v0.2
...
e8d97281f7
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e8d97281f7 | ||
| 8a2876fe86 | |||
| 3e8e4c9e1c | |||
| 64bc6fcb1d | |||
| af9d59d3ee | |||
| 4197441c01 | |||
| 1b70d6db87 | |||
| 038596776a | |||
| 692ac35ee4 | |||
| f064690452 | |||
| dd82cd3f39 | |||
| ff3e376726 | |||
| 47f2ca8d5f | |||
| da3e675f86 | |||
| 2febd921bc | |||
| 12b5c25cd7 | |||
| 5b70a34c94 | |||
| 4abfac1a98 | |||
| 9eca33938d | |||
| 195580c74d | |||
| 262a84ca53 | |||
| d1b7e94325 | |||
| 33d954a61c | |||
| bf01804736 | |||
| 62f7c88b90 | |||
| e411063075 | |||
| 148e51011c | |||
| 3ebd206bca | |||
| f576564f02 | |||
| 00d5799a79 | |||
| 14250cacad | |||
| 9d68bb45c7 | |||
| 07ec4bc269 | |||
| a63301c7a3 | |||
| df18cb44cc | |||
| 91549e6936 | |||
| e8e11b2896 | |||
| 585541016f | |||
| 5dad1bb315 | |||
| 6708f26e6b | |||
| 2bef3edb72 | |||
| d2cf1e8b3a | |||
| 6d7877c679 | |||
| ee9ade4cd5 | |||
| dad29249de | |||
| f91ba9a16e | |||
| 43b92c7bd6 | |||
| a0a241f65d | |||
| 42b5e4cd06 | |||
| 6245786289 | |||
| 5df995fda1 | |||
| 6d7567b6bb | |||
| dbaccde143 | |||
| b883f24ba2 | |||
| 79db999030 | |||
| cb1a1d1270 | |||
| 899ea559d9 | |||
| e67b6d7f73 | |||
| bc5f43c3f7 | |||
| ff4c993617 | |||
| e32fdf9cbf | |||
| 95ae175e1b | |||
| b4df9ea0a1 | |||
| 02f07c7962 | |||
| c6f7de30d2 | |||
| 37b22b76a5 | |||
| 43f140a87a | |||
| 3223bec615 | |||
| 2b1b962849 | |||
| 65fc9ac2b9 | |||
| 1e8b73c361 | |||
| 9b1299458d | |||
| 7894b9e073 | |||
| a266d6b17e | |||
| f5a5fec607 | |||
| 40d3e86e55 | |||
| ebeaf08a49 | |||
| 7765b36c50 | |||
| 8914c27220 | |||
| 4db9c7464c | |||
| 411a797120 | |||
| 3da5a2c4ee | |||
| bfc7af000a | |||
| 1e8ca4cc05 | |||
| a6430cac4c | |||
| 39d2077a3a | |||
| e2d6f857b5 | |||
| 811136e600 | |||
| 63b0a58527 | |||
| cd0057c129 | |||
| 0c77cdab32 | |||
| 8257bcc031 | |||
| d3b90679c5 | |||
| 6657d3e097 | |||
| 293da364a6 | |||
| d5e6ca1949 | |||
| a97696fa23 | |||
| 7864c72948 | |||
| 47a0480994 | |||
| 2bf886e18e | |||
| 8bdc5b98c9 | |||
| aa39be909a | |||
| 41fd496128 | |||
| 39dafaf384 | |||
| b0e00a6cc4 | |||
| 2843aafa1a | |||
| 766eeb3d83 | |||
| f462835373 | |||
| e356829234 | |||
| a5d6860124 | |||
| 8dd4c78b33 | |||
| 69510fb880 | |||
| 09d9f8595e | |||
| bfb3edbd4a | |||
| a773dddd5c | |||
| edc5c59f93 | |||
| 1f758a3669 | |||
| 6c22f9ba59 | |||
| 20fa1f9a63 | |||
| fb69a06ab3 | |||
| 1446f6da94 | |||
| e967aaabfb | |||
| 255c2e5eb7 | |||
| 2dd86fb3bb | |||
| 3106d03135 | |||
| 3cc5ba36e8 | |||
| 6301504c0e | |||
| de4b64d857 | |||
| b5d7bf818f | |||
| 257f780d0f | |||
| a10aee282f | |||
| 11b9e85874 | |||
| 45039bd621 | |||
| 4ea1c2ff4f | |||
| bb8d782e42 | |||
| 342916ca63 | |||
| d3f4bbb62b | |||
| 32340bea0d | |||
| f1e14280c0 | |||
| 931f33fb06 | |||
| 467511e997 | |||
| 3945e72e11 | |||
| bd406090a7 | |||
| e22d057e68 | |||
| cb12e7c475 | |||
| c29ca977fd | |||
| bf4afac70f | |||
| 4b15b7eb35 | |||
| 140d2fbaad | |||
| 064c8760b6 | |||
| 6572c5cbaf | |||
| ba448bae13 | |||
| 1a18377b0a | |||
| 319c1dbb61 | |||
| c1d8102253 | |||
| 49f3002c94 | |||
| 9b59f8672e | |||
| 296979003d | |||
| 89099b903d | |||
| 29578d9d99 | |||
| 70d8ffc607 | |||
| 04db13afae | |||
| d1a88e75bd | |||
| 65ddb0b359 | |||
| b437bc8eec | |||
| a1ca5d699b | |||
| e9d151734d | |||
| 0ab97d0ade | |||
| 60de16be84 | |||
| 82ec7f3117 | |||
| 11d749f13d | |||
| a4798946c1 | |||
| d869eb3d23 | |||
| 89887ec6fd | |||
| 02e73a19d5 | |||
| b3efd646f6 | |||
| 2ec64ef2ef | |||
| e67624452e | |||
| e05b632e56 | |||
| c8f05df4d9 | |||
| 935a9a58d2 | |||
| 63efe6c7ba | |||
| 314e6c6388 | |||
| 12aa98a83c | |||
| 7dbc71d664 | |||
| dae3687089 | |||
| 187194786f | |||
| 9de320421e | |||
| dd4e2aad91 | |||
| 7d10b78d50 | |||
| ddfb232590 | |||
| d7da3a7fc7 | |||
| 947efe7bd1 | |||
| c603531fd2 | |||
| a78126b1ba | |||
| 0ee23b8700 | |||
| 0952a0b71e | |||
| 4683274021 | |||
| ab187f70a1 | |||
| 172a002d41 | |||
| f6cb90ee66 | |||
| 2d65d74069 | |||
| d5eb60cb41 | |||
| 47f2da1d50 | |||
| 53fdeee208 | |||
| a2ba7a7f3c | |||
| 3eab6e8773 | |||
| 5a7ff285cd | |||
| 1d73957832 | |||
| c2eceb147d | |||
| 09d9c0ec74 | |||
| 2dcf47985e | |||
| 5585e4ec58 | |||
| ce2699455b | |||
| df3f04c10e | |||
| 7ff5703250 | |||
| a6c7cfdf66 | |||
| 7ecb126c8e | |||
| f3bb0b31ae | |||
| 8c249f6987 | |||
| 24e0d98425 | |||
| 7756747787 | |||
| e312e072e4 | |||
| 5631d09aa8 | |||
| c2f7622fbb | |||
| 8335c5dc4c | |||
| b71db65149 | |||
| fd62413935 | |||
| ea340065c6 | |||
| a022b4fed6 | |||
| 3dc5b509f6 | |||
| c9be447a38 | |||
| 62db686b42 | |||
| 57d395d6d7 | |||
| ac094965b5 | |||
| 435c004760 | |||
| 89a2132c61 | |||
| 3d01ca2c2a | |||
| 8124424e96 | |||
| a4da9b8f32 | |||
| 448cb9cee0 | |||
| 035499f255 |
@@ -28,7 +28,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
- run: pip install bandit
|
- run: pip install bandit
|
||||||
- run: bandit -r decnet/ -ll -x decnet/services/registry.py
|
- run: bandit -r decnet/ -ll -x decnet/services/registry.py -x decnet/templates/
|
||||||
|
|
||||||
pip-audit:
|
pip-audit:
|
||||||
name: Dependency audit (pip-audit)
|
name: Dependency audit (pip-audit)
|
||||||
@@ -40,7 +40,7 @@ jobs:
|
|||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
- run: pip install pip-audit
|
- run: pip install pip-audit
|
||||||
- run: pip install -e .[dev]
|
- run: pip install -e .[dev]
|
||||||
- run: pip-audit --skip-editable
|
- run: pip-audit --skip-editable --ignore-vuln CVE-2025-65896
|
||||||
|
|
||||||
test-standard:
|
test-standard:
|
||||||
name: Test (Standard)
|
name: Test (Standard)
|
||||||
@@ -48,7 +48,7 @@ jobs:
|
|||||||
needs: [lint, bandit, pip-audit]
|
needs: [lint, bandit, pip-audit]
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.11", "3.12"]
|
python-version: ["3.11"]
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- uses: actions/setup-python@v5
|
- uses: actions/setup-python@v5
|
||||||
@@ -64,6 +64,19 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.11"]
|
python-version: ["3.11"]
|
||||||
|
services:
|
||||||
|
mysql:
|
||||||
|
image: mysql:8.0
|
||||||
|
env:
|
||||||
|
MYSQL_ROOT_PASSWORD: root
|
||||||
|
MYSQL_DATABASE: decnet_test
|
||||||
|
ports:
|
||||||
|
- 3307:3306
|
||||||
|
options: >-
|
||||||
|
--health-cmd="mysqladmin ping -h 127.0.0.1"
|
||||||
|
--health-interval=10s
|
||||||
|
--health-timeout=5s
|
||||||
|
--health-retries=5
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- uses: actions/setup-python@v5
|
- uses: actions/setup-python@v5
|
||||||
@@ -71,6 +84,12 @@ jobs:
|
|||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
- run: pip install -e .[dev]
|
- run: pip install -e .[dev]
|
||||||
- run: pytest -m live
|
- run: pytest -m live
|
||||||
|
env:
|
||||||
|
DECNET_MYSQL_HOST: 127.0.0.1
|
||||||
|
DECNET_MYSQL_PORT: 3307
|
||||||
|
DECNET_MYSQL_USER: root
|
||||||
|
DECNET_MYSQL_PASSWORD: root
|
||||||
|
DECNET_MYSQL_DATABASE: decnet_test
|
||||||
|
|
||||||
test-fuzz:
|
test-fuzz:
|
||||||
name: Test (Fuzz)
|
name: Test (Fuzz)
|
||||||
@@ -86,6 +105,8 @@ jobs:
|
|||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
- run: pip install -e .[dev]
|
- run: pip install -e .[dev]
|
||||||
- run: pytest -m fuzz
|
- run: pytest -m fuzz
|
||||||
|
env:
|
||||||
|
SCHEMATHESIS_CONFIG: schemathesis.ci.toml
|
||||||
|
|
||||||
merge-to-testing:
|
merge-to-testing:
|
||||||
name: Merge dev → testing
|
name: Merge dev → testing
|
||||||
|
|||||||
@@ -33,13 +33,13 @@ jobs:
|
|||||||
id: version
|
id: version
|
||||||
run: |
|
run: |
|
||||||
# Calculate next version (v0.x)
|
# Calculate next version (v0.x)
|
||||||
LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0")
|
LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0")
|
||||||
NEXT_VER=$(python3 -c "
|
NEXT_VER=$(python3 -c "
|
||||||
tag = '$LATEST_TAG'.lstrip('v')
|
tag = '$LATEST_TAG'.lstrip('v')
|
||||||
parts = tag.split('.')
|
parts = tag.split('.')
|
||||||
major = int(parts[0]) if parts[0] else 0
|
major = int(parts[0]) if parts[0] else 0
|
||||||
minor = int(parts[1]) if len(parts) > 1 else 0
|
minor = int(parts[1]) if len(parts) > 1 else 0
|
||||||
print(f'{major}.{minor + 1}')
|
print(f'{major}.{minor + 1}.0')
|
||||||
")
|
")
|
||||||
|
|
||||||
echo "Next version: $NEXT_VER (calculated from $LATEST_TAG)"
|
echo "Next version: $NEXT_VER (calculated from $LATEST_TAG)"
|
||||||
@@ -49,7 +49,11 @@ jobs:
|
|||||||
|
|
||||||
git add pyproject.toml
|
git add pyproject.toml
|
||||||
git commit -m "chore: auto-release v$NEXT_VER [skip ci]" || echo "No changes to commit"
|
git commit -m "chore: auto-release v$NEXT_VER [skip ci]" || echo "No changes to commit"
|
||||||
git tag -a "v$NEXT_VER" -m "Auto-release v$NEXT_VER"
|
CHANGELOG=$(git log ${LATEST_TAG}..HEAD --oneline --no-decorate --no-merges)
|
||||||
|
git tag -a "v$NEXT_VER" -m "Auto-release v$NEXT_VER
|
||||||
|
|
||||||
|
Changes since $LATEST_TAG:
|
||||||
|
$CHANGELOG"
|
||||||
git push origin main --follow-tags
|
git push origin main --follow-tags
|
||||||
|
|
||||||
echo "version=$NEXT_VER" >> $GITHUB_OUTPUT
|
echo "version=$NEXT_VER" >> $GITHUB_OUTPUT
|
||||||
@@ -111,13 +115,13 @@ jobs:
|
|||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
cache-to: type=gha,mode=max
|
cache-to: type=gha,mode=max
|
||||||
|
|
||||||
|
- name: Install Trivy
|
||||||
|
run: |
|
||||||
|
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin
|
||||||
|
|
||||||
- name: Scan with Trivy
|
- name: Scan with Trivy
|
||||||
uses: aquasecurity/trivy-action@master
|
run: |
|
||||||
with:
|
trivy image --exit-code 1 --severity CRITICAL --ignore-unfixed decnet-${{ matrix.service }}:scan
|
||||||
image-ref: decnet-${{ matrix.service }}:scan
|
|
||||||
exit-code: "1"
|
|
||||||
severity: CRITICAL
|
|
||||||
ignore-unfixed: true
|
|
||||||
|
|
||||||
- name: Push image
|
- name: Push image
|
||||||
if: success()
|
if: success()
|
||||||
|
|||||||
11
.gitignore
vendored
11
.gitignore
vendored
@@ -1,6 +1,7 @@
|
|||||||
.venv/
|
.venv/
|
||||||
logs/
|
logs/
|
||||||
.claude/
|
.claude/*
|
||||||
|
CLAUDE.md
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.pyc
|
*.pyc
|
||||||
*.pyo
|
*.pyo
|
||||||
@@ -10,7 +11,6 @@ build/
|
|||||||
decnet-compose.yml
|
decnet-compose.yml
|
||||||
decnet-state.json
|
decnet-state.json
|
||||||
*.ini
|
*.ini
|
||||||
.env
|
|
||||||
decnet.log*
|
decnet.log*
|
||||||
*.loggy
|
*.loggy
|
||||||
*.nmap
|
*.nmap
|
||||||
@@ -18,8 +18,13 @@ linterfails.log
|
|||||||
webmail
|
webmail
|
||||||
windows1
|
windows1
|
||||||
*.db
|
*.db
|
||||||
|
*.db-shm
|
||||||
|
*.db-wal
|
||||||
|
decnet.*.log
|
||||||
decnet.json
|
decnet.json
|
||||||
.env
|
.env*
|
||||||
.env.local
|
.env.local
|
||||||
.coverage
|
.coverage
|
||||||
.hypothesis/
|
.hypothesis/
|
||||||
|
profiles/*
|
||||||
|
tests/test_decnet.db*
|
||||||
|
|||||||
58
CLAUDE.md
58
CLAUDE.md
@@ -1,58 +0,0 @@
|
|||||||
# CLAUDE.md
|
|
||||||
|
|
||||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
||||||
|
|
||||||
## Commands
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Install (dev)
|
|
||||||
pip install -e .
|
|
||||||
|
|
||||||
# List registered service plugins
|
|
||||||
decnet services
|
|
||||||
|
|
||||||
# Dry-run (generates compose, no containers)
|
|
||||||
decnet deploy --mode unihost --deckies 3 --randomize-services --dry-run
|
|
||||||
|
|
||||||
# Full deploy (requires root for MACVLAN)
|
|
||||||
sudo decnet deploy --mode unihost --deckies 5 --interface eth0 --randomize-services
|
|
||||||
sudo decnet deploy --mode unihost --deckies 3 --services ssh,smb --log-target 192.168.1.5:5140
|
|
||||||
|
|
||||||
# Status / teardown
|
|
||||||
decnet status
|
|
||||||
sudo decnet teardown --all
|
|
||||||
sudo decnet teardown --id decky-01
|
|
||||||
```
|
|
||||||
|
|
||||||
## Project Overview
|
|
||||||
|
|
||||||
DECNET is a honeypot/deception network framework. It deploys fake machines (called **deckies**) with realistic services (RDP, SMB, SSH, FTP, etc.) to lure and profile attackers. All attacker interactions are aggregated to an isolated logging network (ELK stack / SIEM).
|
|
||||||
|
|
||||||
## Deployment Models
|
|
||||||
|
|
||||||
**UNIHOST** — one real host spins up _n_ deckies via a container orchestrator. Simpler, single-machine deployment.
|
|
||||||
|
|
||||||
**SWARM (MULTIHOST)** — _n_ real hosts each running deckies. Orchestrated via Ansible/sshpass or similar tooling.
|
|
||||||
|
|
||||||
## Core Technology Choices
|
|
||||||
|
|
||||||
- **Containers**: Docker Compose is the starting point but other orchestration frameworks should be evaluated if they serve the project better. `debian:bookworm-slim` is the default base image; mixing in Ubuntu, CentOS, or other distros is encouraged to make the decoy network look heterogeneous.
|
|
||||||
- **Networking**: Deckies need to appear as real machines on the LAN (own MACs/IPs). MACVLAN and IPVLAN are candidates; the right driver depends on the host environment. WSL has known limitations — bare metal or a VM is preferred for testing.
|
|
||||||
- **Log pipeline**: Logstash → ELK stack → SIEM (isolated network, not reachable from decoy network)
|
|
||||||
|
|
||||||
## Architecture Constraints
|
|
||||||
|
|
||||||
- The decoy network must be reachable from the outside (attacker-facing).
|
|
||||||
- The logging/aggregation network must be isolated from the decoy network.
|
|
||||||
- A publicly accessible real server acts as the bridge between the two networks.
|
|
||||||
- Deckies should differ in exposed services and OS fingerprints to appear as a heterogeneous network.
|
|
||||||
- **IMPORTANT**: The system now strictly enforces dependency injection for storage. Do not import `SQLiteRepository` directly in new features; instead, use `get_repository()` from the factory or the FastAPI `get_repo` dependency.
|
|
||||||
|
|
||||||
## Development and testing
|
|
||||||
|
|
||||||
- For every new feature, pytests must me made.
|
|
||||||
- Pytest is the main testing framework in use.
|
|
||||||
- NEVER pass broken code to the user.
|
|
||||||
- Broken means: not running, not passing 100% tests, etc.
|
|
||||||
- After tests pass with 100%, always git commit your changes.
|
|
||||||
- NEVER add "Co-Authored-By" or any Claude attribution lines to git commit messages.
|
|
||||||
104
GEMINI.md
104
GEMINI.md
@@ -1,104 +0,0 @@
|
|||||||
# DECNET (Deception Network) Project Context
|
|
||||||
|
|
||||||
DECNET is a high-fidelity honeypot framework designed to deploy heterogeneous fleets of fake machines (called **deckies**) that appear as real hosts on a local network.
|
|
||||||
|
|
||||||
## Project Overview
|
|
||||||
|
|
||||||
- **Core Purpose:** To lure, profile, and log attacker interactions within a controlled, deceptive environment.
|
|
||||||
- **Key Technology:** Linux-native container networking (MACVLAN/IPvlan) combined with Docker to give each decoy its own MAC address, IP, and realistic TCP/IP stack behavior.
|
|
||||||
- **Main Components:**
|
|
||||||
- **Deckies:** Group of containers sharing a network namespace (one base container + multiple service containers).
|
|
||||||
- **Archetypes:** Pre-defined machine profiles (e.g., `windows-workstation`, `linux-server`) that bundle services and OS fingerprints.
|
|
||||||
- **Services:** Modular honeypot plugins (SSH, SMB, RDP, etc.) built as `BaseService` subclasses.
|
|
||||||
- **OS Fingerprinting:** Sysctl-based TCP/IP stack tuning to spoof OS detection (nmap).
|
|
||||||
- **Logging Pipeline:** RFC 5424 syslog forwarding to an isolated SIEM/ELK stack.
|
|
||||||
|
|
||||||
## Technical Stack
|
|
||||||
|
|
||||||
- **Language:** Python 3.11+
|
|
||||||
- **CLI Framework:** [Typer](https://typer.tiangolo.com/)
|
|
||||||
- **Data Validation:** [Pydantic v2](https://docs.pydantic.dev/)
|
|
||||||
- **Orchestration:** Docker Engine 24+ (via Docker SDK for Python)
|
|
||||||
- **Networking:** MACVLAN (default) or IPvlan L2 (for WiFi/restricted environments).
|
|
||||||
- **Testing:** Pytest (100% pass requirement).
|
|
||||||
- **Formatting/Linting:** Ruff, Bandit (SAST), pip-audit.
|
|
||||||
|
|
||||||
## Architecture
|
|
||||||
|
|
||||||
```text
|
|
||||||
Host NIC (eth0)
|
|
||||||
└── MACVLAN Bridge
|
|
||||||
├── Decky-01 (192.168.1.10) -> [Base] + [SSH] + [HTTP]
|
|
||||||
├── Decky-02 (192.168.1.11) -> [Base] + [SMB] + [RDP]
|
|
||||||
└── ...
|
|
||||||
```
|
|
||||||
|
|
||||||
- **Base Container:** Owns the IP/MAC, sets `sysctls` for OS spoofing, and runs `sleep infinity`.
|
|
||||||
- **Service Containers:** Use `network_mode: service:<base>` to share the identity and networking of the base container.
|
|
||||||
- **Isolation:** Decoy traffic is strictly separated from the logging network.
|
|
||||||
|
|
||||||
## Key Commands
|
|
||||||
|
|
||||||
### Development & Maintenance
|
|
||||||
- **Install (Dev):**
|
|
||||||
- `rm .venv -rf`
|
|
||||||
- `python3 -m venv .venv`
|
|
||||||
- `source .venv/bin/activate`
|
|
||||||
- `pip install -e .`
|
|
||||||
- **Run Tests:** `pytest` (Run before any commit)
|
|
||||||
- **Linting:** `ruff check .`
|
|
||||||
- **Security Scan:** `bandit -r decnet/`
|
|
||||||
- **Web Git:** git.resacachile.cl (Gitea)
|
|
||||||
|
|
||||||
### CLI Usage
|
|
||||||
- **List Services:** `decnet services`
|
|
||||||
- **List Archetypes:** `decnet archetypes`
|
|
||||||
- **Dry Run (Compose Gen):** `decnet deploy --deckies 3 --randomize-services --dry-run`
|
|
||||||
- **Deploy (Full):** `sudo .venv/bin/decnet deploy --interface eth0 --deckies 5 --randomize-services`
|
|
||||||
- **Status:** `decnet status`
|
|
||||||
- **Teardown:** `sudo .venv/bin/decnet teardown --all`
|
|
||||||
|
|
||||||
## Development Conventions
|
|
||||||
|
|
||||||
- **Code Style:**
|
|
||||||
- Strict adherence to Ruff/PEP8.
|
|
||||||
- **Always use typed variables**. If any non-types variables are found, they must be corrected.
|
|
||||||
- The correct way is `x: int = 1`, never `x : int = 1`.
|
|
||||||
- If assignment is present, always use a space between the type and the equal sign `x: int = 1`.
|
|
||||||
- **Never** use lowercase L (l), uppercase o (O) or uppercase i (i) in single-character names.
|
|
||||||
- **Internal vars are to be declared with an underscore** (_internal_variable_name).
|
|
||||||
- **Internal to internal vars are to be declared with double underscore** (__internal_variable_name).
|
|
||||||
- Always use snake_case for code.
|
|
||||||
- Always use PascalCase for classes and generics.
|
|
||||||
- **Testing:** New features MUST include a `pytest` case. 100% test pass rate is mandatory before merging.
|
|
||||||
- **Plugin System:**
|
|
||||||
- New services go in `decnet/services/<name>.py`.
|
|
||||||
- Subclass `decnet.services.base.BaseService`.
|
|
||||||
- The registry uses auto-discovery; no manual registration required.
|
|
||||||
- **Configuration:**
|
|
||||||
- Use Pydantic models in `decnet/config.py` for any new settings.
|
|
||||||
- INI file parsing is handled in `decnet/ini_loader.py`.
|
|
||||||
- **State Management:**
|
|
||||||
- Runtime state is persisted in `decnet-state.json`.
|
|
||||||
- Do not modify this file manually.
|
|
||||||
- **General Development Guidelines**:
|
|
||||||
- **Never** commit broken code, or before running `pytest`s or `bandit` at the project level.
|
|
||||||
- **No matter how small** the changes, they must be committed.
|
|
||||||
- **If new features are addedd** new tests must be added, too.
|
|
||||||
- **Never present broken code to the user**. Test, validate, then present.
|
|
||||||
- **Extensive testing** for every function must be created.
|
|
||||||
- **Always develop in the `dev` branch, never in `main`.**
|
|
||||||
- **Test in the `testing` branch.**
|
|
||||||
- **IMPORTANT**: The system now strictly enforces dependency injection for storage. Do not import `SQLiteRepository` directly in new features; instead, use `get_repository()` from the factory or the FastAPI `get_repo` dependency.
|
|
||||||
|
|
||||||
## Directory Structure
|
|
||||||
|
|
||||||
- `decnet/`: Main source code.
|
|
||||||
- `services/`: Honeypot service implementations.
|
|
||||||
- `logging/`: Syslog formatting and forwarding logic.
|
|
||||||
- `correlation/`: (In Progress) Logic for grouping attacker events.
|
|
||||||
- `templates/`: Dockerfiles and entrypoint scripts for services.
|
|
||||||
- `tests/`: Pytest suite.
|
|
||||||
- `pyproject.toml`: Dependency and entry point definitions.
|
|
||||||
- `CLAUDE.md`: Claude-specific environment guidance.
|
|
||||||
- `DEVELOPMENT.md`: Roadmap and TODOs.
|
|
||||||
116
README.md
116
README.md
@@ -180,7 +180,6 @@ Archetypes are pre-packaged machine identities. One slug sets services, preferre
|
|||||||
|
|
||||||
| Slug | Services | OS Fingerprint | Description |
|
| Slug | Services | OS Fingerprint | Description |
|
||||||
|---|---|---|---|
|
|---|---|---|---|
|
||||||
| `deaddeck` | ssh | linux | Initial machine to be exploited. Real SSH container. |
|
|
||||||
| `windows-workstation` | smb, rdp | windows | Corporate Windows desktop |
|
| `windows-workstation` | smb, rdp | windows | Corporate Windows desktop |
|
||||||
| `windows-server` | smb, rdp, ldap | windows | Windows domain member |
|
| `windows-server` | smb, rdp, ldap | windows | Windows domain member |
|
||||||
| `domain-controller` | ldap, smb, rdp, llmnr | windows | Active Directory DC |
|
| `domain-controller` | ldap, smb, rdp, llmnr | windows | Active Directory DC |
|
||||||
@@ -271,11 +270,6 @@ List live at any time with `decnet services`.
|
|||||||
Most services accept persona configuration to make honeypot responses more convincing. Config is passed via INI subsections (`[decky-name.service]`) or the `service_config` field in code.
|
Most services accept persona configuration to make honeypot responses more convincing. Config is passed via INI subsections (`[decky-name.service]`) or the `service_config` field in code.
|
||||||
|
|
||||||
```ini
|
```ini
|
||||||
[deaddeck-1]
|
|
||||||
amount=1
|
|
||||||
archetype=deaddeck
|
|
||||||
ssh.password=admin
|
|
||||||
|
|
||||||
[decky-webmail.http]
|
[decky-webmail.http]
|
||||||
server_header = Apache/2.4.54 (Debian)
|
server_header = Apache/2.4.54 (Debian)
|
||||||
fake_app = wordpress
|
fake_app = wordpress
|
||||||
@@ -514,6 +508,10 @@ DECNET_WEB_HOST=0.0.0.0
|
|||||||
DECNET_WEB_PORT=8080
|
DECNET_WEB_PORT=8080
|
||||||
DECNET_ADMIN_USER=admin
|
DECNET_ADMIN_USER=admin
|
||||||
DECNET_ADMIN_PASSWORD=admin
|
DECNET_ADMIN_PASSWORD=admin
|
||||||
|
|
||||||
|
# Database pool tuning (applies to both SQLite and MySQL)
|
||||||
|
DECNET_DB_POOL_SIZE=20 # base pool connections (default: 20)
|
||||||
|
DECNET_DB_MAX_OVERFLOW=40 # extra connections under burst (default: 40)
|
||||||
```
|
```
|
||||||
|
|
||||||
Copy `.env.example` to `.env.local` and modify it to suit your environment.
|
Copy `.env.example` to `.env.local` and modify it to suit your environment.
|
||||||
@@ -682,6 +680,112 @@ The test suite covers:
|
|||||||
|
|
||||||
Every new feature requires passing tests before merging.
|
Every new feature requires passing tests before merging.
|
||||||
|
|
||||||
|
### Stress Testing
|
||||||
|
|
||||||
|
A [Locust](https://locust.io)-based stress test suite lives in `tests/stress/`. It hammers every API endpoint with realistic traffic patterns to find throughput ceilings and latency degradation.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run via pytest (starts its own server)
|
||||||
|
pytest -m stress tests/stress/ -v -x -n0 -s
|
||||||
|
|
||||||
|
# Crank it up
|
||||||
|
STRESS_USERS=2000 STRESS_SPAWN_RATE=200 STRESS_DURATION=120 pytest -m stress tests/stress/ -v -x -n0 -s
|
||||||
|
|
||||||
|
# Standalone Locust web UI against a running server
|
||||||
|
locust -f tests/stress/locustfile.py --host http://localhost:8000
|
||||||
|
```
|
||||||
|
|
||||||
|
| Env var | Default | Description |
|
||||||
|
|---|---|---|
|
||||||
|
| `STRESS_USERS` | `500` | Total simulated users |
|
||||||
|
| `STRESS_SPAWN_RATE` | `50` | Users spawned per second |
|
||||||
|
| `STRESS_DURATION` | `60` | Test duration in seconds |
|
||||||
|
| `STRESS_WORKERS` | CPU count (max 4) | Uvicorn workers for the test server |
|
||||||
|
| `STRESS_MIN_RPS` | `500` | Minimum RPS to pass baseline test |
|
||||||
|
| `STRESS_MAX_P99_MS` | `200` | Maximum p99 latency (ms) to pass |
|
||||||
|
| `STRESS_SPIKE_USERS` | `1000` | Users for thundering herd test |
|
||||||
|
| `STRESS_SUSTAINED_USERS` | `200` | Users for sustained load test |
|
||||||
|
|
||||||
|
#### Measured baseline
|
||||||
|
|
||||||
|
Reference numbers from recent Locust runs against a MySQL backend
|
||||||
|
(asyncmy driver). All runs hold zero failures throughout.
|
||||||
|
|
||||||
|
**Single worker** (unless noted):
|
||||||
|
|
||||||
|
| Metric | 500u, tracing on | 1500u, tracing on | 1500u, tracing **off** | 1500u, tracing off, **pinned to 1 core** | 1500u, tracing off, **12 workers** |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| Requests served | 396,672 | 232,648 | 277,214 | 3,532 | 308,024 |
|
||||||
|
| Failures | 0 | 0 | 0 | 0 | 0 |
|
||||||
|
| Throughput (current RPS) | ~960 | ~880 | ~990 | ~46 | ~1,585 |
|
||||||
|
| Average latency | 465 ms | 1,774 ms | 1,489 ms | 21.7 s | 930 ms |
|
||||||
|
| Median (p50) | 100 ms | 690 ms | 340 ms | 270 ms | 700 ms |
|
||||||
|
| p95 | 1.9 s | 6.5 s | 5.7 s | 115 s | 2.7 s |
|
||||||
|
| p99 | 2.9 s | 9.5 s | 8.4 s | 122 s | 4.2 s |
|
||||||
|
| Max observed | 8.3 s | 24.4 s | 20.9 s | 124.5 s | 16.5 s |
|
||||||
|
|
||||||
|
Ramp is 15 users/s for the 500u column, 40 users/s otherwise.
|
||||||
|
|
||||||
|
Takeaways:
|
||||||
|
|
||||||
|
- **Tracing off**: at 1500 users, flipping `DECNET_TRACING=false`
|
||||||
|
halves p50 (690 → 340 ms) and pushes RPS from ~880 past the
|
||||||
|
500-user figure on a single worker.
|
||||||
|
- **12 workers**: RPS scales ~1.6× over a single worker (~990 →
|
||||||
|
~1585). Sublinear because the workload is DB-bound — MySQL and the
|
||||||
|
connection pool become the new ceiling, not Python. p99 drops from
|
||||||
|
8.4 s to 4.2 s.
|
||||||
|
- **Connection math**: `DECNET_DB_POOL_SIZE=20` × `DECNET_DB_MAX_OVERFLOW=40`
|
||||||
|
× 12 workers = 720 connections at peak. MySQL's default
|
||||||
|
`max_connections=151` needs bumping (we used 2000) before running
|
||||||
|
multi-worker load.
|
||||||
|
- **Single-core pinning**: ~46 RPS with p95 near two minutes. Interesting
|
||||||
|
as a "physics floor" datapoint — not a production config.
|
||||||
|
|
||||||
|
Top endpoints by volume: `/api/v1/attackers`, `/api/v1/deckies`,
|
||||||
|
`/api/v1/bounty`, `/api/v1/logs/histogram`, `/api/v1/config`,
|
||||||
|
`/api/v1/health`, `/api/v1/auth/login`, `/api/v1/logs`.
|
||||||
|
|
||||||
|
Notes on tuning:
|
||||||
|
|
||||||
|
- **Python 3.14 is currently a no-go for the API server.** Under heavy
|
||||||
|
concurrent async load the reworked 3.14 GC segfaults inside
|
||||||
|
`mark_all_reachable` (observed in `_PyGC_Collect` during pending-GC
|
||||||
|
on 3.14.3). Stick to Python 3.11–3.13 until upstream stabilises.
|
||||||
|
- Router-level TTL caches on hot count/stats endpoints (`/stats`,
|
||||||
|
`/logs` count, `/attackers` count, `/bounty`, `/logs/histogram`,
|
||||||
|
`/deckies`, `/config`) collapse concurrent duplicate work onto a
|
||||||
|
single DB hit per window — essential to reach this RPS on one worker.
|
||||||
|
- Turning off request tracing (`DECNET_TRACING=false`) is the next
|
||||||
|
free headroom: tracing was still on during the run above.
|
||||||
|
- On SQLite, `DECNET_DB_POOL_PRE_PING=false` skips the per-checkout
|
||||||
|
`SELECT 1`. On MySQL, keep it `true` — network disconnects are real.
|
||||||
|
|
||||||
|
#### System tuning: open file limit
|
||||||
|
|
||||||
|
Under heavy load (500+ concurrent users), the server will exhaust the default Linux open file limit (`ulimit -n`), causing `OSError: [Errno 24] Too many open files`. Most distros default to **1024**, which is far too low for stress testing or production use.
|
||||||
|
|
||||||
|
**Before running stress tests:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check current limit
|
||||||
|
ulimit -n
|
||||||
|
|
||||||
|
# Bump for this shell session
|
||||||
|
ulimit -n 65536
|
||||||
|
```
|
||||||
|
|
||||||
|
**Permanent fix** — add to `/etc/security/limits.conf`:
|
||||||
|
|
||||||
|
```
|
||||||
|
* soft nofile 65536
|
||||||
|
* hard nofile 65536
|
||||||
|
```
|
||||||
|
|
||||||
|
Or for systemd-managed services, add `LimitNOFILE=65536` to the unit file.
|
||||||
|
|
||||||
|
> This applies to production deployments too — any server handling hundreds of concurrent connections needs a raised file descriptor limit.
|
||||||
|
|
||||||
# AI Disclosure
|
# AI Disclosure
|
||||||
|
|
||||||
This project has been made with lots, and I mean lots of help from AIs. While most of the design was made by me, most of the coding was done by AI models.
|
This project has been made with lots, and I mean lots of help from AIs. While most of the design was made by me, most of the coding was done by AI models.
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
Collector starting → /home/anti/Tools/DECNET/decnet.log
|
|
||||||
64
decnet.ini.example
Normal file
64
decnet.ini.example
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
; /etc/decnet/decnet.ini — DECNET host configuration
|
||||||
|
;
|
||||||
|
; Copy to /etc/decnet/decnet.ini and edit. Values here seed os.environ at
|
||||||
|
; CLI startup via setdefault() — real env vars still win, so you can
|
||||||
|
; override any value on the shell without editing this file.
|
||||||
|
;
|
||||||
|
; A missing file is fine; every daemon has sensible defaults. The main
|
||||||
|
; reason to use this file is to skip typing the same flags on every
|
||||||
|
; `decnet` invocation and to pin a host's role via `mode`.
|
||||||
|
|
||||||
|
[decnet]
|
||||||
|
; mode = agent | master
|
||||||
|
; agent — worker host (runs `decnet agent`, `decnet forwarder`, `decnet updater`).
|
||||||
|
; Master-only commands (api, swarmctl, swarm, deploy, teardown, ...)
|
||||||
|
; are hidden from `decnet --help` and refuse to run.
|
||||||
|
; master — central server (runs `decnet api`, `decnet web`, `decnet swarmctl`,
|
||||||
|
; `decnet listener`). All commands visible.
|
||||||
|
mode = agent
|
||||||
|
|
||||||
|
; disallow-master = true (default when mode=agent)
|
||||||
|
; Set to false for hybrid dev hosts that legitimately run both roles.
|
||||||
|
disallow-master = true
|
||||||
|
|
||||||
|
; log-directory — root for DECNET's per-component logs. Systemd units set
|
||||||
|
; DECNET_SYSTEM_LOGS=<log-directory>/decnet.<component>.log so agent, forwarder,
|
||||||
|
; and engine each get their own file. The forwarder tails decnet.log.
|
||||||
|
log-directory = /var/log/decnet
|
||||||
|
|
||||||
|
|
||||||
|
; ─── Agent-only settings (read when mode=agent) ───────────────────────────
|
||||||
|
[agent]
|
||||||
|
; Where the master's syslog-TLS listener lives. DECNET_SWARM_MASTER_HOST.
|
||||||
|
master-host = 192.168.1.50
|
||||||
|
; Master listener port (RFC 5425 default 6514). DECNET_SWARM_SYSLOG_PORT.
|
||||||
|
swarm-syslog-port = 6514
|
||||||
|
; Bind address/port for this worker's agent API (mTLS).
|
||||||
|
agent-port = 8765
|
||||||
|
; Cert bundle dir — must contain ca.crt, worker.crt, worker.key from enroll.
|
||||||
|
; DECNET_AGENT_DIR — honored by the forwarder child as well.
|
||||||
|
agent-dir = /home/anti/.decnet/agent
|
||||||
|
; Updater cert bundle (required for `decnet updater`).
|
||||||
|
updater-dir = /home/anti/.decnet/updater
|
||||||
|
|
||||||
|
|
||||||
|
; ─── Master-only settings (read when mode=master) ─────────────────────────
|
||||||
|
[master]
|
||||||
|
; Main API (REST for the React dashboard). DECNET_API_HOST / _PORT.
|
||||||
|
api-host = 0.0.0.0
|
||||||
|
api-port = 8000
|
||||||
|
; React dev-server dashboard (`decnet web`). DECNET_WEB_HOST / _PORT.
|
||||||
|
web-host = 0.0.0.0
|
||||||
|
web-port = 8080
|
||||||
|
; Swarm controller (master-internal). DECNET_SWARMCTL_HOST isn't exposed
|
||||||
|
; under that name today — this block is the forward-compatible spelling.
|
||||||
|
; swarmctl-host = 127.0.0.1
|
||||||
|
; swarmctl-port = 8770
|
||||||
|
; Syslog-over-TLS listener bind address and port. DECNET_LISTENER_HOST and
|
||||||
|
; DECNET_SWARM_SYSLOG_PORT. The listener is auto-spawned by `decnet swarmctl`.
|
||||||
|
listener-host = 0.0.0.0
|
||||||
|
swarm-syslog-port = 6514
|
||||||
|
; Master CA dir (for enroll / swarm cert issuance).
|
||||||
|
; ca-dir = /home/anti/.decnet/ca
|
||||||
|
; JWT secret for the web API. MUST be set; 32+ bytes. Keep out of git.
|
||||||
|
; jwt-secret = REPLACE_ME_WITH_A_32_BYTE_SECRET
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
"""DECNET — honeypot deception-network framework.
|
||||||
|
|
||||||
|
This __init__ runs once, on the first `import decnet.*`. It seeds
|
||||||
|
os.environ from /etc/decnet/decnet.ini (if present) so that later
|
||||||
|
module-level reads in decnet.env pick up the INI values as if they had
|
||||||
|
been exported by the shell. Real env vars always win via setdefault().
|
||||||
|
|
||||||
|
Kept minimal on purpose — any heavier work belongs in a submodule.
|
||||||
|
"""
|
||||||
|
from decnet.config_ini import load_ini_config as _load_ini_config
|
||||||
|
|
||||||
|
_load_ini_config()
|
||||||
|
|||||||
7
decnet/agent/__init__.py
Normal file
7
decnet/agent/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
"""DECNET worker agent — runs on every SWARM worker host.
|
||||||
|
|
||||||
|
Exposes an mTLS-protected FastAPI service the master's SWARM controller
|
||||||
|
calls to deploy, mutate, and tear down deckies locally. The agent reuses
|
||||||
|
the existing `decnet.engine.deployer` code path unchanged, so a worker runs
|
||||||
|
deckies the same way `decnet deploy --mode unihost` does today.
|
||||||
|
"""
|
||||||
144
decnet/agent/app.py
Normal file
144
decnet/agent/app.py
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
"""Worker-side FastAPI app.
|
||||||
|
|
||||||
|
Protected by mTLS at the ASGI/uvicorn transport layer: uvicorn is started
|
||||||
|
with ``--ssl-ca-certs`` + ``--ssl-cert-reqs 2`` (CERT_REQUIRED), so any
|
||||||
|
client that cannot prove a cert signed by the DECNET CA is rejected before
|
||||||
|
reaching a handler. Once past the TLS handshake, all peers are trusted
|
||||||
|
equally (the only entity holding a CA-signed cert is the master
|
||||||
|
controller).
|
||||||
|
|
||||||
|
Endpoints mirror the existing unihost CLI verbs:
|
||||||
|
|
||||||
|
* ``POST /deploy`` — body: serialized ``DecnetConfig``
|
||||||
|
* ``POST /teardown`` — body: optional ``{"decky_id": "..."}``
|
||||||
|
* ``POST /mutate`` — body: ``{"decky_id": "...", "services": [...]}``
|
||||||
|
* ``GET /status`` — deployment snapshot
|
||||||
|
* ``GET /health``   — liveness probe; mTLS is still required (the master
  pings it with its own client cert).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from fastapi import FastAPI, HTTPException
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from decnet.agent import executor as _exec
|
||||||
|
from decnet.agent import heartbeat as _heartbeat
|
||||||
|
from decnet.config import DecnetConfig
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
|
||||||
|
log = get_logger("agent.app")
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def _lifespan(app: FastAPI):
    """FastAPI lifespan hook: run the master heartbeat for the app's lifetime."""
    # Best-effort: if identity/bundle plumbing isn't configured (e.g. dev
    # runs or non-enrolled hosts), heartbeat.start() is a silent no-op.
    _heartbeat.start()
    try:
        yield
    finally:
        # Always join the heartbeat task, even on abnormal shutdown.
        await _heartbeat.stop()
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level app instance; uvicorn loads "decnet.agent.app:app".
app = FastAPI(
    title="DECNET SWARM Agent",
    version="0.1.0",
    docs_url=None,  # no interactive docs on worker — narrow attack surface
    redoc_url=None,
    openapi_url=None,
    lifespan=_lifespan,
    responses={
        400: {"description": "Malformed request body"},
        500: {"description": "Executor error"},
    },
)
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ schemas
|
||||||
|
|
||||||
|
class DeployRequest(BaseModel):
    """Body of ``POST /deploy``."""

    # Full config to materialise locally; validated by pydantic on ingestion.
    config: DecnetConfig = Field(..., description="Full DecnetConfig to materialise on this worker")
    # Mirror the unihost CLI flags of the same names.
    dry_run: bool = False
    no_cache: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
class TeardownRequest(BaseModel):
    """Body of ``POST /teardown``; ``decky_id=None`` means tear down everything."""

    decky_id: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class MutateRequest(BaseModel):
    """Body of ``POST /mutate`` (endpoint currently answers 501)."""

    decky_id: str
    services: list[str]
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ routes
|
||||||
|
|
||||||
|
@app.get("/health")
|
||||||
|
async def health() -> dict[str, str]:
|
||||||
|
return {"status": "ok"}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/status")
|
||||||
|
async def status() -> dict:
|
||||||
|
return await _exec.status()
|
||||||
|
|
||||||
|
|
||||||
|
@app.post(
    "/deploy",
    responses={500: {"description": "Deployer raised an exception materialising the config"}},
)
async def deploy(req: DeployRequest) -> dict:
    """Materialise the given DecnetConfig on this worker.

    Any executor failure is logged with traceback here and surfaced to the
    master as a 500 whose detail is the exception message.
    """
    try:
        await _exec.deploy(req.config, dry_run=req.dry_run, no_cache=req.no_cache)
    except Exception as exc:
        log.exception("agent.deploy failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return {"status": "deployed", "deckies": len(req.config.deckies)}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post(
    "/teardown",
    responses={500: {"description": "Teardown raised an exception"}},
)
async def teardown(req: TeardownRequest) -> dict:
    """Tear down one decky (or everything when decky_id is None)."""
    try:
        await _exec.teardown(req.decky_id)
    except Exception as exc:
        log.exception("agent.teardown failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return {"status": "torn_down", "decky_id": req.decky_id}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post(
    "/self-destruct",
    responses={500: {"description": "Reaper could not be scheduled"}},
)
async def self_destruct() -> dict:
    """Stop all DECNET services on this worker and delete the install
    footprint. Called by the master during decommission. Logs under
    /var/log/decnet* are preserved. Fire-and-forget — the (default 200)
    response is returned before the detached reaper starts deleting
    files."""
    try:
        await _exec.self_destruct()
    except Exception as exc:
        log.exception("agent.self_destruct failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return {"status": "self_destruct_scheduled"}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post(
    "/mutate",
    responses={501: {"description": "Worker-side mutate not yet implemented"}},
)
async def mutate(req: MutateRequest) -> dict:
    """Placeholder: per-decky mutation is not implemented on the worker."""
    # TODO: implement worker-side mutate. Currently the master performs
    # mutation by re-sending a full /deploy with the updated DecnetConfig;
    # this avoids duplicating mutation logic on the worker for v1. When
    # ready, replace the 501 with a real redeploy-of-a-single-decky path.
    raise HTTPException(
        status_code=501,
        detail="Per-decky mutate is performed via /deploy with updated services",
    )
|
||||||
223
decnet/agent/executor.py
Normal file
223
decnet/agent/executor.py
Normal file
@@ -0,0 +1,223 @@
|
|||||||
|
"""Thin adapter between the agent's HTTP endpoints and the existing
|
||||||
|
``decnet.engine.deployer`` code path.
|
||||||
|
|
||||||
|
Kept deliberately small: the agent does not re-implement deployment logic,
|
||||||
|
it only translates a master RPC into the same function calls the unihost
|
||||||
|
CLI already uses. Everything runs in a worker thread (the deployer is
|
||||||
|
blocking) so the FastAPI event loop stays responsive.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from ipaddress import IPv4Network
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from decnet.engine import deployer as _deployer
|
||||||
|
from decnet.config import DecnetConfig, load_state, clear_state
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.network import (
|
||||||
|
allocate_ips,
|
||||||
|
detect_interface,
|
||||||
|
detect_subnet,
|
||||||
|
get_host_ip,
|
||||||
|
)
|
||||||
|
|
||||||
|
log = get_logger("agent.executor")
|
||||||
|
|
||||||
|
|
||||||
|
def _relocalize(config: DecnetConfig) -> DecnetConfig:
    """Rewrite a master-built config to the worker's local network reality.

    The master populates ``interface``/``subnet``/``gateway`` from its own
    box before dispatching, which blows up the deployer on any worker whose
    NIC name differs (common in heterogeneous fleets — master on ``wlp6s0``,
    worker on ``enp0s3``). We always re-detect locally; if the worker sits
    on a different subnet than the master, decky IPs are re-allocated from
    the worker's subnet so they're actually reachable.
    """
    local_iface = detect_interface()
    local_subnet, local_gateway = detect_subnet(local_iface)
    local_host_ip = get_host_ip(local_iface)

    # Interface/subnet/gateway are unconditionally overwritten with local values.
    updates: dict[str, Any] = {
        "interface": local_iface,
        "subnet": local_subnet,
        "gateway": local_gateway,
    }

    # strict=False tolerates a host address (e.g. 192.168.1.50/24) as input.
    master_net = IPv4Network(config.subnet, strict=False) if config.subnet else None
    local_net = IPv4Network(local_subnet, strict=False)
    if master_net is None or master_net != local_net:
        log.info(
            "agent.deploy subnet mismatch master=%s local=%s — re-allocating decky IPs",
            config.subnet, local_subnet,
        )
        fresh_ips = allocate_ips(
            subnet=local_subnet,
            gateway=local_gateway,
            host_ip=local_host_ip,
            count=len(config.deckies),
        )
        # model_copy: each decky keeps all fields except a freshly-assigned IP.
        new_deckies = [d.model_copy(update={"ip": ip}) for d, ip in zip(config.deckies, fresh_ips)]
        updates["deckies"] = new_deckies

    return config.model_copy(update=updates)
|
||||||
|
|
||||||
|
|
||||||
|
async def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False) -> None:
    """Run the blocking deployer off-loop. The deployer itself calls
    save_state() internally once the compose file is materialised."""
    log.info(
        "agent.deploy mode=%s deckies=%d interface=%s (incoming)",
        config.mode, len(config.deckies), config.interface,
    )
    # Swarm configs were built on the master's box; rewrite network facts
    # (and possibly decky IPs) to match this worker before deploying.
    if config.mode == "swarm":
        config = _relocalize(config)
        log.info(
            "agent.deploy relocalized interface=%s subnet=%s gateway=%s",
            config.interface, config.subnet, config.gateway,
        )
    # NOTE(review): the trailing False positional mirrors the unihost CLI
    # call path — confirm which deployer parameter it maps to.
    await asyncio.to_thread(_deployer.deploy, config, dry_run, no_cache, False)
|
||||||
|
|
||||||
|
|
||||||
|
async def teardown(decky_id: str | None = None) -> None:
    """Tear down one decky (or all when decky_id is None) off-loop.

    Saved state is cleared only on a full teardown; tearing down a single
    decky leaves the config in place for the remaining deckies.
    """
    log.info("agent.teardown decky_id=%s", decky_id)
    await asyncio.to_thread(_deployer.teardown, decky_id)
    if decky_id is None:
        await asyncio.to_thread(clear_state)
|
||||||
|
|
||||||
|
|
||||||
|
def _decky_runtime_states(config: DecnetConfig) -> dict[str, dict[str, Any]]:
    """Map decky_name → {"running": bool, "services": {svc: container_state}}.

    Queried so the master can tell, after a partial-failure deploy, which
    deckies actually came up instead of tainting the whole shard as failed.
    Best-effort: a docker error returns an empty map, not an exception.
    """
    try:
        import docker  # local import — agent-only path
        client = docker.from_env()
        # all=True includes stopped containers; ignore_removed avoids races
        # with containers deleted mid-listing.
        live = {c.name: c.status for c in client.containers.list(all=True, ignore_removed=True)}
    except Exception:  # pragma: no cover — defensive
        log.exception("_decky_runtime_states: docker query failed")
        return {}

    out: dict[str, dict[str, Any]] = {}
    for d in config.deckies:
        # Container naming convention: "<decky>-<service>" with underscores in
        # the service name rendered as dashes; "absent" = no container found.
        svc_states = {
            svc: live.get(f"{d.name}-{svc.replace('_', '-')}", "absent")
            for svc in d.services
        }
        out[d.name] = {
            # A decky with zero services is reported as not running.
            "running": bool(svc_states) and all(s == "running" for s in svc_states.values()),
            "services": svc_states,
        }
    return out
|
||||||
|
|
||||||
|
|
||||||
|
_REAPER_SCRIPT = r"""#!/bin/bash
|
||||||
|
# DECNET agent self-destruct reaper.
|
||||||
|
# Runs detached from the agent process so it survives the agent's death.
|
||||||
|
# Waits briefly for the HTTP response to drain, then stops services,
|
||||||
|
# wipes install paths, and preserves logs.
|
||||||
|
set +e
|
||||||
|
|
||||||
|
sleep 3
|
||||||
|
|
||||||
|
# Stop decky containers started by the local deployer (best-effort).
|
||||||
|
if command -v docker >/dev/null 2>&1; then
|
||||||
|
docker ps -q --filter "label=com.docker.compose.project=decnet" | xargs -r docker stop
|
||||||
|
docker ps -aq --filter "label=com.docker.compose.project=decnet" | xargs -r docker rm -f
|
||||||
|
docker network rm decnet_lan 2>/dev/null
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Stop+disable every systemd unit the installer may have dropped.
|
||||||
|
for unit in decnet-agent decnet-engine decnet-collector decnet-forwarder decnet-prober decnet-sniffer decnet-updater; do
|
||||||
|
systemctl stop "$unit" 2>/dev/null
|
||||||
|
systemctl disable "$unit" 2>/dev/null
|
||||||
|
done
|
||||||
|
|
||||||
|
# Nuke install paths. Logs under /var/log/decnet* are intentionally
|
||||||
|
# preserved — the operator typically wants them for forensic review.
|
||||||
|
rm -rf /opt/decnet* /var/lib/decnet/* /usr/local/bin/decnet* /etc/decnet
|
||||||
|
rm -f /etc/systemd/system/decnet-*.service /etc/systemd/system/decnet-*.timer
|
||||||
|
|
||||||
|
systemctl daemon-reload 2>/dev/null
|
||||||
|
rm -f "$0"
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
async def self_destruct() -> None:
    """Tear down deckies, then spawn a detached reaper that wipes the
    install footprint. Returns immediately so the HTTP response can drain
    before the reaper starts deleting files out from under the agent."""
    import os
    import shutil
    import subprocess  # nosec B404
    import tempfile

    # Best-effort teardown first — the reaper also runs docker stop, but
    # going through the deployer gives the host-macvlan/ipvlan helper a
    # chance to clean up routes cleanly.
    try:
        await asyncio.to_thread(_deployer.teardown, None)
        await asyncio.to_thread(clear_state)
    except Exception:
        log.exception("self_destruct: pre-reap teardown failed — reaper will force-stop containers")

    # Reaper lives under /tmp so it survives rm -rf /opt/decnet*.
    fd, path = tempfile.mkstemp(prefix="decnet-reaper-", suffix=".sh", dir="/tmp")  # nosec B108 — reaper must outlive /opt/decnet removal
    try:
        os.write(fd, _REAPER_SCRIPT.encode())
    finally:
        os.close(fd)
    os.chmod(path, 0o700)  # nosec B103 — root-owned reaper, needs exec

    # The reaper MUST run outside decnet-agent.service's cgroup — otherwise
    # `systemctl stop decnet-agent` SIGTERMs the whole cgroup (reaper included)
    # before rm -rf completes. `start_new_session=True` gets us a fresh POSIX
    # session but does NOT escape the systemd cgroup. So we prefer
    # `systemd-run` (with --collect/--unit: the reaper runs as a transient
    # unit in its own cgroup, detached from the caller's service), falling
    # back to a bare Popen if systemd-run is unavailable (non-systemd host /
    # container).
    systemd_run = shutil.which("systemd-run")
    if systemd_run:
        argv = [
            systemd_run,
            "--collect",
            "--unit", f"decnet-reaper-{os.getpid()}",
            "--description", "DECNET agent self-destruct reaper",
            "/bin/bash", path,
        ]
        spawn_kwargs = {"start_new_session": True}
    else:
        argv = ["/bin/bash", path]
        spawn_kwargs = {"start_new_session": True}

    subprocess.Popen(  # nosec B603
        argv,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        close_fds=True,
        **spawn_kwargs,
    )
    log.warning(
        "self_destruct: reaper spawned path=%s via=%s — agent will die in ~3s",
        path, "systemd-run" if systemd_run else "popen",
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def status() -> dict[str, Any]:
    """Snapshot the local deployment state for /status and heartbeats.

    Returns {"deployed": False, "deckies": []} when no state is saved;
    otherwise the saved config's mode, compose path, deckies, and the
    best-effort per-decky container runtime states.
    """
    state = await asyncio.to_thread(load_state)
    if state is None:
        return {"deployed": False, "deckies": []}
    config, _compose_path = state
    runtime = await asyncio.to_thread(_decky_runtime_states, config)
    return {
        "deployed": True,
        "mode": config.mode,
        "compose_path": str(_compose_path),
        "deckies": [d.model_dump() for d in config.deckies],
        "runtime": runtime,
    }
|
||||||
134
decnet/agent/heartbeat.py
Normal file
134
decnet/agent/heartbeat.py
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
"""Agent → master liveness heartbeat loop.
|
||||||
|
|
||||||
|
Every ``INTERVAL_S`` seconds the worker posts ``executor.status()`` to
|
||||||
|
``POST <master>/swarm/heartbeat`` over mTLS. The master pins the
|
||||||
|
presented client cert's SHA-256 against the ``SwarmHost`` row for the
|
||||||
|
claimed ``host_uuid``; a match refreshes ``last_heartbeat`` + each
|
||||||
|
``DeckyShard``'s snapshot + runtime state.
|
||||||
|
|
||||||
|
Identity comes from ``/etc/decnet/decnet.ini`` (seeded by the enroll
|
||||||
|
bundle) — specifically ``DECNET_HOST_UUID`` and ``DECNET_MASTER_HOST``.
|
||||||
|
The worker's existing ``~/.decnet/agent/`` bundle (or
|
||||||
|
``/etc/decnet/agent/``) provides the mTLS client cert.
|
||||||
|
|
||||||
|
Started/stopped via the agent FastAPI app's lifespan. If identity
|
||||||
|
plumbing is missing (pre-enrollment dev runs) the loop logs at DEBUG and
|
||||||
|
declines to start — callers don't have to guard it.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import pathlib
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from decnet.agent import executor as _exec
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.swarm import pki
|
||||||
|
from decnet.swarm.log_forwarder import build_worker_ssl_context
|
||||||
|
|
||||||
|
log = get_logger("agent.heartbeat")
|
||||||
|
|
||||||
|
INTERVAL_S = 30.0
|
||||||
|
_TIMEOUT = httpx.Timeout(connect=5.0, read=10.0, write=5.0, pool=5.0)
|
||||||
|
|
||||||
|
_task: Optional[asyncio.Task] = None
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_agent_dir() -> pathlib.Path:
|
||||||
|
"""Match the agent-dir resolution order used by the agent server:
|
||||||
|
DECNET_AGENT_DIR env, else /etc/decnet/agent (production install),
|
||||||
|
else ~/.decnet/agent (dev)."""
|
||||||
|
import os
|
||||||
|
env = os.environ.get("DECNET_AGENT_DIR")
|
||||||
|
if env:
|
||||||
|
return pathlib.Path(env)
|
||||||
|
system = pathlib.Path("/etc/decnet/agent")
|
||||||
|
if system.exists():
|
||||||
|
return system
|
||||||
|
return pki.DEFAULT_AGENT_DIR
|
||||||
|
|
||||||
|
|
||||||
|
async def _tick(client: httpx.AsyncClient, url: str, host_uuid: str, agent_version: str) -> None:
    """Send one heartbeat: POST the current executor status to the master."""
    snap = await _exec.status()
    resp = await client.post(
        url,
        json={
            "host_uuid": host_uuid,
            "agent_version": agent_version,
            "status": snap,
        },
    )
    # 403 / 404 are terminal-ish — we still keep looping because an
    # operator may re-enrol the host mid-session, but we log loudly so
    # prod ops can spot cert-pinning drift.
    if resp.status_code == 204:
        return
    log.warning(
        "heartbeat rejected status=%d body=%s",
        resp.status_code, resp.text[:200],
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _loop(url: str, host_uuid: str, agent_version: str, ssl_ctx) -> None:
    """Run _tick forever at INTERVAL_S, surviving per-tick failures."""
    log.info("heartbeat loop starting url=%s host_uuid=%s interval=%ss",
             url, host_uuid, INTERVAL_S)
    async with httpx.AsyncClient(verify=ssl_ctx, timeout=_TIMEOUT) as client:
        while True:
            try:
                await _tick(client, url, host_uuid, agent_version)
            except asyncio.CancelledError:
                # Propagate cancellation so stop() can join the task.
                raise
            except Exception:
                log.exception("heartbeat tick failed — will retry in %ss", INTERVAL_S)
            await asyncio.sleep(INTERVAL_S)
|
||||||
|
|
||||||
|
|
||||||
|
def start() -> Optional[asyncio.Task]:
    """Kick off the background heartbeat task. No-op if identity is
    unconfigured (dev mode) — the caller doesn't need to check.

    Returns the running task (existing or newly created), or None when
    identity or TLS material is unavailable.
    """
    global _task
    # Imported lazily so decnet.env is evaluated after decnet.__init__ has
    # seeded os.environ from the INI file.
    from decnet.env import (
        DECNET_HOST_UUID,
        DECNET_MASTER_HOST,
        DECNET_SWARMCTL_PORT,
    )

    # Idempotent start: reuse a live task rather than spawning a second loop.
    if _task is not None and not _task.done():
        return _task
    if not DECNET_HOST_UUID or not DECNET_MASTER_HOST:
        log.debug("heartbeat not starting — DECNET_HOST_UUID or DECNET_MASTER_HOST unset")
        return None

    agent_dir = _resolve_agent_dir()
    try:
        ssl_ctx = build_worker_ssl_context(agent_dir)
    except Exception:
        log.exception("heartbeat not starting — worker SSL context unavailable at %s", agent_dir)
        return None

    # Version is advisory metadata on the heartbeat; never fatal.
    try:
        from decnet import __version__ as _v
        agent_version = _v
    except Exception:
        agent_version = "unknown"

    url = f"https://{DECNET_MASTER_HOST}:{DECNET_SWARMCTL_PORT}/swarm/heartbeat"
    _task = asyncio.create_task(
        _loop(url, DECNET_HOST_UUID, agent_version, ssl_ctx),
        name="agent-heartbeat",
    )
    return _task
|
||||||
|
|
||||||
|
|
||||||
|
async def stop() -> None:
    """Cancel the background heartbeat task and wait for it to unwind.

    Idempotent: safe to call when the loop never started. CancelledError
    is the expected shutdown path; any other exception means the loop
    died on its own, which was previously swallowed silently by
    ``except (asyncio.CancelledError, Exception)`` — now it is logged so
    a crashed heartbeat is visible in ops logs.
    """
    global _task
    if _task is None:
        return
    _task.cancel()
    try:
        await _task
    except asyncio.CancelledError:
        pass  # normal: we cancelled it ourselves
    except Exception:
        log.exception("heartbeat task terminated with an unexpected error")
    _task = None
|
||||||
70
decnet/agent/server.py
Normal file
70
decnet/agent/server.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
"""Worker-agent uvicorn launcher.
|
||||||
|
|
||||||
|
Starts ``decnet.agent.app:app`` over HTTPS with mTLS enforcement. The
|
||||||
|
worker must already have a bundle in ``~/.decnet/agent/`` (delivered by
|
||||||
|
``decnet swarm enroll`` from the master); if it does not, we refuse to
|
||||||
|
start — unauthenticated agents are not a supported mode.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
import signal
|
||||||
|
import subprocess # nosec B404
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.swarm import pki
|
||||||
|
|
||||||
|
log = get_logger("agent.server")
|
||||||
|
|
||||||
|
|
||||||
|
def run(host: str, port: int, agent_dir: pathlib.Path = pki.DEFAULT_AGENT_DIR) -> int:
    """Launch uvicorn serving the agent app over mTLS; return its exit code.

    Refuses to start (exit code 2) when the enroll bundle is missing —
    unauthenticated agents are not a supported mode.
    """
    bundle = pki.load_worker_bundle(agent_dir)
    if bundle is None:
        print(
            f"[agent] No cert bundle at {agent_dir}. "
            f"Run `decnet swarm enroll` from the master first.",
            file=sys.stderr,
        )
        return 2

    # File layout is fixed by the enroll bundle contract.
    keyfile = agent_dir / "worker.key"
    certfile = agent_dir / "worker.crt"
    cafile = agent_dir / "ca.crt"

    cmd = [
        sys.executable,
        "-m",
        "uvicorn",
        "decnet.agent.app:app",
        "--host",
        host,
        "--port",
        str(port),
        "--ssl-keyfile",
        str(keyfile),
        "--ssl-certfile",
        str(certfile),
        "--ssl-ca-certs",
        str(cafile),
        # 2 == ssl.CERT_REQUIRED — clients MUST present a CA-signed cert.
        "--ssl-cert-reqs",
        "2",
    ]
    log.info("agent starting host=%s port=%d bundle=%s", host, port, agent_dir)
    # Own process group for clean Ctrl+C / SIGTERM propagation to uvicorn
    # workers (same pattern as `decnet api`).
    proc = subprocess.Popen(cmd, start_new_session=True)  # nosec B603
    try:
        return proc.wait()
    except KeyboardInterrupt:
        try:
            # start_new_session=True made proc the group leader, so its pid
            # doubles as the process-group id for killpg.
            os.killpg(proc.pid, signal.SIGTERM)
            try:
                return proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                # Grace period expired — force-kill the whole group.
                os.killpg(proc.pid, signal.SIGKILL)
                return proc.wait()
        except ProcessLookupError:
            # Group already gone — treat as a clean shutdown.
            return 0
|
||||||
478
decnet/cli.py
478
decnet/cli.py
@@ -1,478 +0,0 @@
|
|||||||
"""
|
|
||||||
DECNET CLI — entry point for all commands.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
decnet deploy --mode unihost --deckies 5 --randomize-services
|
|
||||||
decnet status
|
|
||||||
decnet teardown [--all | --id decky-01]
|
|
||||||
decnet services
|
|
||||||
"""
|
|
||||||
|
|
||||||
import signal
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import typer
|
|
||||||
from rich.console import Console
|
|
||||||
from rich.table import Table
|
|
||||||
|
|
||||||
from decnet.env import (
|
|
||||||
DECNET_API_HOST,
|
|
||||||
DECNET_API_PORT,
|
|
||||||
DECNET_INGEST_LOG_FILE,
|
|
||||||
DECNET_WEB_HOST,
|
|
||||||
DECNET_WEB_PORT,
|
|
||||||
)
|
|
||||||
from decnet.archetypes import Archetype, all_archetypes, get_archetype
|
|
||||||
from decnet.config import (
|
|
||||||
DecnetConfig,
|
|
||||||
)
|
|
||||||
from decnet.distros import all_distros, get_distro
|
|
||||||
from decnet.fleet import all_service_names, build_deckies, build_deckies_from_ini
|
|
||||||
from decnet.ini_loader import load_ini
|
|
||||||
from decnet.network import detect_interface, detect_subnet, allocate_ips, get_host_ip
|
|
||||||
from decnet.services.registry import all_services
|
|
||||||
|
|
||||||
app = typer.Typer(
|
|
||||||
name="decnet",
|
|
||||||
help="Deploy a deception network of honeypot deckies on your LAN.",
|
|
||||||
no_args_is_help=True,
|
|
||||||
)
|
|
||||||
console = Console()
|
|
||||||
|
|
||||||
|
|
||||||
def _kill_api() -> None:
    """Find and kill any running DECNET API (uvicorn) or mutator processes."""
    import psutil
    import os

    _killed: bool = False
    for _proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            _cmd = _proc.info['cmdline']
            if not _cmd:
                continue
            # Exact-argv membership test: matches the list form
            # `python -m uvicorn decnet.web.api:app ...` spawned by `decnet api`.
            if "uvicorn" in _cmd and "decnet.web.api:app" in _cmd:
                console.print(f"[yellow]Stopping DECNET API (PID {_proc.info['pid']})...[/]")
                os.kill(_proc.info['pid'], signal.SIGTERM)
                _killed = True
            elif "decnet.cli" in _cmd and "mutate" in _cmd and "--watch" in _cmd:
                console.print(f"[yellow]Stopping DECNET Mutator Watcher (PID {_proc.info['pid']})...[/]")
                os.kill(_proc.info['pid'], signal.SIGTERM)
                _killed = True
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # Process exited or is out of our reach mid-scan — skip it.
            continue

    if _killed:
        console.print("[green]Background processes stopped.[/]")
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def api(
    port: int = typer.Option(DECNET_API_PORT, "--port", help="Port for the backend API"),
    host: str = typer.Option(DECNET_API_HOST, "--host", help="Host IP for the backend API"),
    log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Path to the DECNET log file to monitor"),
) -> None:
    """Run the DECNET API and Web Dashboard in standalone mode."""
    import subprocess  # nosec B404
    import sys
    import os

    console.print(f"[green]Starting DECNET API on {host}:{port}...[/]")
    # Propagate the chosen log file to the child via the environment so the
    # API process reads the same path without extra CLI plumbing.
    _env: dict[str, str] = os.environ.copy()
    _env["DECNET_INGEST_LOG_FILE"] = str(log_file)
    try:
        subprocess.run(  # nosec B603 B404
            [sys.executable, "-m", "uvicorn", "decnet.web.api:app", "--host", host, "--port", str(port)],
            env=_env
        )
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop the foreground server.
        pass
    except (FileNotFoundError, subprocess.SubprocessError):
        console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
|
|
||||||
def deploy(
|
|
||||||
mode: str = typer.Option("unihost", "--mode", "-m", help="Deployment mode: unihost | swarm"),
|
|
||||||
deckies: Optional[int] = typer.Option(None, "--deckies", "-n", help="Number of deckies to deploy (required without --config)", min=1),
|
|
||||||
interface: Optional[str] = typer.Option(None, "--interface", "-i", help="Host NIC (auto-detected if omitted)"),
|
|
||||||
subnet: Optional[str] = typer.Option(None, "--subnet", help="LAN subnet CIDR (auto-detected if omitted)"),
|
|
||||||
ip_start: Optional[str] = typer.Option(None, "--ip-start", help="First decky IP (auto if omitted)"),
|
|
||||||
services: Optional[str] = typer.Option(None, "--services", help="Comma-separated services, e.g. ssh,smb,rdp"),
|
|
||||||
randomize_services: bool = typer.Option(False, "--randomize-services", help="Assign random services to each decky"),
|
|
||||||
distro: Optional[str] = typer.Option(None, "--distro", help="Comma-separated distro slugs, e.g. debian,ubuntu22,rocky9"),
|
|
||||||
randomize_distros: bool = typer.Option(False, "--randomize-distros", help="Assign a random distro to each decky"),
|
|
||||||
log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Host path for the collector to write RFC 5424 logs (e.g. /var/log/decnet/decnet.log)"),
|
|
||||||
archetype_name: Optional[str] = typer.Option(None, "--archetype", "-a", help="Machine archetype slug (e.g. linux-server, windows-workstation)"),
|
|
||||||
mutate_interval: Optional[int] = typer.Option(30, "--mutate-interval", help="Automatically rotate services every N minutes"),
|
|
||||||
dry_run: bool = typer.Option(False, "--dry-run", help="Generate compose file without starting containers"),
|
|
||||||
no_cache: bool = typer.Option(False, "--no-cache", help="Force rebuild all images, ignoring Docker layer cache"),
|
|
||||||
parallel: bool = typer.Option(False, "--parallel", help="Build all images concurrently (enables BuildKit, separates build from up)"),
|
|
||||||
ipvlan: bool = typer.Option(False, "--ipvlan", help="Use IPvlan L2 instead of MACVLAN (required on WiFi interfaces)"),
|
|
||||||
config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to INI config file"),
|
|
||||||
api: bool = typer.Option(False, "--api", help="Start the FastAPI backend to ingest and serve logs"),
|
|
||||||
api_port: int = typer.Option(8000, "--api-port", help="Port for the backend API"),
|
|
||||||
) -> None:
|
|
||||||
"""Deploy deckies to the LAN."""
|
|
||||||
import os
|
|
||||||
if mode not in ("unihost", "swarm"):
|
|
||||||
console.print("[red]--mode must be 'unihost' or 'swarm'[/]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------ #
|
|
||||||
# Config-file path #
|
|
||||||
# ------------------------------------------------------------------ #
|
|
||||||
if config_file:
|
|
||||||
try:
|
|
||||||
ini = load_ini(config_file)
|
|
||||||
except FileNotFoundError as e:
|
|
||||||
console.print(f"[red]{e}[/]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
iface = interface or ini.interface or detect_interface()
|
|
||||||
subnet_cidr = subnet or ini.subnet
|
|
||||||
effective_gateway = ini.gateway
|
|
||||||
if subnet_cidr is None:
|
|
||||||
subnet_cidr, effective_gateway = detect_subnet(iface)
|
|
||||||
elif effective_gateway is None:
|
|
||||||
_, effective_gateway = detect_subnet(iface)
|
|
||||||
|
|
||||||
host_ip = get_host_ip(iface)
|
|
||||||
console.print(f"[dim]Config:[/] {config_file} [dim]Interface:[/] {iface} "
|
|
||||||
f"[dim]Subnet:[/] {subnet_cidr} [dim]Gateway:[/] {effective_gateway} "
|
|
||||||
f"[dim]Host IP:[/] {host_ip}")
|
|
||||||
|
|
||||||
if ini.custom_services:
|
|
||||||
from decnet.custom_service import CustomService
|
|
||||||
from decnet.services.registry import register_custom_service
|
|
||||||
for cs in ini.custom_services:
|
|
||||||
register_custom_service(
|
|
||||||
CustomService(
|
|
||||||
name=cs.name,
|
|
||||||
image=cs.image,
|
|
||||||
exec_cmd=cs.exec_cmd,
|
|
||||||
ports=cs.ports,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
effective_log_file = log_file
|
|
||||||
try:
|
|
||||||
decky_configs = build_deckies_from_ini(
|
|
||||||
ini, subnet_cidr, effective_gateway, host_ip, randomize_services, cli_mutate_interval=mutate_interval
|
|
||||||
)
|
|
||||||
except ValueError as e:
|
|
||||||
console.print(f"[red]{e}[/]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
# ------------------------------------------------------------------ #
|
|
||||||
# Classic CLI path #
|
|
||||||
# ------------------------------------------------------------------ #
|
|
||||||
else:
|
|
||||||
if deckies is None:
|
|
||||||
console.print("[red]--deckies is required when --config is not used.[/]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
services_list = [s.strip() for s in services.split(",")] if services else None
|
|
||||||
if services_list:
|
|
||||||
known = set(all_service_names())
|
|
||||||
unknown = [s for s in services_list if s not in known]
|
|
||||||
if unknown:
|
|
||||||
console.print(f"[red]Unknown service(s): {unknown}. Available: {all_service_names()}[/]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
arch: Archetype | None = None
|
|
||||||
if archetype_name:
|
|
||||||
try:
|
|
||||||
arch = get_archetype(archetype_name)
|
|
||||||
except ValueError as e:
|
|
||||||
console.print(f"[red]{e}[/]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
if not services_list and not randomize_services and not arch:
|
|
||||||
console.print("[red]Specify --services, --archetype, or --randomize-services.[/]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
iface = interface or detect_interface()
|
|
||||||
if subnet is None:
|
|
||||||
subnet_cidr, effective_gateway = detect_subnet(iface)
|
|
||||||
else:
|
|
||||||
subnet_cidr = subnet
|
|
||||||
_, effective_gateway = detect_subnet(iface)
|
|
||||||
|
|
||||||
host_ip = get_host_ip(iface)
|
|
||||||
console.print(f"[dim]Interface:[/] {iface} [dim]Subnet:[/] {subnet_cidr} "
|
|
||||||
f"[dim]Gateway:[/] {effective_gateway} [dim]Host IP:[/] {host_ip}")
|
|
||||||
|
|
||||||
distros_list = [d.strip() for d in distro.split(",")] if distro else None
|
|
||||||
if distros_list:
|
|
||||||
try:
|
|
||||||
for slug in distros_list:
|
|
||||||
get_distro(slug)
|
|
||||||
except ValueError as e:
|
|
||||||
console.print(f"[red]{e}[/]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
ips = allocate_ips(subnet_cidr, effective_gateway, host_ip, deckies, ip_start)
|
|
||||||
decky_configs = build_deckies(
|
|
||||||
deckies, ips, services_list, randomize_services,
|
|
||||||
distros_explicit=distros_list, randomize_distros=randomize_distros,
|
|
||||||
archetype=arch, mutate_interval=mutate_interval,
|
|
||||||
)
|
|
||||||
effective_log_file = log_file
|
|
||||||
|
|
||||||
if api and not effective_log_file:
|
|
||||||
effective_log_file = os.path.join(os.getcwd(), "decnet.log")
|
|
||||||
console.print(f"[cyan]API mode enabled: defaulting log-file to {effective_log_file}[/]")
|
|
||||||
|
|
||||||
config = DecnetConfig(
|
|
||||||
mode=mode,
|
|
||||||
interface=iface,
|
|
||||||
subnet=subnet_cidr,
|
|
||||||
gateway=effective_gateway,
|
|
||||||
deckies=decky_configs,
|
|
||||||
log_file=effective_log_file,
|
|
||||||
ipvlan=ipvlan,
|
|
||||||
mutate_interval=mutate_interval,
|
|
||||||
)
|
|
||||||
|
|
||||||
from decnet.engine import deploy as _deploy
|
|
||||||
_deploy(config, dry_run=dry_run, no_cache=no_cache, parallel=parallel)
|
|
||||||
|
|
||||||
if mutate_interval is not None and not dry_run:
|
|
||||||
import subprocess # nosec B404
|
|
||||||
import sys
|
|
||||||
console.print(f"[green]Starting DECNET Mutator watcher in the background (interval: {mutate_interval}m)...[/]")
|
|
||||||
try:
|
|
||||||
subprocess.Popen( # nosec B603
|
|
||||||
[sys.executable, "-m", "decnet.cli", "mutate", "--watch"],
|
|
||||||
stdout=subprocess.DEVNULL,
|
|
||||||
stderr=subprocess.STDOUT,
|
|
||||||
start_new_session=True,
|
|
||||||
)
|
|
||||||
except (FileNotFoundError, subprocess.SubprocessError):
|
|
||||||
console.print("[red]Failed to start mutator watcher.[/]")
|
|
||||||
|
|
||||||
if effective_log_file and not dry_run and not api:
|
|
||||||
import subprocess # nosec B404
|
|
||||||
import sys
|
|
||||||
from pathlib import Path as _Path
|
|
||||||
_collector_err = _Path(effective_log_file).with_suffix(".collector.log")
|
|
||||||
console.print(f"[bold cyan]Starting log collector[/] → {effective_log_file}")
|
|
||||||
subprocess.Popen( # nosec B603
|
|
||||||
[sys.executable, "-m", "decnet.cli", "collect", "--log-file", str(effective_log_file)],
|
|
||||||
stdin=subprocess.DEVNULL,
|
|
||||||
stdout=open(_collector_err, "a"), # nosec B603
|
|
||||||
stderr=subprocess.STDOUT,
|
|
||||||
start_new_session=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
if api and not dry_run:
|
|
||||||
import subprocess # nosec B404
|
|
||||||
import sys
|
|
||||||
console.print(f"[green]Starting DECNET API on port {api_port}...[/]")
|
|
||||||
_env: dict[str, str] = os.environ.copy()
|
|
||||||
_env["DECNET_INGEST_LOG_FILE"] = str(effective_log_file or "")
|
|
||||||
try:
|
|
||||||
subprocess.Popen( # nosec B603
|
|
||||||
[sys.executable, "-m", "uvicorn", "decnet.web.api:app", "--host", DECNET_API_HOST, "--port", str(api_port)],
|
|
||||||
env=_env,
|
|
||||||
stdout=subprocess.DEVNULL,
|
|
||||||
stderr=subprocess.STDOUT
|
|
||||||
)
|
|
||||||
console.print(f"[dim]API running at http://{DECNET_API_HOST}:{api_port}[/]")
|
|
||||||
except (FileNotFoundError, subprocess.SubprocessError):
|
|
||||||
console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def collect(
    log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write RFC 5424 syslog lines and .json records"),
) -> None:
    """Stream Docker logs from all running decky service containers to a log file."""
    # Deferred imports — matches the file's lazy-import convention for commands.
    import asyncio

    from decnet.collector import log_collector_worker

    banner = f"[bold cyan]Collector starting[/] → {log_file}"
    console.print(banner)
    asyncio.run(log_collector_worker(log_file))
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def mutate(
    watch: bool = typer.Option(False, "--watch", "-w", help="Run continuously and mutate deckies according to their interval"),
    decky_name: Optional[str] = typer.Option(None, "--decky", "-d", help="Force mutate a specific decky immediately"),
    force_all: bool = typer.Option(False, "--all", help="Force mutate all deckies immediately"),
) -> None:
    """Manually trigger or continuously watch for decky mutation."""
    import asyncio

    from decnet.mutator import mutate_decky, mutate_all, run_watch_loop
    from decnet.web.dependencies import repo

    async def _dispatch() -> None:
        # The repository must be ready before any mutation path touches it.
        await repo.initialize()
        if watch:
            await run_watch_loop(repo)
        elif decky_name:
            await mutate_decky(decky_name, repo)
        else:
            # --all maps directly to force=True; the plain invocation
            # passes force=False, exactly as the two original branches did.
            await mutate_all(force=force_all, repo=repo)

    asyncio.run(_dispatch())
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def status() -> None:
    """Show running deckies and their status."""
    # Deferred import — matches the file's lazy-import convention for commands.
    from decnet.engine import status as _engine_status

    _engine_status()
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def teardown(
    all_: bool = typer.Option(False, "--all", help="Tear down all deckies and remove network"),
    id_: Optional[str] = typer.Option(None, "--id", help="Tear down a specific decky by name"),
) -> None:
    """Stop and remove deckies."""
    # Guard clause: at least one selector is mandatory.
    if not (all_ or id_):
        console.print("[red]Specify --all or --id <name>.[/]")
        raise typer.Exit(1)

    from decnet.engine import teardown as _engine_teardown

    # Under --all, id_ is None — presumably the engine treats that as a
    # full teardown (TODO confirm against decnet.engine.teardown).
    _engine_teardown(decky_id=id_)

    if all_:
        _kill_api()
|
|
||||||
|
|
||||||
|
|
||||||
@app.command(name="services")
def list_services() -> None:
    """List all registered honeypot service plugins."""
    registry = all_services()
    table = Table(title="Available Services", show_lines=True)
    table.add_column("Name", style="bold cyan")
    table.add_column("Ports")
    table.add_column("Image")
    # Stable, alphabetical listing of the plugin registry.
    for svc_name, svc in sorted(registry.items()):
        ports_text = ", ".join(map(str, svc.ports))
        table.add_row(svc_name, ports_text, svc.default_image)
    console.print(table)
|
|
||||||
|
|
||||||
|
|
||||||
@app.command(name="distros")
def list_distros() -> None:
    """List all available OS distro profiles for deckies."""
    table = Table(title="Available Distro Profiles", show_lines=True)
    table.add_column("Slug", style="bold cyan")
    table.add_column("Display Name")
    table.add_column("Docker Image", style="dim")
    # Alphabetical by slug, same ordering as sorted(.items()).
    profiles = all_distros()
    for slug in sorted(profiles):
        profile = profiles[slug]
        table.add_row(slug, profile.display_name, profile.image)
    console.print(table)
|
|
||||||
|
|
||||||
|
|
||||||
@app.command(name="correlate")
def correlate(
    log_file: Optional[str] = typer.Option(None, "--log-file", "-f", help="Path to DECNET syslog file to analyse"),
    min_deckies: int = typer.Option(2, "--min-deckies", "-m", help="Minimum number of distinct deckies an IP must touch to be reported"),
    output: str = typer.Option("table", "--output", "-o", help="Output format: table | json | syslog"),
    emit_syslog: bool = typer.Option(False, "--emit-syslog", help="Also print traversal events as RFC 5424 lines (for SIEM piping)"),
) -> None:
    """Analyse logs for cross-decky traversals and print the attacker movement graph.

    Input comes either from --log-file or, when that is omitted, from piped
    stdin. Exits 1 if neither input source is available or the file is missing.
    """
    import sys
    import json as _json
    from pathlib import Path
    from decnet.correlation.engine import CorrelationEngine

    engine = CorrelationEngine()

    # ---------------- ingest phase ---------------- #
    if log_file:
        path = Path(log_file)
        if not path.exists():
            console.print(f"[red]Log file not found: {log_file}[/]")
            raise typer.Exit(1)
        engine.ingest_file(path)
    elif not sys.stdin.isatty():
        # stdin is a pipe/redirect — consume it line by line.
        for line in sys.stdin:
            engine.ingest(line)
    else:
        console.print("[red]Provide --log-file or pipe log data via stdin.[/]")
        raise typer.Exit(1)

    traversals = engine.traversals(min_deckies)

    # ---------------- report phase ---------------- #
    if output == "json":
        console.print_json(_json.dumps(engine.report_json(min_deckies), indent=2))
    elif output == "syslog":
        for line in engine.traversal_syslog_lines(min_deckies):
            typer.echo(line)
    else:
        if not traversals:
            console.print(
                f"[yellow]No traversals detected "
                f"(min_deckies={min_deckies}, events_indexed={engine.events_indexed}).[/]"
            )
        else:
            console.print(engine.report_table(min_deckies))
        console.print(
            f"[dim]Parsed {engine.lines_parsed} lines · "
            f"indexed {engine.events_indexed} events · "
            f"{len(engine.all_attackers())} unique IPs · "
            f"[bold]{len(traversals)}[/] traversal(s)[/]"
        )

    # BUGFIX: when --output syslog is selected the traversal lines were already
    # emitted above; re-emitting here duplicated every event on stdout. The
    # extra --emit-syslog stream is only meaningful for table/json output.
    if emit_syslog and output != "syslog":
        for line in engine.traversal_syslog_lines(min_deckies):
            typer.echo(line)
|
|
||||||
|
|
||||||
|
|
||||||
@app.command(name="archetypes")
def list_archetypes() -> None:
    """List all machine archetype profiles."""
    table = Table(title="Machine Archetypes", show_lines=True)
    table.add_column("Slug", style="bold cyan")
    table.add_column("Display Name")
    table.add_column("Default Services", style="green")
    table.add_column("Description", style="dim")
    # Alphabetical by slug, same ordering as sorted(.items()).
    registry = all_archetypes()
    for slug in sorted(registry):
        profile = registry[slug]
        services_text = ", ".join(profile.services)
        table.add_row(slug, profile.display_name, services_text, profile.description)
    console.print(table)
|
|
||||||
|
|
||||||
|
|
||||||
@app.command(name="web")
def serve_web(
    web_port: int = typer.Option(DECNET_WEB_PORT, "--web-port", help="Port to serve the DECNET Web Dashboard"),
    host: str = typer.Option(DECNET_WEB_HOST, "--host", help="Host IP to serve the Web Dashboard"),
) -> None:
    """Serve the DECNET Web Dashboard frontend.

    Serves the pre-built SPA from decnet_web/dist with an index.html
    fallback for client-side routes. Exits 1 if the build is missing.
    Blocks until Ctrl-C.
    """
    import functools
    import http.server
    import socketserver
    from pathlib import Path

    dist_dir = Path(__file__).parent.parent / "decnet_web" / "dist"

    if not dist_dir.exists():
        console.print(f"[red]Frontend build not found at {dist_dir}. Make sure you run 'npm run build' inside 'decnet_web'.[/]")
        raise typer.Exit(1)

    class SPAHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
        """Static handler with SPA fallback: unknown/dir paths serve index.html."""

        def do_GET(self):  # noqa: N802 — name mandated by http.server
            resolved = self.translate_path(self.path)
            if not Path(resolved).exists() or Path(resolved).is_dir():
                self.path = "/index.html"
            return super().do_GET()

    class _ReusableTCPServer(socketserver.TCPServer):
        # FIX: allow immediate restarts — without this, a socket lingering in
        # TIME_WAIT makes the next `decnet web` fail with EADDRINUSE.
        allow_reuse_address = True

    # FIX: serve dist_dir via the handler's `directory` parameter instead of
    # os.chdir(dist_dir) — chdir mutates process-global state and silently
    # breaks any relative paths used elsewhere in this CLI process.
    handler = functools.partial(SPAHTTPRequestHandler, directory=str(dist_dir))

    with _ReusableTCPServer((host, web_port), handler) as httpd:
        console.print(f"[green]Serving DECNET Web Dashboard on http://{host}:{web_port}[/]")
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            console.print("\n[dim]Shutting down dashboard server.[/]")
|
|
||||||
|
|
||||||
# Allow `python -m`-style direct execution of this module.
if __name__ == "__main__":  # pragma: no cover
    app()
|
|
||||||
80
decnet/cli/__init__.py
Normal file
80
decnet/cli/__init__.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
"""
|
||||||
|
DECNET CLI — entry point for all commands.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
decnet deploy --mode unihost --deckies 5 --randomize-services
|
||||||
|
decnet status
|
||||||
|
decnet teardown [--all | --id decky-01]
|
||||||
|
decnet services
|
||||||
|
|
||||||
|
Layout: each command module exports ``register(app)`` which attaches its
|
||||||
|
commands to the passed Typer app. ``__init__.py`` builds the root app,
|
||||||
|
calls every module's ``register`` in order, then runs the master-only
|
||||||
|
gate. The gate must fire LAST so it sees the fully-populated dispatch
|
||||||
|
table before filtering.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from . import (
|
||||||
|
agent,
|
||||||
|
api,
|
||||||
|
db,
|
||||||
|
deploy,
|
||||||
|
forwarder,
|
||||||
|
inventory,
|
||||||
|
lifecycle,
|
||||||
|
listener,
|
||||||
|
profiler,
|
||||||
|
sniffer,
|
||||||
|
swarm,
|
||||||
|
swarmctl,
|
||||||
|
updater,
|
||||||
|
web,
|
||||||
|
workers,
|
||||||
|
)
|
||||||
|
from .gating import _gate_commands_by_mode
|
||||||
|
from .utils import console as console, log as log
|
||||||
|
|
||||||
|
app = typer.Typer(
|
||||||
|
name="decnet",
|
||||||
|
help="Deploy a deception network of honeypot deckies on your LAN.",
|
||||||
|
no_args_is_help=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Order matches the old flat layout so `decnet --help` reads the same.
|
||||||
|
for _mod in (
|
||||||
|
api, swarmctl, agent, updater, listener, forwarder,
|
||||||
|
swarm,
|
||||||
|
deploy, lifecycle, workers, inventory,
|
||||||
|
web, profiler, sniffer, db,
|
||||||
|
):
|
||||||
|
_mod.register(app)
|
||||||
|
|
||||||
|
_gate_commands_by_mode(app)
|
||||||
|
|
||||||
|
# Backwards-compat re-exports. Tests and third-party tooling import these
|
||||||
|
# directly from ``decnet.cli``; the refactor must keep them resolvable.
|
||||||
|
from .db import _db_reset_mysql_async # noqa: E402,F401
|
||||||
|
from .gating import ( # noqa: E402,F401
|
||||||
|
MASTER_ONLY_COMMANDS,
|
||||||
|
MASTER_ONLY_GROUPS,
|
||||||
|
_agent_mode_active,
|
||||||
|
_require_master_mode,
|
||||||
|
)
|
||||||
|
from .utils import ( # noqa: E402,F401
|
||||||
|
_daemonize,
|
||||||
|
_http_request,
|
||||||
|
_is_running,
|
||||||
|
_kill_all_services,
|
||||||
|
_pid_dir,
|
||||||
|
_service_registry,
|
||||||
|
_spawn_detached,
|
||||||
|
_swarmctl_base_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__": # pragma: no cover
|
||||||
|
app()
|
||||||
64
decnet/cli/agent.py
Normal file
64
decnet/cli/agent.py
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import pathlib as _pathlib
|
||||||
|
import sys as _sys
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the `agent` command to the root Typer app."""
    @app.command()
    def agent(
        port: int = typer.Option(8765, "--port", help="Port for the worker agent"),
        host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the worker agent"),  # nosec B104
        agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker cert bundle dir (default: ~/.decnet/agent, expanded under the running user's HOME — set this when running as sudo/root)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        no_forwarder: bool = typer.Option(False, "--no-forwarder", help="Do not auto-spawn the log forwarder alongside the agent"),
    ) -> None:
        """Run the DECNET SWARM worker agent (requires a cert bundle in ~/.decnet/agent/).

        By default, `decnet agent` auto-spawns `decnet forwarder` as a fully-
        detached sibling process so worker logs start flowing to the master
        without a second manual invocation. The forwarder survives agent
        restarts and crashes — if it dies on its own, restart it manually
        with `decnet forwarder --daemon …`. Pass --no-forwarder to skip.
        """
        from decnet.agent import server as _agent_server
        from decnet.env import DECNET_SWARM_MASTER_HOST, DECNET_INGEST_LOG_FILE
        from decnet.swarm import pki as _pki

        # Explicit --agent-dir wins; otherwise fall back to the PKI default.
        resolved_dir = _pathlib.Path(agent_dir) if agent_dir else _pki.DEFAULT_AGENT_DIR

        # Daemonize BEFORE spawning the forwarder so the forwarder is
        # parented to the detached process, not the dying foreground shell.
        if daemon:
            log.info("agent daemonizing host=%s port=%d", host, port)
            _utils._daemonize()

        # Auto-spawn the forwarder only when a master host is configured.
        if not no_forwarder and DECNET_SWARM_MASTER_HOST:
            fw_argv = [
                _sys.executable, "-m", "decnet", "forwarder",
                "--master-host", DECNET_SWARM_MASTER_HOST,
                "--master-port", str(int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514"))),
                "--agent-dir", str(resolved_dir),
                "--log-file", str(DECNET_INGEST_LOG_FILE),
                "--daemon",
            ]
            try:
                pid = _utils._spawn_detached(fw_argv, _utils._pid_dir() / "forwarder.pid")
                log.info("agent auto-spawned forwarder pid=%d master=%s", pid, DECNET_SWARM_MASTER_HOST)
                console.print(f"[dim]Auto-spawned forwarder (pid {pid}) → {DECNET_SWARM_MASTER_HOST}.[/]")
            # Best-effort: a failed forwarder spawn must not kill the agent.
            except Exception as e:  # noqa: BLE001
                log.warning("agent could not auto-spawn forwarder: %s", e)
                console.print(f"[yellow]forwarder auto-spawn skipped: {e}[/]")
        elif not no_forwarder:
            log.info("agent skipping forwarder auto-spawn (DECNET_SWARM_MASTER_HOST unset)")

        log.info("agent command invoked host=%s port=%d dir=%s", host, port, resolved_dir)
        console.print(f"[green]Starting DECNET worker agent on {host}:{port} (mTLS)...[/]")
        # Blocks until the agent server exits; propagate non-zero exit codes.
        rc = _agent_server.run(host, port, agent_dir=resolved_dir)
        if rc != 0:
            raise typer.Exit(rc)
|
||||||
53
decnet/cli/api.py
Normal file
53
decnet/cli/api.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import signal
|
||||||
|
import subprocess # nosec B404
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from decnet.env import DECNET_API_HOST, DECNET_API_PORT, DECNET_INGEST_LOG_FILE
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .gating import _require_master_mode
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the `api` command to the root Typer app."""
    @app.command()
    def api(
        port: int = typer.Option(DECNET_API_PORT, "--port", help="Port for the backend API"),
        host: str = typer.Option(DECNET_API_HOST, "--host", help="Host IP for the backend API"),
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Path to the DECNET log file to monitor"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        workers: int = typer.Option(1, "--workers", "-w", min=1, help="Number of uvicorn worker processes"),
    ) -> None:
        """Run the DECNET API and Web Dashboard in standalone mode."""
        # Refused outright when running in agent (worker) mode.
        _require_master_mode("api")
        if daemon:
            log.info("API daemonizing host=%s port=%d workers=%d", host, port, workers)
            _utils._daemonize()

        log.info("API command invoked host=%s port=%d workers=%d", host, port, workers)
        console.print(f"[green]Starting DECNET API on {host}:{port} (workers={workers})...[/]")
        # Pass the log-file path to the uvicorn child through the environment.
        _env: dict[str, str] = os.environ.copy()
        _env["DECNET_INGEST_LOG_FILE"] = str(log_file)
        _cmd = [sys.executable, "-m", "uvicorn", "decnet.web.api:app",
                "--host", host, "--port", str(port), "--workers", str(workers)]
        try:
            # start_new_session=True makes the child a process-group leader,
            # so killpg(proc.pid, ...) below reaches uvicorn AND its workers.
            proc = subprocess.Popen(_cmd, env=_env, start_new_session=True)  # nosec B603 B404
            try:
                proc.wait()
            except KeyboardInterrupt:
                # Graceful shutdown: SIGTERM the whole group, wait up to 10s,
                # then escalate to SIGKILL.
                try:
                    os.killpg(proc.pid, signal.SIGTERM)
                    try:
                        proc.wait(timeout=10)
                    except subprocess.TimeoutExpired:
                        os.killpg(proc.pid, signal.SIGKILL)
                        proc.wait()
                except ProcessLookupError:
                    # Group already gone — nothing to clean up.
                    pass
        except (FileNotFoundError, subprocess.SubprocessError):
            console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
|
||||||
130
decnet/cli/db.py
Normal file
130
decnet/cli/db.py
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
# Fixed allow-list of DECNET-owned tables. _db_reset_mysql_async interpolates
# only names from this tuple into its SQL, never user input.
_DB_RESET_TABLES: tuple[str, ...] = (
    # Order matters for DROP TABLE: child FKs first.
    # - attacker_behavior FK-references attackers.
    # - decky_shards FK-references swarm_hosts.
    "attacker_behavior",
    "attackers",
    "logs",
    "bounty",
    "state",
    "users",
    "decky_shards",
    "swarm_hosts",
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _db_reset_mysql_async(dsn: str, mode: str, confirm: bool) -> None:
    """Inspect + (optionally) wipe a MySQL database. Pulled out of the CLI
    wrapper so tests can drive it without spawning a Typer runner.

    Args:
        dsn: async SQLAlchemy database URL for the target MySQL instance.
        mode: ``"truncate"`` wipes rows and keeps the schema; any other
            value (the CLI passes ``"drop-tables"``) drops each table.
        confirm: when False this is a dry run — only the row-count summary
            is printed and nothing is modified.
    """
    from urllib.parse import urlparse
    from sqlalchemy import text
    from sqlalchemy.ext.asyncio import create_async_engine

    db_name = urlparse(dsn).path.lstrip("/") or "(default)"
    engine = create_async_engine(dsn)
    try:
        # Pass 1 (read-only): row count per table; -1 marks a missing table.
        rows: dict[str, int] = {}
        async with engine.connect() as conn:
            for tbl in _DB_RESET_TABLES:
                try:
                    # Table name comes from the fixed _DB_RESET_TABLES tuple,
                    # never from user input — hence the nosec on the f-string.
                    result = await conn.execute(text(f"SELECT COUNT(*) FROM `{tbl}`"))  # nosec B608
                    rows[tbl] = result.scalar() or 0
                except Exception:  # noqa: BLE001 — ProgrammingError for missing table varies by driver
                    rows[tbl] = -1

        summary = Table(title=f"DECNET MySQL reset — database `{db_name}` (mode={mode})")
        summary.add_column("Table", style="cyan")
        summary.add_column("Rows", justify="right")
        for tbl, count in rows.items():
            summary.add_row(tbl, "[dim]missing[/]" if count < 0 else f"{count:,}")
        console.print(summary)

        if not confirm:
            console.print(
                "[yellow]Dry-run only. Re-run with [bold]--i-know-what-im-doing[/] "
                "to actually execute.[/]"
            )
            return

        # Pass 2 (destructive): one transaction, FK checks disabled so the
        # TRUNCATE/DROP order cannot trip over foreign-key constraints.
        async with engine.begin() as conn:
            await conn.execute(text("SET FOREIGN_KEY_CHECKS = 0"))
            for tbl in _DB_RESET_TABLES:
                # Skip tables that were missing during inspection.
                if rows.get(tbl, -1) < 0:
                    continue
                if mode == "truncate":
                    await conn.execute(text(f"TRUNCATE TABLE `{tbl}`"))
                    console.print(f"[green]✓ TRUNCATE {tbl}[/]")
                else:
                    await conn.execute(text(f"DROP TABLE `{tbl}`"))
                    console.print(f"[green]✓ DROP TABLE {tbl}[/]")
            await conn.execute(text("SET FOREIGN_KEY_CHECKS = 1"))

        console.print(f"[bold green]Done. Database `{db_name}` reset ({mode}).[/]")
    finally:
        # Always release the connection pool, even on failure.
        await engine.dispose()
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the `db-reset` command to the root Typer app."""
    @app.command(name="db-reset")
    def db_reset(
        i_know: bool = typer.Option(
            False,
            "--i-know-what-im-doing",
            help="Required to actually execute. Without it, the command runs in dry-run mode.",
        ),
        mode: str = typer.Option(
            "truncate",
            "--mode",
            help="truncate (wipe rows, keep schema) | drop-tables (DROP TABLE for each DECNET table)",
        ),
        url: Optional[str] = typer.Option(
            None,
            "--url",
            help="Override DECNET_DB_URL for this invocation (e.g. when cleanup needs admin creds).",
        ),
    ) -> None:
        """Wipe the MySQL database used by the DECNET dashboard.

        Destructive. Runs dry by default — pass --i-know-what-im-doing to commit.
        Only supported against MySQL; refuses to operate on SQLite.
        """
        import asyncio
        import os

        # Exit code 2 for usage/configuration errors, 1 for runtime failures.
        if mode not in ("truncate", "drop-tables"):
            console.print(f"[red]Invalid --mode '{mode}'. Expected: truncate | drop-tables.[/]")
            raise typer.Exit(2)

        db_type = os.environ.get("DECNET_DB_TYPE", "sqlite").lower()
        if db_type != "mysql":
            console.print(
                f"[red]db-reset is MySQL-only (DECNET_DB_TYPE='{db_type}'). "
                f"For SQLite, just delete the decnet.db file.[/]"
            )
            raise typer.Exit(2)

        # DSN resolution order: --url flag, then DECNET_DB_URL env, then the
        # project's MySQL URL builder (which raises ValueError if unset).
        dsn = url or os.environ.get("DECNET_DB_URL")
        if not dsn:
            from decnet.web.db.mysql.database import build_mysql_url
            try:
                dsn = build_mysql_url()
            except ValueError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(2) from e

        log.info("db-reset invoked mode=%s confirm=%s", mode, i_know)
        try:
            asyncio.run(_db_reset_mysql_async(dsn, mode=mode, confirm=i_know))
        except Exception as e:  # noqa: BLE001
            console.print(f"[red]db-reset failed: {e}[/]")
            raise typer.Exit(1) from e
|
||||||
307
decnet/cli/deploy.py
Normal file
307
decnet/cli/deploy.py
Normal file
@@ -0,0 +1,307 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
|
from decnet.archetypes import Archetype, get_archetype
|
||||||
|
from decnet.config import DecnetConfig
|
||||||
|
from decnet.distros import get_distro
|
||||||
|
from decnet.env import DECNET_API_HOST, DECNET_INGEST_LOG_FILE
|
||||||
|
from decnet.fleet import all_service_names, build_deckies, build_deckies_from_ini
|
||||||
|
from decnet.ini_loader import load_ini
|
||||||
|
from decnet.network import detect_interface, detect_subnet, allocate_ips, get_host_ip
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .gating import _require_master_mode
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def _deploy_swarm(config: "DecnetConfig", *, dry_run: bool, no_cache: bool) -> None:
    """Shard deckies round-robin across enrolled workers and POST to swarmctl.

    Fetches the worker pool (hosts with status "enrolled" plus "active") from
    the swarmctl API, assigns each decky a host_uuid round-robin, submits the
    whole config in one /swarm/deploy request, and renders per-worker results.
    Raises typer.Exit(1) if no workers exist or any worker reports failure.
    """
    base = _utils._swarmctl_base_url(None)
    resp = _utils._http_request("GET", base + "/swarm/hosts?host_status=enrolled")
    enrolled = resp.json()
    resp2 = _utils._http_request("GET", base + "/swarm/hosts?host_status=active")
    active = resp2.json()
    workers = [*enrolled, *active]
    if not workers:
        console.print("[red]No enrolled workers — run `decnet swarm enroll ...` first.[/]")
        raise typer.Exit(1)

    # Round-robin shard assignment; model_copy leaves the original decky
    # objects untouched.
    assigned: list = []
    for idx, d in enumerate(config.deckies):
        target = workers[idx % len(workers)]
        assigned.append(d.model_copy(update={"host_uuid": target["uuid"]}))
    config = config.model_copy(update={"deckies": assigned})

    body = {"config": config.model_dump(mode="json"), "dry_run": dry_run, "no_cache": no_cache}
    console.print(f"[cyan]Dispatching {len(config.deckies)} deckies across {len(workers)} worker(s)...[/]")
    # 15-minute timeout — presumably deploys can involve slow image builds
    # on the workers (TODO confirm against the swarmctl deploy endpoint).
    resp3 = _utils._http_request("POST", base + "/swarm/deploy", json_body=body, timeout=900.0)
    results = resp3.json().get("results", [])

    table = Table(title="SWARM deploy results")
    for col in ("worker", "host_uuid", "ok", "detail"):
        table.add_column(col)
    any_failed = False
    for r in results:
        ok = bool(r.get("ok"))
        if not ok:
            any_failed = True
        detail = r.get("detail")
        if isinstance(detail, dict):
            # Dict detail payloads are summarised via their "status" field.
            detail = detail.get("status") or "ok"
        table.add_row(
            str(r.get("host_name") or ""),
            str(r.get("host_uuid") or ""),
            "[green]yes[/]" if ok else "[red]no[/]",
            # Truncate long error details so the table stays readable.
            str(detail)[:80],
        )
    console.print(table)
    if any_failed:
        raise typer.Exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Register the `deploy` command on the root Typer app."""

    @app.command()
    def deploy(
        mode: str = typer.Option("unihost", "--mode", "-m", help="Deployment mode: unihost | swarm"),
        deckies: Optional[int] = typer.Option(None, "--deckies", "-n", help="Number of deckies to deploy (required without --config)", min=1),
        interface: Optional[str] = typer.Option(None, "--interface", "-i", help="Host NIC (auto-detected if omitted)"),
        subnet: Optional[str] = typer.Option(None, "--subnet", help="LAN subnet CIDR (auto-detected if omitted)"),
        ip_start: Optional[str] = typer.Option(None, "--ip-start", help="First decky IP (auto if omitted)"),
        services: Optional[str] = typer.Option(None, "--services", help="Comma-separated services, e.g. ssh,smb,rdp"),
        randomize_services: bool = typer.Option(False, "--randomize-services", help="Assign random services to each decky"),
        distro: Optional[str] = typer.Option(None, "--distro", help="Comma-separated distro slugs, e.g. debian,ubuntu22,rocky9"),
        randomize_distros: bool = typer.Option(False, "--randomize-distros", help="Assign a random distro to each decky"),
        log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Host path for the collector to write RFC 5424 logs (e.g. /var/log/decnet/decnet.log)"),
        archetype_name: Optional[str] = typer.Option(None, "--archetype", "-a", help="Machine archetype slug (e.g. linux-server, windows-workstation)"),
        mutate_interval: Optional[int] = typer.Option(30, "--mutate-interval", help="Automatically rotate services every N minutes"),
        dry_run: bool = typer.Option(False, "--dry-run", help="Generate compose file without starting containers"),
        no_cache: bool = typer.Option(False, "--no-cache", help="Force rebuild all images, ignoring Docker layer cache"),
        parallel: bool = typer.Option(False, "--parallel", help="Build all images concurrently (enables BuildKit, separates build from up)"),
        ipvlan: bool = typer.Option(False, "--ipvlan", help="Use IPvlan L2 instead of MACVLAN (required on WiFi interfaces)"),
        config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to INI config file"),
        api: bool = typer.Option(False, "--api", help="Start the FastAPI backend to ingest and serve logs"),
        api_port: int = typer.Option(8000, "--api-port", help="Port for the backend API"),
        daemon: bool = typer.Option(False, "--daemon", help="Detach to background as a daemon process"),
    ) -> None:
        """Deploy deckies to the LAN.

        Builds a DecnetConfig from either an INI config file (--config) or
        CLI flags, then either dispatches it to swarm workers (--mode swarm)
        or deploys locally via decnet.engine. After a non-dry-run local
        deploy, spawns the background microservices (mutator watcher,
        collector, API, prober, profiler, sniffer) as detached subprocesses.

        Exits with code 1 on any validation failure (bad mode, unknown
        service/distro/archetype, missing --deckies, missing config file).
        """
        # Heavy/OS-level imports are deferred so `decnet --help` stays fast.
        import os
        import subprocess  # nosec B404
        import sys
        from pathlib import Path as _Path

        # Defence-in-depth: deploy is master-only (see cli/gating.py).
        _require_master_mode("deploy")
        # Daemonize early so all subsequent work (and log lines) happen in
        # the detached child process.
        if daemon:
            log.info("deploy daemonizing mode=%s deckies=%s", mode, deckies)
            _utils._daemonize()

        log.info("deploy command invoked mode=%s deckies=%s dry_run=%s", mode, deckies, dry_run)
        if mode not in ("unihost", "swarm"):
            console.print("[red]--mode must be 'unihost' or 'swarm'[/]")
            raise typer.Exit(1)

        if config_file:
            # --- INI-driven path: topology comes from the config file, with
            # CLI flags taking precedence where both are given. ---
            try:
                ini = load_ini(config_file)
            except FileNotFoundError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1)

            # Resolution order: CLI flag > INI value > auto-detection.
            iface = interface or ini.interface or detect_interface()
            subnet_cidr = subnet or ini.subnet
            effective_gateway = ini.gateway
            if subnet_cidr is None:
                subnet_cidr, effective_gateway = detect_subnet(iface)
            elif effective_gateway is None:
                # Subnet known but gateway missing: detect only the gateway.
                _, effective_gateway = detect_subnet(iface)

            host_ip = get_host_ip(iface)
            console.print(f"[dim]Config:[/] {config_file} [dim]Interface:[/] {iface} "
                          f"[dim]Subnet:[/] {subnet_cidr} [dim]Gateway:[/] {effective_gateway} "
                          f"[dim]Host IP:[/] {host_ip}")

            # Register any operator-defined services declared in the INI so
            # build_deckies_from_ini can resolve them by name.
            if ini.custom_services:
                from decnet.custom_service import CustomService
                from decnet.services.registry import register_custom_service
                for cs in ini.custom_services:
                    register_custom_service(
                        CustomService(
                            name=cs.name,
                            image=cs.image,
                            exec_cmd=cs.exec_cmd,
                            ports=cs.ports,
                        )
                    )

            effective_log_file = log_file
            try:
                decky_configs = build_deckies_from_ini(
                    ini, subnet_cidr, effective_gateway, host_ip, randomize_services, cli_mutate_interval=mutate_interval
                )
            except ValueError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1)
        else:
            # --- Flag-driven path: everything comes from CLI options. ---
            if deckies is None:
                console.print("[red]--deckies is required when --config is not used.[/]")
                raise typer.Exit(1)

            services_list = [s.strip() for s in services.split(",")] if services else None
            if services_list:
                # Validate requested services against the plugin registry.
                known = set(all_service_names())
                unknown = [s for s in services_list if s not in known]
                if unknown:
                    console.print(f"[red]Unknown service(s): {unknown}. Available: {all_service_names()}[/]")
                    raise typer.Exit(1)

            arch: Archetype | None = None
            if archetype_name:
                try:
                    arch = get_archetype(archetype_name)
                except ValueError as e:
                    console.print(f"[red]{e}[/]")
                    raise typer.Exit(1)

            # At least one source of service assignments is mandatory.
            if not services_list and not randomize_services and not arch:
                console.print("[red]Specify --services, --archetype, or --randomize-services.[/]")
                raise typer.Exit(1)

            iface = interface or detect_interface()
            if subnet is None:
                subnet_cidr, effective_gateway = detect_subnet(iface)
            else:
                subnet_cidr = subnet
                _, effective_gateway = detect_subnet(iface)

            host_ip = get_host_ip(iface)
            console.print(f"[dim]Interface:[/] {iface} [dim]Subnet:[/] {subnet_cidr} "
                          f"[dim]Gateway:[/] {effective_gateway} [dim]Host IP:[/] {host_ip}")

            distros_list = [d.strip() for d in distro.split(",")] if distro else None
            if distros_list:
                # get_distro raises ValueError for unknown slugs; validate all
                # up front before allocating anything.
                try:
                    for slug in distros_list:
                        get_distro(slug)
                except ValueError as e:
                    console.print(f"[red]{e}[/]")
                    raise typer.Exit(1)

            ips = allocate_ips(subnet_cidr, effective_gateway, host_ip, deckies, ip_start)
            decky_configs = build_deckies(
                deckies, ips, services_list, randomize_services,
                distros_explicit=distros_list, randomize_distros=randomize_distros,
                archetype=arch, mutate_interval=mutate_interval,
            )
            effective_log_file = log_file

        # The API needs a log file to ingest; default one if unset.
        if api and not effective_log_file:
            effective_log_file = os.path.join(os.getcwd(), "decnet.log")
            console.print(f"[cyan]API mode enabled: defaulting log-file to {effective_log_file}[/]")

        config = DecnetConfig(
            mode=mode,
            interface=iface,
            subnet=subnet_cidr,
            gateway=effective_gateway,
            deckies=decky_configs,
            log_file=effective_log_file,
            ipvlan=ipvlan,
            mutate_interval=mutate_interval,
        )

        log.debug("deploy: config built deckies=%d interface=%s subnet=%s", len(config.deckies), config.interface, config.subnet)

        if mode == "swarm":
            # Swarm mode: hand off to workers and return — the background
            # microservices below are only launched for local (unihost) runs.
            _deploy_swarm(config, dry_run=dry_run, no_cache=no_cache)
            if dry_run:
                log.info("deploy: swarm dry-run complete, no workers dispatched")
            else:
                log.info("deploy: swarm deployment complete deckies=%d", len(config.deckies))
            return

        from decnet.engine import deploy as _deploy
        _deploy(config, dry_run=dry_run, no_cache=no_cache, parallel=parallel)
        if dry_run:
            log.info("deploy: dry-run complete, no containers started")
        else:
            log.info("deploy: deployment complete deckies=%d", len(config.deckies))

        # --- Background microservices (real deploys only) ---
        if mutate_interval is not None and not dry_run:
            console.print(f"[green]Starting DECNET Mutator watcher in the background (interval: {mutate_interval}m)...[/]")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "mutate", "--watch"],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start mutator watcher.[/]")

        # Collector is skipped in API mode (the API's own ingest handles it —
        # presumably; confirm against the api branch below).
        if effective_log_file and not dry_run and not api:
            _collector_err = _Path(effective_log_file).with_suffix(".collector.log")
            console.print(f"[bold cyan]Starting log collector[/] → {effective_log_file}")
            # NOTE(review): the file object from open() is handed to Popen and
            # never explicitly closed by the parent — consider a with/fileno
            # pattern. Confirm before changing; the child inherits the fd.
            subprocess.Popen(  # nosec B603
                [sys.executable, "-m", "decnet.cli", "collect", "--log-file", str(effective_log_file)],
                stdin=subprocess.DEVNULL,
                stdout=open(_collector_err, "a"),
                stderr=subprocess.STDOUT,
                start_new_session=True,
            )

        if api and not dry_run:
            console.print(f"[green]Starting DECNET API on port {api_port}...[/]")
            # Pass the ingest log path to uvicorn's process via the env.
            _env: dict[str, str] = os.environ.copy()
            _env["DECNET_INGEST_LOG_FILE"] = str(effective_log_file or "")
            try:
                # NOTE(review): unlike the other daemons this Popen has no
                # start_new_session=True — verify whether the API is meant to
                # die with the parent or that is an oversight.
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "uvicorn", "decnet.web.api:app", "--host", DECNET_API_HOST, "--port", str(api_port)],
                    env=_env,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT
                )
                console.print(f"[dim]API running at http://{DECNET_API_HOST}:{api_port}[/]")
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")

        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-PROBER[/] (auto-discovers attackers from log stream)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "probe", "--daemon", "--log-file", str(effective_log_file)],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-PROBER.[/]")

        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-PROFILER[/] (builds attacker profiles from log stream)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "profiler", "--daemon"],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-PROFILER.[/]")

        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-SNIFFER[/] (passive network capture)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "sniffer", "--daemon", "--log-file", str(effective_log_file)],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-SNIFFER.[/]")
|
||||||
74
decnet/cli/forwarder.py
Normal file
74
decnet/cli/forwarder.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import pathlib
|
||||||
|
import signal
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from decnet.env import DECNET_INGEST_LOG_FILE
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Register the worker-side `forwarder` command on the root Typer app."""

    @app.command()
    def forwarder(
        master_host: Optional[str] = typer.Option(None, "--master-host", help="Master listener hostname/IP (default: $DECNET_SWARM_MASTER_HOST)"),
        master_port: int = typer.Option(6514, "--master-port", help="Master listener TCP port (RFC 5425 default 6514)"),
        log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Local RFC 5424 file to tail and forward"),
        agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker cert bundle dir (default: ~/.decnet/agent)"),
        state_db: Optional[str] = typer.Option(None, "--state-db", help="Forwarder offset SQLite path (default: <agent_dir>/forwarder.db)"),
        poll_interval: float = typer.Option(0.5, "--poll-interval", help="Seconds between log file stat checks"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the worker-side syslog-over-TLS forwarder (RFC 5425, mTLS to master:6514)."""
        # Deferred imports keep `decnet --help` fast on worker hosts.
        from decnet.env import DECNET_SWARM_MASTER_HOST
        from decnet.swarm import pki
        from decnet.swarm.log_forwarder import ForwarderConfig, run_forwarder

        # CLI flag wins over the environment variable; exit code 2 signals
        # a usage error (consistent with the other validation exits below).
        resolved_host = master_host or DECNET_SWARM_MASTER_HOST
        if not resolved_host:
            console.print("[red]--master-host is required (or set DECNET_SWARM_MASTER_HOST).[/]")
            raise typer.Exit(2)

        # The worker must already hold the mTLS bundle issued at enrollment.
        resolved_agent_dir = pathlib.Path(agent_dir) if agent_dir else pki.DEFAULT_AGENT_DIR
        if not (resolved_agent_dir / "worker.crt").exists():
            console.print(f"[red]No worker cert bundle at {resolved_agent_dir} — enroll from the master first.[/]")
            raise typer.Exit(2)

        if not log_file:
            console.print("[red]--log-file is required.[/]")
            raise typer.Exit(2)

        cfg = ForwarderConfig(
            log_path=pathlib.Path(log_file),
            master_host=resolved_host,
            master_port=master_port,
            agent_dir=resolved_agent_dir,
            # None lets ForwarderConfig pick its documented default
            # (<agent_dir>/forwarder.db per the --state-db help text).
            state_db=pathlib.Path(state_db) if state_db else None,
        )

        # Daemonize only after validation so usage errors still reach the tty.
        if daemon:
            log.info("forwarder daemonizing master=%s:%d log=%s", resolved_host, master_port, log_file)
            _utils._daemonize()

        log.info("forwarder command invoked master=%s:%d log=%s", resolved_host, master_port, log_file)
        console.print(f"[green]Starting DECNET forwarder → {resolved_host}:{master_port} (mTLS)...[/]")

        async def _main() -> None:
            # Translate SIGTERM/SIGINT into a cooperative stop event so
            # run_forwarder can shut down cleanly.
            stop = asyncio.Event()
            loop = asyncio.get_running_loop()
            for sig in (signal.SIGTERM, signal.SIGINT):
                try:
                    loop.add_signal_handler(sig, stop.set)
                except (NotImplementedError, RuntimeError):  # pragma: no cover
                    # e.g. platforms/threads where loop signal handlers
                    # are unsupported; fall back to default handling.
                    pass
            await run_forwarder(cfg, poll_interval=poll_interval, stop_event=stop)

        try:
            asyncio.run(_main())
        except KeyboardInterrupt:
            # Ctrl-C is a normal stop; exit quietly.
            pass
|
||||||
71
decnet/cli/gating.py
Normal file
71
decnet/cli/gating.py
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
"""Role-based CLI gating.
|
||||||
|
|
||||||
|
MAINTAINERS: when you add a new Typer command (or add_typer group) that is
|
||||||
|
master-only, register its name in MASTER_ONLY_COMMANDS / MASTER_ONLY_GROUPS
|
||||||
|
below. The gate is the only thing that:
|
||||||
|
(a) hides the command from `decnet --help` on worker hosts, and
|
||||||
|
(b) prevents a misconfigured worker from invoking master-side logic.
|
||||||
|
Forgetting to register a new command is a role-boundary bug. Grep for
|
||||||
|
MASTER_ONLY when touching command registration.
|
||||||
|
|
||||||
|
Worker-legitimate commands (NOT in these sets): agent, updater, forwarder,
|
||||||
|
status, collect, probe, sniffer. Agents run deckies locally and should be
|
||||||
|
able to inspect them + run the per-host microservices (collector streams
|
||||||
|
container logs, prober characterizes attackers hitting this host, sniffer
|
||||||
|
captures traffic). Mutator and Profiler stay master-only: the mutator
|
||||||
|
orchestrates respawns across the swarm; the profiler rebuilds attacker
|
||||||
|
profiles against the master DB (no per-host DB exists).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from .utils import console
|
||||||
|
|
||||||
|
# Top-level Typer command names that must never run on an agent host.
# NOTE(review): _gate_commands_by_mode matches on `c.name or
# c.callback.__name__`, so a hyphenated entry like "db-reset" only matches
# a command registered with an explicit name= — verify when adding entries.
MASTER_ONLY_COMMANDS: frozenset[str] = frozenset({
    "api", "swarmctl", "deploy", "redeploy", "teardown",
    "mutate", "listener", "profiler",
    "services", "distros", "correlate", "archetypes", "web",
    "db-reset",
})
# add_typer() sub-app (command group) names that are likewise master-only.
MASTER_ONLY_GROUPS: frozenset[str] = frozenset({"swarm"})
|
||||||
|
|
||||||
|
|
||||||
|
def _agent_mode_active() -> bool:
|
||||||
|
"""True when the host is configured as an agent AND master commands are
|
||||||
|
disallowed (the default for agents). Workers overriding this explicitly
|
||||||
|
set DECNET_DISALLOW_MASTER=false to opt into hybrid use."""
|
||||||
|
mode = os.environ.get("DECNET_MODE", "master").lower()
|
||||||
|
disallow = os.environ.get("DECNET_DISALLOW_MASTER", "true").lower() == "true"
|
||||||
|
return mode == "agent" and disallow
|
||||||
|
|
||||||
|
|
||||||
|
def _require_master_mode(command_name: str) -> None:
    """Defence-in-depth guard for the top of every master-only command body.

    The registration-time gate in _gate_commands_by_mode() already removes
    master-only commands from Typer's dispatch table; this check additionally
    protects against direct function imports (tests, third-party tools) that
    bypass Typer entirely. Raises typer.Exit(1) on an agent-mode host.
    """
    if not _agent_mode_active():
        return
    console.print(
        f"[red]`decnet {command_name}` is a master-only command; this host "
        f"is configured as an agent (DECNET_MODE=agent).[/]"
    )
    raise typer.Exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def _gate_commands_by_mode(_app: typer.Typer) -> None:
    """Strip master-only commands and groups from the app on agent hosts.

    No-op on master hosts (or agents that opted into hybrid use via
    DECNET_DISALLOW_MASTER=false).
    """
    if not _agent_mode_active():
        return

    def _command_name(cmd) -> str:
        # Explicit name= wins; otherwise fall back to the callback's
        # function name (note: underscores, not dashes, in that case).
        return cmd.name or cmd.callback.__name__

    _app.registered_commands = [
        cmd for cmd in _app.registered_commands
        if _command_name(cmd) not in MASTER_ONLY_COMMANDS
    ]
    _app.registered_groups = [
        grp for grp in _app.registered_groups
        if grp.name not in MASTER_ONLY_GROUPS
    ]
|
||||||
52
decnet/cli/inventory.py
Normal file
52
decnet/cli/inventory.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import typer
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
|
from decnet.archetypes import all_archetypes
|
||||||
|
from decnet.distros import all_distros
|
||||||
|
from decnet.services.registry import all_services
|
||||||
|
|
||||||
|
from .utils import console
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the read-only inventory listing commands to the CLI app."""

    @app.command(name="services")
    def list_services() -> None:
        """List all registered honeypot service plugins."""
        table = Table(title="Available Services", show_lines=True)
        table.add_column("Name", style="bold cyan")
        table.add_column("Ports")
        table.add_column("Image")
        for svc_name, plugin in sorted(all_services().items()):
            port_list = ", ".join(str(port) for port in plugin.ports)
            table.add_row(svc_name, port_list, plugin.default_image)
        console.print(table)

    @app.command(name="distros")
    def list_distros() -> None:
        """List all available OS distro profiles for deckies."""
        table = Table(title="Available Distro Profiles", show_lines=True)
        table.add_column("Slug", style="bold cyan")
        table.add_column("Display Name")
        table.add_column("Docker Image", style="dim")
        for distro_slug, distro_profile in sorted(all_distros().items()):
            table.add_row(distro_slug, distro_profile.display_name, distro_profile.image)
        console.print(table)

    @app.command(name="archetypes")
    def list_archetypes() -> None:
        """List all machine archetype profiles."""
        table = Table(title="Machine Archetypes", show_lines=True)
        table.add_column("Slug", style="bold cyan")
        table.add_column("Display Name")
        table.add_column("Default Services", style="green")
        table.add_column("Description", style="dim")
        for arch_slug, archetype in sorted(all_archetypes().items()):
            svc_summary = ", ".join(archetype.services)
            table.add_row(arch_slug, archetype.display_name, svc_summary, archetype.description)
        console.print(table)
|
||||||
97
decnet/cli/lifecycle.py
Normal file
97
decnet/cli/lifecycle.py
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import subprocess # nosec B404
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
|
from decnet.env import DECNET_INGEST_LOG_FILE
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .gating import _agent_mode_active, _require_master_mode
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Register the lifecycle commands (redeploy / status / teardown)."""

    @app.command()
    def redeploy(
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to the DECNET log file"),
    ) -> None:
        """Check running DECNET services and relaunch any that are down."""
        # Fix: `redeploy` is listed in gating.MASTER_ONLY_COMMANDS, and the
        # gating module documents that _require_master_mode is called at the
        # top of every master-only command body (the registration-time gate
        # only hides the command from Typer dispatch; direct imports bypass
        # it). This call was missing here while `teardown` below has it.
        _require_master_mode("redeploy")
        log.info("redeploy: checking services")
        registry = _utils._service_registry(str(log_file))

        table = Table(title="DECNET Services", show_lines=True)
        table.add_column("Service", style="bold cyan")
        table.add_column("Status")
        table.add_column("PID", style="dim")
        table.add_column("Action")

        relaunched = 0
        for name, match_fn, launch_args in registry:
            pid = _utils._is_running(match_fn)
            if pid is not None:
                # Already up — nothing to do.
                table.add_row(name, "[green]UP[/]", str(pid), "—")
            else:
                # Relaunch as a detached daemon (own session, no tty I/O).
                try:
                    subprocess.Popen(  # nosec B603
                        launch_args,
                        stdin=subprocess.DEVNULL,
                        stdout=subprocess.DEVNULL,
                        stderr=subprocess.STDOUT,
                        start_new_session=True,
                    )
                    table.add_row(name, "[red]DOWN[/]", "—", "[green]relaunched[/]")
                    relaunched += 1
                except (FileNotFoundError, subprocess.SubprocessError) as exc:
                    table.add_row(name, "[red]DOWN[/]", "—", f"[red]failed: {exc}[/]")

        console.print(table)
        if relaunched:
            console.print(f"[green]{relaunched} service(s) relaunched.[/]")
        else:
            console.print("[green]All services running.[/]")

    @app.command()
    def status() -> None:
        """Show running deckies and their status."""
        log.info("status command invoked")
        from decnet.engine import status as _status
        _status()

        # Also report the per-host microservices. On agent hosts, hide the
        # master-only services that never run here.
        registry = _utils._service_registry(str(DECNET_INGEST_LOG_FILE))
        if _agent_mode_active():
            registry = [r for r in registry if r[0] not in {"Mutator", "Profiler", "API"}]
        svc_table = Table(title="DECNET Services", show_lines=True)
        svc_table.add_column("Service", style="bold cyan")
        svc_table.add_column("Status")
        svc_table.add_column("PID", style="dim")

        for name, match_fn, _launch_args in registry:
            pid = _utils._is_running(match_fn)
            if pid is not None:
                svc_table.add_row(name, "[green]UP[/]", str(pid))
            else:
                svc_table.add_row(name, "[red]DOWN[/]", "—")

        console.print(svc_table)

    @app.command()
    def teardown(
        all_: bool = typer.Option(False, "--all", help="Tear down all deckies and remove network"),
        id_: Optional[str] = typer.Option(None, "--id", help="Tear down a specific decky by name"),
    ) -> None:
        """Stop and remove deckies."""
        _require_master_mode("teardown")
        if not all_ and not id_:
            console.print("[red]Specify --all or --id <name>.[/]")
            raise typer.Exit(1)

        log.info("teardown command invoked all=%s id=%s", all_, id_)
        from decnet.engine import teardown as _teardown
        _teardown(decky_id=id_)
        log.info("teardown complete all=%s id=%s", all_, id_)

        # A full teardown also stops the background microservices.
        if all_:
            _utils._kill_all_services()
||||||
57
decnet/cli/listener.py
Normal file
57
decnet/cli/listener.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import pathlib
|
||||||
|
import signal
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Register the master-side `listener` command on the root Typer app."""

    @app.command()
    def listener(
        bind_host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the master syslog-TLS listener"),  # nosec B104
        bind_port: int = typer.Option(6514, "--port", help="Listener TCP port (RFC 5425 default 6514)"),
        log_path: Optional[str] = typer.Option(None, "--log-path", help="RFC 5424 forensic sink (default: ./master.log)"),
        json_path: Optional[str] = typer.Option(None, "--json-path", help="Parsed-JSON ingest sink (default: ./master.json)"),
        ca_dir: Optional[str] = typer.Option(None, "--ca-dir", help="DECNET CA dir (default: ~/.decnet/ca)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the master-side syslog-over-TLS listener (RFC 5425, mTLS)."""
        # Deferred imports keep CLI startup light.
        from decnet.swarm import pki
        from decnet.swarm.log_listener import ListenerConfig, run_listener

        # Fall back to the package CA dir and cwd-relative sinks when unset.
        resolved_ca_dir = pathlib.Path(ca_dir) if ca_dir else pki.DEFAULT_CA_DIR
        resolved_log = pathlib.Path(log_path) if log_path else pathlib.Path("master.log")
        resolved_json = pathlib.Path(json_path) if json_path else pathlib.Path("master.json")

        cfg = ListenerConfig(
            log_path=resolved_log, json_path=resolved_json,
            bind_host=bind_host, bind_port=bind_port, ca_dir=resolved_ca_dir,
        )

        # Daemonize after config resolution so errors reach the tty.
        if daemon:
            log.info("listener daemonizing host=%s port=%d", bind_host, bind_port)
            _utils._daemonize()

        log.info("listener command invoked host=%s port=%d", bind_host, bind_port)
        console.print(f"[green]Starting DECNET log listener on {bind_host}:{bind_port} (mTLS)...[/]")

        async def _main() -> None:
            # Translate SIGTERM/SIGINT into a cooperative stop event so the
            # listener can shut down cleanly.
            stop = asyncio.Event()
            loop = asyncio.get_running_loop()
            for sig in (signal.SIGTERM, signal.SIGINT):
                try:
                    loop.add_signal_handler(sig, stop.set)
                except (NotImplementedError, RuntimeError):  # pragma: no cover
                    # Platforms/threads without loop signal handlers.
                    pass
            await run_listener(cfg, stop_event=stop)

        try:
            asyncio.run(_main())
        except KeyboardInterrupt:
            # Ctrl-C is a normal stop; exit quietly.
            pass
|
||||||
34
decnet/cli/profiler.py
Normal file
34
decnet/cli/profiler.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Register the `profiler` command on the root Typer app."""

    @app.command(name="profiler")
    def profiler_cmd(
        interval: int = typer.Option(30, "--interval", "-i", help="Seconds between profile rebuild cycles"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the attacker profiler as a standalone microservice."""
        # Deferred imports keep `decnet --help` fast.
        import asyncio
        from decnet.profiler import attacker_profile_worker
        from decnet.web.dependencies import repo

        # Daemonize before logging so the log lines come from the child.
        if daemon:
            log.info("profiler daemonizing interval=%d", interval)
            _utils._daemonize()

        log.info("profiler starting interval=%d", interval)
        console.print(f"[bold cyan]Profiler starting[/] (interval: {interval}s)")

        async def _run() -> None:
            # Initialize the repository (DB/schema) before entering the
            # rebuild loop; attacker_profile_worker runs until interrupted.
            await repo.initialize()
            await attacker_profile_worker(repo, interval=interval)

        try:
            asyncio.run(_run())
        except KeyboardInterrupt:
            console.print("\n[yellow]Profiler stopped.[/]")
|
||||||
31
decnet/cli/sniffer.py
Normal file
31
decnet/cli/sniffer.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from decnet.env import DECNET_INGEST_LOG_FILE
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Register the `sniffer` command on the root Typer app."""

    @app.command(name="sniffer")
    def sniffer_cmd(
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write captured syslog + JSON records"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the network sniffer as a standalone microservice."""
        # Deferred imports keep `decnet --help` fast.
        import asyncio
        from decnet.sniffer import sniffer_worker

        # Daemonize before logging so the log lines come from the child.
        if daemon:
            log.info("sniffer daemonizing log_file=%s", log_file)
            _utils._daemonize()

        log.info("sniffer starting log_file=%s", log_file)
        console.print(f"[bold cyan]Sniffer starting[/] → {log_file}")

        try:
            # sniffer_worker runs until interrupted, appending records to
            # log_file.
            asyncio.run(sniffer_worker(log_file))
        except KeyboardInterrupt:
            console.print("\n[yellow]Sniffer stopped.[/]")
|
||||||
346
decnet/cli/swarm.py
Normal file
346
decnet/cli/swarm.py
Normal file
@@ -0,0 +1,346 @@
|
|||||||
|
"""`decnet swarm ...` — master-side operator commands (HTTP to local swarmctl)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
|
||||||
|
swarm_app = typer.Typer(
|
||||||
|
name="swarm",
|
||||||
|
help="Manage swarm workers (enroll, list, decommission). Requires `decnet swarmctl` running.",
|
||||||
|
no_args_is_help=True,
|
||||||
|
)
|
||||||
|
app.add_typer(swarm_app, name="swarm")
|
||||||
|
|
||||||
|
@swarm_app.command("enroll")
|
||||||
|
def swarm_enroll(
|
||||||
|
name: str = typer.Option(..., "--name", help="Short hostname for the worker (also the cert CN)"),
|
||||||
|
address: str = typer.Option(..., "--address", help="IP or DNS the master uses to reach the worker"),
|
||||||
|
agent_port: int = typer.Option(8765, "--agent-port", help="Worker agent TCP port"),
|
||||||
|
sans: Optional[str] = typer.Option(None, "--sans", help="Comma-separated extra SANs for the worker cert"),
|
||||||
|
notes: Optional[str] = typer.Option(None, "--notes", help="Free-form operator notes"),
|
||||||
|
out_dir: Optional[str] = typer.Option(None, "--out-dir", help="Write the bundle (ca.crt/worker.crt/worker.key) to this dir for scp"),
|
||||||
|
updater: bool = typer.Option(False, "--updater", help="Also issue an updater-identity cert (CN=updater@<name>) for the remote self-updater"),
|
||||||
|
url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL (default: 127.0.0.1:8770)"),
|
||||||
|
) -> None:
|
||||||
|
"""Issue a mTLS bundle for a new worker and register it in the swarm."""
|
||||||
|
import pathlib as _pathlib
|
||||||
|
|
||||||
|
body: dict = {"name": name, "address": address, "agent_port": agent_port}
|
||||||
|
if sans:
|
||||||
|
body["sans"] = [s.strip() for s in sans.split(",") if s.strip()]
|
||||||
|
if notes:
|
||||||
|
body["notes"] = notes
|
||||||
|
if updater:
|
||||||
|
body["issue_updater_bundle"] = True
|
||||||
|
|
||||||
|
resp = _utils._http_request("POST", _utils._swarmctl_base_url(url) + "/swarm/enroll", json_body=body)
|
||||||
|
data = resp.json()
|
||||||
|
|
||||||
|
console.print(f"[green]Enrolled worker:[/] {data['name']} "
|
||||||
|
f"[dim]uuid=[/]{data['host_uuid']} "
|
||||||
|
f"[dim]fingerprint=[/]{data['fingerprint']}")
|
||||||
|
if data.get("updater"):
|
||||||
|
console.print(f"[green] + updater identity[/] "
|
||||||
|
f"[dim]fingerprint=[/]{data['updater']['fingerprint']}")
|
||||||
|
|
||||||
|
if out_dir:
|
||||||
|
target = _pathlib.Path(out_dir).expanduser()
|
||||||
|
target.mkdir(parents=True, exist_ok=True)
|
||||||
|
(target / "ca.crt").write_text(data["ca_cert_pem"])
|
||||||
|
(target / "worker.crt").write_text(data["worker_cert_pem"])
|
||||||
|
(target / "worker.key").write_text(data["worker_key_pem"])
|
||||||
|
for leaf in ("worker.key",):
|
||||||
|
try:
|
||||||
|
(target / leaf).chmod(0o600)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
console.print(f"[cyan]Agent bundle written to[/] {target}")
|
||||||
|
|
||||||
|
if data.get("updater"):
|
||||||
|
upd_target = target.parent / f"{target.name}-updater"
|
||||||
|
upd_target.mkdir(parents=True, exist_ok=True)
|
||||||
|
(upd_target / "ca.crt").write_text(data["ca_cert_pem"])
|
||||||
|
(upd_target / "updater.crt").write_text(data["updater"]["updater_cert_pem"])
|
||||||
|
(upd_target / "updater.key").write_text(data["updater"]["updater_key_pem"])
|
||||||
|
try:
|
||||||
|
(upd_target / "updater.key").chmod(0o600)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
console.print(f"[cyan]Updater bundle written to[/] {upd_target}")
|
||||||
|
console.print("[dim]Ship the agent dir to ~/.decnet/agent/ and the updater dir to ~/.decnet/updater/ on the worker.[/]")
|
||||||
|
else:
|
||||||
|
console.print("[dim]Ship this directory to the worker at ~/.decnet/agent/ (or wherever `decnet agent --agent-dir` points).[/]")
|
||||||
|
else:
|
||||||
|
console.print("[yellow]No --out-dir given — bundle PEMs are in the JSON response; persist them before leaving this shell.[/]")
|
||||||
|
|
||||||
|
@swarm_app.command("list")
|
||||||
|
def swarm_list(
|
||||||
|
host_status: Optional[str] = typer.Option(None, "--status", help="Filter by status (enrolled|active|unreachable|decommissioned)"),
|
||||||
|
url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
|
||||||
|
) -> None:
|
||||||
|
"""List enrolled workers."""
|
||||||
|
q = f"?host_status={host_status}" if host_status else ""
|
||||||
|
resp = _utils._http_request("GET", _utils._swarmctl_base_url(url) + "/swarm/hosts" + q)
|
||||||
|
rows = resp.json()
|
||||||
|
if not rows:
|
||||||
|
console.print("[dim]No workers enrolled.[/]")
|
||||||
|
return
|
||||||
|
table = Table(title="DECNET swarm workers")
|
||||||
|
for col in ("name", "address", "port", "status", "last heartbeat", "enrolled"):
|
||||||
|
table.add_column(col)
|
||||||
|
for r in rows:
|
||||||
|
table.add_row(
|
||||||
|
r.get("name") or "",
|
||||||
|
r.get("address") or "",
|
||||||
|
str(r.get("agent_port") or ""),
|
||||||
|
r.get("status") or "",
|
||||||
|
str(r.get("last_heartbeat") or "—"),
|
||||||
|
str(r.get("enrolled_at") or "—"),
|
||||||
|
)
|
||||||
|
console.print(table)
|
||||||
|
|
||||||
|
@swarm_app.command("check")
|
||||||
|
def swarm_check(
|
||||||
|
url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
|
||||||
|
json_out: bool = typer.Option(False, "--json", help="Emit JSON instead of a table"),
|
||||||
|
) -> None:
|
||||||
|
"""Actively probe every enrolled worker and refresh status + last_heartbeat."""
|
||||||
|
resp = _utils._http_request("POST", _utils._swarmctl_base_url(url) + "/swarm/check", timeout=60.0)
|
||||||
|
payload = resp.json()
|
||||||
|
results = payload.get("results", [])
|
||||||
|
|
||||||
|
if json_out:
|
||||||
|
console.print_json(data=payload)
|
||||||
|
return
|
||||||
|
|
||||||
|
if not results:
|
||||||
|
console.print("[dim]No workers enrolled.[/]")
|
||||||
|
return
|
||||||
|
|
||||||
|
table = Table(title="DECNET swarm check")
|
||||||
|
for col in ("name", "address", "reachable", "detail"):
|
||||||
|
table.add_column(col)
|
||||||
|
for r in results:
|
||||||
|
reachable = r.get("reachable")
|
||||||
|
mark = "[green]yes[/]" if reachable else "[red]no[/]"
|
||||||
|
detail = r.get("detail")
|
||||||
|
detail_str = "—"
|
||||||
|
if isinstance(detail, dict):
|
||||||
|
detail_str = detail.get("status") or ", ".join(f"{k}={v}" for k, v in detail.items())
|
||||||
|
elif detail is not None:
|
||||||
|
detail_str = str(detail)
|
||||||
|
table.add_row(
|
||||||
|
r.get("name") or "",
|
||||||
|
r.get("address") or "",
|
||||||
|
mark,
|
||||||
|
detail_str,
|
||||||
|
)
|
||||||
|
console.print(table)
|
||||||
|
|
||||||
|
@swarm_app.command("update")
|
||||||
|
def swarm_update(
|
||||||
|
host: Optional[str] = typer.Option(None, "--host", help="Target worker (name or UUID). Omit with --all."),
|
||||||
|
all_hosts: bool = typer.Option(False, "--all", help="Push to every enrolled worker."),
|
||||||
|
include_self: bool = typer.Option(False, "--include-self", help="Also push to each updater's /update-self after a successful agent update."),
|
||||||
|
root: Optional[str] = typer.Option(None, "--root", help="Source tree to tar (default: CWD)."),
|
||||||
|
exclude: list[str] = typer.Option([], "--exclude", help="Additional exclude glob. Repeatable."),
|
||||||
|
updater_port: int = typer.Option(8766, "--updater-port", help="Port the workers' updater listens on."),
|
||||||
|
dry_run: bool = typer.Option(False, "--dry-run", help="Build the tarball and print stats; no network."),
|
||||||
|
url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL."),
|
||||||
|
) -> None:
|
||||||
|
"""Push the current working tree to workers' self-updaters (with auto-rollback on failure)."""
|
||||||
|
import asyncio
|
||||||
|
import pathlib as _pathlib
|
||||||
|
|
||||||
|
from decnet.swarm.tar_tree import tar_working_tree, detect_git_sha
|
||||||
|
from decnet.swarm.updater_client import UpdaterClient
|
||||||
|
|
||||||
|
if not (host or all_hosts):
|
||||||
|
console.print("[red]Supply --host <name> or --all.[/]")
|
||||||
|
raise typer.Exit(2)
|
||||||
|
if host and all_hosts:
|
||||||
|
console.print("[red]--host and --all are mutually exclusive.[/]")
|
||||||
|
raise typer.Exit(2)
|
||||||
|
|
||||||
|
base = _utils._swarmctl_base_url(url)
|
||||||
|
resp = _utils._http_request("GET", base + "/swarm/hosts")
|
||||||
|
rows = resp.json()
|
||||||
|
if host:
|
||||||
|
targets = [r for r in rows if r.get("name") == host or r.get("uuid") == host]
|
||||||
|
if not targets:
|
||||||
|
console.print(f"[red]No enrolled worker matching '{host}'.[/]")
|
||||||
|
raise typer.Exit(1)
|
||||||
|
else:
|
||||||
|
targets = [r for r in rows if r.get("status") != "decommissioned"]
|
||||||
|
if not targets:
|
||||||
|
console.print("[dim]No targets.[/]")
|
||||||
|
return
|
||||||
|
|
||||||
|
tree_root = _pathlib.Path(root) if root else _pathlib.Path.cwd()
|
||||||
|
sha = detect_git_sha(tree_root)
|
||||||
|
console.print(f"[dim]Tarring[/] {tree_root} [dim]sha={sha or '(not a git repo)'}[/]")
|
||||||
|
tarball = tar_working_tree(tree_root, extra_excludes=exclude)
|
||||||
|
console.print(f"[dim]Tarball size:[/] {len(tarball):,} bytes")
|
||||||
|
|
||||||
|
if dry_run:
|
||||||
|
console.print("[yellow]--dry-run: not pushing.[/]")
|
||||||
|
for t in targets:
|
||||||
|
console.print(f" would push to [cyan]{t.get('name')}[/] at {t.get('address')}:{updater_port}")
|
||||||
|
return
|
||||||
|
|
||||||
|
async def _push_one(h: dict) -> dict:
|
||||||
|
name = h.get("name") or h.get("uuid")
|
||||||
|
out: dict = {"name": name, "address": h.get("address"), "agent": None, "self": None}
|
||||||
|
try:
|
||||||
|
async with UpdaterClient(h, updater_port=updater_port) as u:
|
||||||
|
r = await u.update(tarball, sha=sha)
|
||||||
|
out["agent"] = {"status": r.status_code, "body": r.json() if r.content else {}}
|
||||||
|
if r.status_code == 200 and include_self:
|
||||||
|
rs = await u.update_self(tarball, sha=sha)
|
||||||
|
out["self"] = {"status": rs.status_code, "body": rs.json() if rs.content else {}}
|
||||||
|
except Exception as exc: # noqa: BLE001
|
||||||
|
out["error"] = f"{type(exc).__name__}: {exc}"
|
||||||
|
return out
|
||||||
|
|
||||||
|
async def _push_all() -> list[dict]:
|
||||||
|
return await asyncio.gather(*(_push_one(t) for t in targets))
|
||||||
|
|
||||||
|
results = asyncio.run(_push_all())
|
||||||
|
|
||||||
|
table = Table(title="DECNET swarm update")
|
||||||
|
for col in ("host", "address", "agent", "self", "detail"):
|
||||||
|
table.add_column(col)
|
||||||
|
any_failure = False
|
||||||
|
for r in results:
|
||||||
|
agent = r.get("agent") or {}
|
||||||
|
selff = r.get("self") or {}
|
||||||
|
err = r.get("error")
|
||||||
|
if err:
|
||||||
|
any_failure = True
|
||||||
|
table.add_row(r["name"], r.get("address") or "", "[red]error[/]", "—", err)
|
||||||
|
continue
|
||||||
|
a_status = agent.get("status")
|
||||||
|
if a_status == 200:
|
||||||
|
agent_cell = "[green]updated[/]"
|
||||||
|
elif a_status == 409:
|
||||||
|
agent_cell = "[yellow]rolled-back[/]"
|
||||||
|
any_failure = True
|
||||||
|
else:
|
||||||
|
agent_cell = f"[red]{a_status}[/]"
|
||||||
|
any_failure = True
|
||||||
|
if not include_self:
|
||||||
|
self_cell = "—"
|
||||||
|
elif selff.get("status") == 200 or selff.get("status") is None:
|
||||||
|
self_cell = "[green]ok[/]" if selff else "[dim]skipped[/]"
|
||||||
|
else:
|
||||||
|
self_cell = f"[red]{selff.get('status')}[/]"
|
||||||
|
detail = ""
|
||||||
|
body = agent.get("body") or {}
|
||||||
|
if isinstance(body, dict):
|
||||||
|
detail = body.get("release", {}).get("sha") or body.get("detail", {}).get("error") or ""
|
||||||
|
table.add_row(r["name"], r.get("address") or "", agent_cell, self_cell, str(detail)[:80])
|
||||||
|
console.print(table)
|
||||||
|
|
||||||
|
if any_failure:
|
||||||
|
raise typer.Exit(1)
|
||||||
|
|
||||||
|
@swarm_app.command("deckies")
|
||||||
|
def swarm_deckies(
|
||||||
|
host: Optional[str] = typer.Option(None, "--host", help="Filter by worker name or UUID"),
|
||||||
|
state: Optional[str] = typer.Option(None, "--state", help="Filter by shard state (pending|running|failed|torn_down)"),
|
||||||
|
url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
|
||||||
|
json_out: bool = typer.Option(False, "--json", help="Emit JSON instead of a table"),
|
||||||
|
) -> None:
|
||||||
|
"""List deployed deckies across the swarm with their owning worker host."""
|
||||||
|
base = _utils._swarmctl_base_url(url)
|
||||||
|
|
||||||
|
host_uuid: Optional[str] = None
|
||||||
|
if host:
|
||||||
|
resp = _utils._http_request("GET", base + "/swarm/hosts")
|
||||||
|
rows = resp.json()
|
||||||
|
match = next((r for r in rows if r.get("uuid") == host or r.get("name") == host), None)
|
||||||
|
if match is None:
|
||||||
|
console.print(f"[red]No enrolled worker matching '{host}'.[/]")
|
||||||
|
raise typer.Exit(1)
|
||||||
|
host_uuid = match["uuid"]
|
||||||
|
|
||||||
|
query = []
|
||||||
|
if host_uuid:
|
||||||
|
query.append(f"host_uuid={host_uuid}")
|
||||||
|
if state:
|
||||||
|
query.append(f"state={state}")
|
||||||
|
path = "/swarm/deckies" + ("?" + "&".join(query) if query else "")
|
||||||
|
|
||||||
|
resp = _utils._http_request("GET", base + path)
|
||||||
|
rows = resp.json()
|
||||||
|
|
||||||
|
if json_out:
|
||||||
|
console.print_json(data=rows)
|
||||||
|
return
|
||||||
|
|
||||||
|
if not rows:
|
||||||
|
console.print("[dim]No deckies deployed.[/]")
|
||||||
|
return
|
||||||
|
|
||||||
|
table = Table(title="DECNET swarm deckies")
|
||||||
|
for col in ("decky", "host", "address", "state", "services"):
|
||||||
|
table.add_column(col)
|
||||||
|
for r in rows:
|
||||||
|
services = ",".join(r.get("services") or []) or "—"
|
||||||
|
state_val = r.get("state") or "pending"
|
||||||
|
colored = {
|
||||||
|
"running": f"[green]{state_val}[/]",
|
||||||
|
"failed": f"[red]{state_val}[/]",
|
||||||
|
"pending": f"[yellow]{state_val}[/]",
|
||||||
|
"torn_down": f"[dim]{state_val}[/]",
|
||||||
|
}.get(state_val, state_val)
|
||||||
|
table.add_row(
|
||||||
|
r.get("decky_name") or "",
|
||||||
|
r.get("host_name") or "<unknown>",
|
||||||
|
r.get("host_address") or "",
|
||||||
|
colored,
|
||||||
|
services,
|
||||||
|
)
|
||||||
|
console.print(table)
|
||||||
|
|
||||||
|
@swarm_app.command("decommission")
|
||||||
|
def swarm_decommission(
|
||||||
|
name: Optional[str] = typer.Option(None, "--name", help="Worker hostname"),
|
||||||
|
uuid: Optional[str] = typer.Option(None, "--uuid", help="Worker UUID (skip lookup)"),
|
||||||
|
url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
|
||||||
|
yes: bool = typer.Option(False, "--yes", "-y", help="Skip interactive confirmation"),
|
||||||
|
) -> None:
|
||||||
|
"""Remove a worker from the swarm (cascades decky shard rows)."""
|
||||||
|
if not (name or uuid):
|
||||||
|
console.print("[red]Supply --name or --uuid.[/]")
|
||||||
|
raise typer.Exit(2)
|
||||||
|
|
||||||
|
base = _utils._swarmctl_base_url(url)
|
||||||
|
target_uuid = uuid
|
||||||
|
target_name = name
|
||||||
|
if target_uuid is None:
|
||||||
|
resp = _utils._http_request("GET", base + "/swarm/hosts")
|
||||||
|
rows = resp.json()
|
||||||
|
match = next((r for r in rows if r.get("name") == name), None)
|
||||||
|
if match is None:
|
||||||
|
console.print(f"[red]No enrolled worker named '{name}'.[/]")
|
||||||
|
raise typer.Exit(1)
|
||||||
|
target_uuid = match["uuid"]
|
||||||
|
target_name = match.get("name") or target_name
|
||||||
|
|
||||||
|
if not yes:
|
||||||
|
confirm = typer.confirm(f"Decommission worker {target_name!r} ({target_uuid})?", default=False)
|
||||||
|
if not confirm:
|
||||||
|
console.print("[dim]Aborted.[/]")
|
||||||
|
raise typer.Exit(0)
|
||||||
|
|
||||||
|
_utils._http_request("DELETE", f"{base}/swarm/hosts/{target_uuid}")
|
||||||
|
console.print(f"[green]Decommissioned {target_name or target_uuid}.[/]")
|
||||||
104
decnet/cli/swarmctl.py
Normal file
104
decnet/cli/swarmctl.py
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import signal
|
||||||
|
import subprocess # nosec B404
|
||||||
|
import sys
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .gating import _require_master_mode
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
|
||||||
|
@app.command()
|
||||||
|
def swarmctl(
|
||||||
|
port: int = typer.Option(8770, "--port", help="Port for the swarm controller"),
|
||||||
|
host: str = typer.Option("127.0.0.1", "--host", help="Bind address for the swarm controller"),
|
||||||
|
daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
|
||||||
|
no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"),
|
||||||
|
tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"),
|
||||||
|
cert: Optional[str] = typer.Option(None, "--cert", help="BYOC: path to TLS server cert (PEM). Auto-issues from the DECNET CA if omitted."),
|
||||||
|
key: Optional[str] = typer.Option(None, "--key", help="BYOC: path to TLS server private key (PEM)."),
|
||||||
|
client_ca: Optional[str] = typer.Option(None, "--client-ca", help="CA bundle used to verify worker client certs. Defaults to the DECNET CA."),
|
||||||
|
) -> None:
|
||||||
|
"""Run the DECNET SWARM controller (master-side, separate process from `decnet api`).
|
||||||
|
|
||||||
|
By default, `decnet swarmctl` auto-spawns `decnet listener` as a fully-
|
||||||
|
detached sibling process so the master starts accepting forwarder
|
||||||
|
connections on 6514 without a second manual invocation. The listener
|
||||||
|
survives swarmctl restarts and crashes — if it dies on its own,
|
||||||
|
restart it manually with `decnet listener --daemon …`. Pass
|
||||||
|
--no-listener to skip.
|
||||||
|
|
||||||
|
Pass ``--tls`` to serve over HTTPS with mutual-TLS enforcement. By
|
||||||
|
default the server cert is auto-issued from the DECNET CA under
|
||||||
|
``~/.decnet/swarmctl/`` so enrolled workers (which already ship that
|
||||||
|
CA's ``ca.crt``) trust it out of the box. BYOC via ``--cert``/``--key``
|
||||||
|
if you need a publicly-trusted or externally-managed cert.
|
||||||
|
"""
|
||||||
|
_require_master_mode("swarmctl")
|
||||||
|
if daemon:
|
||||||
|
log.info("swarmctl daemonizing host=%s port=%d", host, port)
|
||||||
|
_utils._daemonize()
|
||||||
|
|
||||||
|
if not no_listener:
|
||||||
|
listener_host = os.environ.get("DECNET_LISTENER_HOST", "0.0.0.0") # nosec B104
|
||||||
|
listener_port = int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514"))
|
||||||
|
lst_argv = [
|
||||||
|
sys.executable, "-m", "decnet", "listener",
|
||||||
|
"--host", listener_host,
|
||||||
|
"--port", str(listener_port),
|
||||||
|
"--daemon",
|
||||||
|
]
|
||||||
|
try:
|
||||||
|
pid = _utils._spawn_detached(lst_argv, _utils._pid_dir() / "listener.pid")
|
||||||
|
log.info("swarmctl auto-spawned listener pid=%d bind=%s:%d",
|
||||||
|
pid, listener_host, listener_port)
|
||||||
|
console.print(f"[dim]Auto-spawned listener (pid {pid}) on {listener_host}:{listener_port}.[/]")
|
||||||
|
except Exception as e: # noqa: BLE001
|
||||||
|
log.warning("swarmctl could not auto-spawn listener: %s", e)
|
||||||
|
console.print(f"[yellow]listener auto-spawn skipped: {e}[/]")
|
||||||
|
|
||||||
|
log.info("swarmctl command invoked host=%s port=%d tls=%s", host, port, tls)
|
||||||
|
scheme = "https" if tls else "http"
|
||||||
|
console.print(f"[green]Starting DECNET SWARM controller on {scheme}://{host}:{port}...[/]")
|
||||||
|
_cmd = [sys.executable, "-m", "uvicorn", "decnet.web.swarm_api:app",
|
||||||
|
"--host", host, "--port", str(port)]
|
||||||
|
if tls:
|
||||||
|
from decnet.swarm import pki as _pki
|
||||||
|
if cert and key:
|
||||||
|
cert_path, key_path = cert, key
|
||||||
|
elif cert or key:
|
||||||
|
console.print("[red]--cert and --key must be provided together.[/]")
|
||||||
|
raise typer.Exit(code=2)
|
||||||
|
else:
|
||||||
|
auto_cert, auto_key, _auto_ca = _pki.ensure_swarmctl_cert(host)
|
||||||
|
cert_path, key_path = str(auto_cert), str(auto_key)
|
||||||
|
console.print(f"[dim]Auto-issued swarmctl server cert → {cert_path}[/]")
|
||||||
|
ca_path = client_ca or str(_pki.DEFAULT_CA_DIR / "ca.crt")
|
||||||
|
_cmd += [
|
||||||
|
"--ssl-keyfile", key_path,
|
||||||
|
"--ssl-certfile", cert_path,
|
||||||
|
"--ssl-ca-certs", ca_path,
|
||||||
|
"--ssl-cert-reqs", "2",
|
||||||
|
]
|
||||||
|
try:
|
||||||
|
proc = subprocess.Popen(_cmd, start_new_session=True) # nosec B603 B404
|
||||||
|
try:
|
||||||
|
proc.wait()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
try:
|
||||||
|
os.killpg(proc.pid, signal.SIGTERM)
|
||||||
|
try:
|
||||||
|
proc.wait(timeout=10)
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
os.killpg(proc.pid, signal.SIGKILL)
|
||||||
|
proc.wait()
|
||||||
|
except ProcessLookupError:
|
||||||
|
pass
|
||||||
|
except (FileNotFoundError, subprocess.SubprocessError):
|
||||||
|
console.print("[red]Failed to start swarmctl. Ensure 'uvicorn' is installed in the current environment.[/]")
|
||||||
46
decnet/cli/updater.py
Normal file
46
decnet/cli/updater.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pathlib as _pathlib
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
|
||||||
|
@app.command()
|
||||||
|
def updater(
|
||||||
|
port: int = typer.Option(8766, "--port", help="Port for the self-updater daemon"),
|
||||||
|
host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the updater"), # nosec B104
|
||||||
|
updater_dir: Optional[str] = typer.Option(None, "--updater-dir", help="Updater cert bundle dir (default: ~/.decnet/updater)"),
|
||||||
|
install_dir: Optional[str] = typer.Option(None, "--install-dir", help="Release install root (default: /opt/decnet)"),
|
||||||
|
agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker agent cert bundle (for local /health probes; default: ~/.decnet/agent)"),
|
||||||
|
daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
|
||||||
|
) -> None:
|
||||||
|
"""Run the DECNET self-updater (requires a bundle in ~/.decnet/updater/)."""
|
||||||
|
from decnet.swarm import pki as _pki
|
||||||
|
from decnet.updater import server as _upd_server
|
||||||
|
|
||||||
|
resolved_updater = _pathlib.Path(updater_dir) if updater_dir else _upd_server.DEFAULT_UPDATER_DIR
|
||||||
|
resolved_install = _pathlib.Path(install_dir) if install_dir else _pathlib.Path("/opt/decnet")
|
||||||
|
resolved_agent = _pathlib.Path(agent_dir) if agent_dir else _pki.DEFAULT_AGENT_DIR
|
||||||
|
|
||||||
|
if daemon:
|
||||||
|
log.info("updater daemonizing host=%s port=%d", host, port)
|
||||||
|
_utils._daemonize()
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
"updater command invoked host=%s port=%d updater_dir=%s install_dir=%s",
|
||||||
|
host, port, resolved_updater, resolved_install,
|
||||||
|
)
|
||||||
|
console.print(f"[green]Starting DECNET self-updater on {host}:{port} (mTLS)...[/]")
|
||||||
|
rc = _upd_server.run(
|
||||||
|
host, port,
|
||||||
|
updater_dir=resolved_updater,
|
||||||
|
install_dir=resolved_install,
|
||||||
|
agent_dir=resolved_agent,
|
||||||
|
)
|
||||||
|
if rc != 0:
|
||||||
|
raise typer.Exit(rc)
|
||||||
177
decnet/cli/utils.py
Normal file
177
decnet/cli/utils.py
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
"""Shared CLI helpers: console, logger, process management, swarm HTTP client.
|
||||||
|
|
||||||
|
Submodules reference these as ``from . import utils`` then ``utils.foo(...)``
|
||||||
|
so tests can patch ``decnet.cli.utils.<name>`` and have every caller see it.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import signal
|
||||||
|
import subprocess # nosec B404
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
from rich.console import Console
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.env import DECNET_API_HOST, DECNET_API_PORT, DECNET_INGEST_LOG_FILE
|
||||||
|
|
||||||
|
log = get_logger("cli")
|
||||||
|
console = Console()
|
||||||
|
|
||||||
|
|
||||||
|
def _daemonize() -> None:
|
||||||
|
"""Fork the current process into a background daemon (Unix double-fork)."""
|
||||||
|
if os.fork() > 0:
|
||||||
|
raise SystemExit(0)
|
||||||
|
os.setsid()
|
||||||
|
if os.fork() > 0:
|
||||||
|
raise SystemExit(0)
|
||||||
|
sys.stdout = open(os.devnull, "w") # noqa: SIM115
|
||||||
|
sys.stderr = open(os.devnull, "w") # noqa: SIM115
|
||||||
|
sys.stdin = open(os.devnull, "r") # noqa: SIM115
|
||||||
|
|
||||||
|
|
||||||
|
def _pid_dir() -> Path:
|
||||||
|
"""Return the writable PID directory.
|
||||||
|
|
||||||
|
/opt/decnet when it exists and is writable (production), else
|
||||||
|
~/.decnet (dev). The directory is created if needed."""
|
||||||
|
candidates = [Path("/opt/decnet"), Path.home() / ".decnet"]
|
||||||
|
for path in candidates:
|
||||||
|
try:
|
||||||
|
path.mkdir(parents=True, exist_ok=True)
|
||||||
|
if os.access(path, os.W_OK):
|
||||||
|
return path
|
||||||
|
except (PermissionError, OSError):
|
||||||
|
continue
|
||||||
|
return Path("/tmp") # nosec B108
|
||||||
|
|
||||||
|
|
||||||
|
def _spawn_detached(argv: list[str], pid_file: Path) -> int:
|
||||||
|
"""Spawn a DECNET subcommand as a fully-independent sibling process.
|
||||||
|
|
||||||
|
The parent does NOT wait() on this child. start_new_session=True puts
|
||||||
|
the child in its own session so SIGHUP on parent exit doesn't kill it;
|
||||||
|
stdin/stdout/stderr go to /dev/null so the launching shell can close
|
||||||
|
without EIO on the child. close_fds=True prevents inherited sockets
|
||||||
|
from pinning ports we're trying to rebind.
|
||||||
|
|
||||||
|
This is deliberately NOT a supervisor — we fire-and-forget. If the
|
||||||
|
child dies, the operator restarts it manually via its own subcommand.
|
||||||
|
"""
|
||||||
|
if pid_file.exists():
|
||||||
|
try:
|
||||||
|
existing = int(pid_file.read_text().strip())
|
||||||
|
os.kill(existing, 0)
|
||||||
|
return existing
|
||||||
|
except (ValueError, ProcessLookupError, PermissionError, OSError):
|
||||||
|
pass # stale pid_file — fall through and spawn
|
||||||
|
|
||||||
|
with open(os.devnull, "rb") as dn_in, open(os.devnull, "ab") as dn_out:
|
||||||
|
proc = subprocess.Popen( # nosec B603
|
||||||
|
argv,
|
||||||
|
stdin=dn_in, stdout=dn_out, stderr=dn_out,
|
||||||
|
start_new_session=True, close_fds=True,
|
||||||
|
)
|
||||||
|
pid_file.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
pid_file.write_text(f"{proc.pid}\n")
|
||||||
|
return proc.pid
|
||||||
|
|
||||||
|
|
||||||
|
def _is_running(match_fn) -> int | None:
|
||||||
|
"""Return PID of a running DECNET process matching ``match_fn(cmdline)``, or None."""
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
for proc in psutil.process_iter(["pid", "cmdline"]):
|
||||||
|
try:
|
||||||
|
cmd = proc.info["cmdline"]
|
||||||
|
if cmd and match_fn(cmd):
|
||||||
|
return proc.info["pid"]
|
||||||
|
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||||
|
continue
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _service_registry(log_file: str) -> list[tuple[str, callable, list[str]]]:
|
||||||
|
"""Return the microservice registry for health-check and relaunch.
|
||||||
|
|
||||||
|
On agents these run as systemd units invoking /usr/local/bin/decnet,
|
||||||
|
which doesn't include "decnet.cli" in its cmdline. On master dev boxes
|
||||||
|
they're launched via `python -m decnet.cli`. Match either form — cmd
|
||||||
|
is a list of argv tokens, so substring-check the joined string.
|
||||||
|
"""
|
||||||
|
_py = sys.executable
|
||||||
|
|
||||||
|
def _matches(sub: str, extras: tuple[str, ...] = ()):
|
||||||
|
def _check(cmd) -> bool:
|
||||||
|
joined = " ".join(cmd) if not isinstance(cmd, str) else cmd
|
||||||
|
if "decnet" not in joined:
|
||||||
|
return False
|
||||||
|
if sub not in joined:
|
||||||
|
return False
|
||||||
|
return all(e in joined for e in extras)
|
||||||
|
return _check
|
||||||
|
|
||||||
|
return [
|
||||||
|
("Collector", _matches("collect"),
|
||||||
|
[_py, "-m", "decnet.cli", "collect", "--daemon", "--log-file", log_file]),
|
||||||
|
("Mutator", _matches("mutate", ("--watch",)),
|
||||||
|
[_py, "-m", "decnet.cli", "mutate", "--daemon", "--watch"]),
|
||||||
|
("Prober", _matches("probe"),
|
||||||
|
[_py, "-m", "decnet.cli", "probe", "--daemon", "--log-file", log_file]),
|
||||||
|
("Profiler", _matches("profiler"),
|
||||||
|
[_py, "-m", "decnet.cli", "profiler", "--daemon"]),
|
||||||
|
("Sniffer", _matches("sniffer"),
|
||||||
|
[_py, "-m", "decnet.cli", "sniffer", "--daemon", "--log-file", log_file]),
|
||||||
|
("API",
|
||||||
|
lambda cmd: "uvicorn" in cmd and "decnet.web.api:app" in cmd,
|
||||||
|
[_py, "-m", "uvicorn", "decnet.web.api:app",
|
||||||
|
"--host", DECNET_API_HOST, "--port", str(DECNET_API_PORT)]),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _kill_all_services() -> None:
|
||||||
|
"""Find and kill all running DECNET microservice processes."""
|
||||||
|
registry = _service_registry(str(DECNET_INGEST_LOG_FILE))
|
||||||
|
killed = 0
|
||||||
|
for name, match_fn, _launch_args in registry:
|
||||||
|
pid = _is_running(match_fn)
|
||||||
|
if pid is not None:
|
||||||
|
console.print(f"[yellow]Stopping {name} (PID {pid})...[/]")
|
||||||
|
os.kill(pid, signal.SIGTERM)
|
||||||
|
killed += 1
|
||||||
|
|
||||||
|
if killed:
|
||||||
|
console.print(f"[green]{killed} background process(es) stopped.[/]")
|
||||||
|
else:
|
||||||
|
console.print("[dim]No DECNET services were running.[/]")
|
||||||
|
|
||||||
|
|
||||||
|
_DEFAULT_SWARMCTL_URL = "http://127.0.0.1:8770"
|
||||||
|
|
||||||
|
|
||||||
|
def _swarmctl_base_url(url: Optional[str]) -> str:
|
||||||
|
return url or os.environ.get("DECNET_SWARMCTL_URL", _DEFAULT_SWARMCTL_URL)
|
||||||
|
|
||||||
|
|
||||||
|
def _http_request(method: str, url: str, *, json_body: Optional[dict] = None, timeout: float = 30.0):
|
||||||
|
"""Tiny sync wrapper around httpx; avoids leaking async into the CLI."""
|
||||||
|
import httpx
|
||||||
|
try:
|
||||||
|
resp = httpx.request(method, url, json=json_body, timeout=timeout)
|
||||||
|
except httpx.HTTPError as exc:
|
||||||
|
console.print(f"[red]Could not reach swarm controller at {url}: {exc}[/]")
|
||||||
|
console.print("[dim]Is `decnet swarmctl` running?[/]")
|
||||||
|
raise typer.Exit(2)
|
||||||
|
if resp.status_code >= 400:
|
||||||
|
try:
|
||||||
|
detail = resp.json().get("detail", resp.text)
|
||||||
|
except Exception: # nosec B110
|
||||||
|
detail = resp.text
|
||||||
|
console.print(f"[red]{method} {url} failed: {resp.status_code} — {detail}[/]")
|
||||||
|
raise typer.Exit(1)
|
||||||
|
return resp
|
||||||
120
decnet/cli/web.py
Normal file
120
decnet/cli/web.py
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from decnet.env import DECNET_API_PORT, DECNET_WEB_HOST, DECNET_WEB_PORT
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the `web` dashboard command to the top-level Typer app."""

    @app.command(name="web")
    def serve_web(
        web_port: int = typer.Option(DECNET_WEB_PORT, "--web-port", help="Port to serve the DECNET Web Dashboard"),
        host: str = typer.Option(DECNET_WEB_HOST, "--host", help="Host IP to serve the Web Dashboard"),
        api_port: int = typer.Option(DECNET_API_PORT, "--api-port", help="Port the DECNET API is listening on"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Serve the DECNET Web Dashboard frontend.

        Proxies /api/* requests to the API server so the frontend can use
        relative URLs (/api/v1/...) with no CORS configuration required.
        """
        # Deferred imports: keep CLI startup fast for unrelated commands.
        import http.client
        import http.server
        import os
        import socketserver
        from pathlib import Path

        # Static bundle produced by `npm run build` inside decnet_web/.
        dist_dir = Path(__file__).resolve().parent.parent.parent / "decnet_web" / "dist"

        if not dist_dir.exists():
            console.print(f"[red]Frontend build not found at {dist_dir}. Make sure you run 'npm run build' inside 'decnet_web'.[/]")
            raise typer.Exit(1)

        if daemon:
            # Fork into the background before binding the socket.
            log.info("web daemonizing host=%s port=%d api_port=%d", host, web_port, api_port)
            _utils._daemonize()

        # Captured by the handler closure below.
        _api_port = api_port

        class SPAHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
            # Serves static files from the process cwd (chdir'd to dist_dir
            # below); any /api/* request is forwarded to the local API server.
            # Unknown GET paths fall back to index.html (SPA client routing).

            def do_GET(self):
                if self.path.startswith("/api/"):
                    self._proxy("GET")
                    return
                path = self.translate_path(self.path)
                # SPA fallback: non-existent paths are client-side routes.
                if not Path(path).exists() or Path(path).is_dir():
                    self.path = "/index.html"
                return super().do_GET()

            def do_POST(self):
                # Only API calls accept a body; static assets are GET-only.
                if self.path.startswith("/api/"):
                    self._proxy("POST")
                    return
                self.send_error(405)

            def do_PUT(self):
                if self.path.startswith("/api/"):
                    self._proxy("PUT")
                    return
                self.send_error(405)

            def do_DELETE(self):
                if self.path.startswith("/api/"):
                    self._proxy("DELETE")
                    return
                self.send_error(405)

            def _proxy(self, method: str) -> None:
                """Forward the current request to the API server and relay the response."""
                content_length = int(self.headers.get("Content-Length", 0))
                body = self.rfile.read(content_length) if content_length else None

                # Drop hop-by-hop headers before forwarding.
                forward = {k: v for k, v in self.headers.items()
                           if k.lower() not in ("host", "connection")}

                try:
                    conn = http.client.HTTPConnection("127.0.0.1", _api_port, timeout=120)
                    conn.request(method, self.path, body=body, headers=forward)
                    resp = conn.getresponse()

                    self.send_response(resp.status)
                    for key, val in resp.getheaders():
                        # transfer-encoding is connection-specific; relaying
                        # it would conflict with how we re-stream the body.
                        if key.lower() not in ("connection", "transfer-encoding"):
                            self.send_header(key, val)
                    self.end_headers()

                    content_type = resp.getheader("Content-Type", "")
                    if "text/event-stream" in content_type:
                        # SSE streams stay open indefinitely — lift the 120s
                        # socket timeout set on the upstream connection.
                        conn.sock.settimeout(None)

                    # read1 (when available) returns as soon as any data is
                    # buffered, keeping SSE events flowing without delays.
                    _read = getattr(resp, "read1", resp.read)
                    while True:
                        chunk = _read(4096)
                        if not chunk:
                            break
                        self.wfile.write(chunk)
                        self.wfile.flush()
                except Exception as exc:
                    log.warning("web proxy error %s %s: %s", method, self.path, exc)
                    self.send_error(502, f"API proxy error: {exc}")
                finally:
                    # conn may be unbound if HTTPConnection() itself raised;
                    # the broad except also swallows that NameError.
                    try:
                        conn.close()
                    except Exception:  # nosec B110 — best-effort conn cleanup
                        pass

            def log_message(self, fmt: str, *args: object) -> None:
                # Route per-request access logs into the app logger at DEBUG
                # instead of stderr.
                log.debug("web %s", fmt % args)

        # SimpleHTTPRequestHandler serves files relative to the cwd.
        os.chdir(dist_dir)

        socketserver.TCPServer.allow_reuse_address = True
        with socketserver.ThreadingTCPServer((host, web_port), SPAHTTPRequestHandler) as httpd:
            console.print(f"[green]Serving DECNET Web Dashboard on http://{host}:{web_port}[/]")
            console.print(f"[dim]Proxying /api/* → http://127.0.0.1:{_api_port}[/]")
            try:
                httpd.serve_forever()
            except KeyboardInterrupt:
                console.print("\n[dim]Shutting down dashboard server.[/]")
|
||||||
142
decnet/cli/workers.py
Normal file
142
decnet/cli/workers.py
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from decnet.env import DECNET_INGEST_LOG_FILE
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the background-worker commands (probe/collect/mutate/correlate)
    to the top-level Typer app."""

    @app.command()
    def probe(
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path for RFC 5424 syslog + .json output (reads attackers from .json, writes results to both)"),
        interval: int = typer.Option(300, "--interval", "-i", help="Seconds between probe cycles (default: 300)"),
        timeout: float = typer.Option(5.0, "--timeout", help="Per-probe TCP timeout in seconds"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background (used by deploy, no console output)"),
    ) -> None:
        """Fingerprint attackers (JARM + HASSH + TCP/IP stack) discovered in the log stream."""
        import asyncio
        from decnet.prober import prober_worker

        if daemon:
            # Daemon path: fork first, then run the worker with no console
            # output (stderr is detached after _daemonize).
            log.info("probe daemonizing log_file=%s interval=%d", log_file, interval)
            _utils._daemonize()
            asyncio.run(prober_worker(log_file, interval=interval, timeout=timeout))
            return

        log.info("probe command invoked log_file=%s interval=%d", log_file, interval)
        console.print(f"[bold cyan]DECNET-PROBER[/] watching {log_file} for attackers (interval: {interval}s)")
        console.print("[dim]Press Ctrl+C to stop[/]")
        try:
            asyncio.run(prober_worker(log_file, interval=interval, timeout=timeout))
        except KeyboardInterrupt:
            console.print("\n[yellow]DECNET-PROBER stopped.[/]")

    @app.command()
    def collect(
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write RFC 5424 syslog lines and .json records"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Stream Docker logs from all running decky service containers to a log file."""
        import asyncio
        from decnet.collector import log_collector_worker

        if daemon:
            log.info("collect daemonizing log_file=%s", log_file)
            _utils._daemonize()
            # NOTE: no early return here (unlike probe) — the worker below
            # runs inside the daemonized process; the console output that
            # follows is simply discarded after the fork.

        log.info("collect command invoked log_file=%s", log_file)
        console.print(f"[bold cyan]Collector starting[/] → {log_file}")
        asyncio.run(log_collector_worker(log_file))

    @app.command()
    def mutate(
        watch: bool = typer.Option(False, "--watch", "-w", help="Run continuously and mutate deckies according to their interval"),
        decky_name: Optional[str] = typer.Option(None, "--decky", help="Force mutate a specific decky immediately"),
        force_all: bool = typer.Option(False, "--all", help="Force mutate all deckies immediately"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Manually trigger or continuously watch for decky mutation."""
        import asyncio
        from decnet.mutator import mutate_decky, mutate_all, run_watch_loop
        from decnet.web.dependencies import repo

        if daemon:
            log.info("mutate daemonizing watch=%s", watch)
            _utils._daemonize()

        async def _run() -> None:
            # The repository must be initialized before any mutation helper
            # touches it.
            await repo.initialize()
            # Dispatch precedence: --watch > --decky > --all > default sweep.
            if watch:
                await run_watch_loop(repo)
            elif decky_name:
                await mutate_decky(decky_name, repo)
            elif force_all:
                await mutate_all(force=True, repo=repo)
            else:
                await mutate_all(force=False, repo=repo)

        asyncio.run(_run())

    @app.command(name="correlate")
    def correlate(
        log_file: Optional[str] = typer.Option(None, "--log-file", "-f", help="Path to DECNET syslog file to analyse"),
        min_deckies: int = typer.Option(2, "--min-deckies", "-m", help="Minimum number of distinct deckies an IP must touch to be reported"),
        output: str = typer.Option("table", "--output", "-o", help="Output format: table | json | syslog"),
        emit_syslog: bool = typer.Option(False, "--emit-syslog", help="Also print traversal events as RFC 5424 lines (for SIEM piping)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Analyse logs for cross-decky traversals and print the attacker movement graph."""
        import sys
        import json as _json
        from pathlib import Path
        from decnet.correlation.engine import CorrelationEngine

        if daemon:
            # NOTE(review): correlate is a one-shot analysis; daemonizing it
            # discards its console output — confirm this path is intended.
            log.info("correlate daemonizing log_file=%s", log_file)
            _utils._daemonize()

        engine = CorrelationEngine()

        # Input source: explicit file, else piped stdin, else error out.
        if log_file:
            path = Path(log_file)
            if not path.exists():
                console.print(f"[red]Log file not found: {log_file}[/]")
                raise typer.Exit(1)
            engine.ingest_file(path)
        elif not sys.stdin.isatty():
            for line in sys.stdin:
                engine.ingest(line)
        else:
            console.print("[red]Provide --log-file or pipe log data via stdin.[/]")
            raise typer.Exit(1)

        traversals = engine.traversals(min_deckies)

        if output == "json":
            console.print_json(_json.dumps(engine.report_json(min_deckies), indent=2))
        elif output == "syslog":
            for line in engine.traversal_syslog_lines(min_deckies):
                typer.echo(line)
        else:
            # Default "table" output: graph table (when any traversal was
            # found) followed by a summary footer.
            if not traversals:
                console.print(
                    f"[yellow]No traversals detected "
                    f"(min_deckies={min_deckies}, events_indexed={engine.events_indexed}).[/]"
                )
            else:
                console.print(engine.report_table(min_deckies))
            console.print(
                f"[dim]Parsed {engine.lines_parsed} lines · "
                f"indexed {engine.events_indexed} events · "
                f"{len(engine.all_attackers())} unique IPs · "
                f"[bold]{len(traversals)}[/] traversal(s)[/]"
            )

        if emit_syslog:
            for line in engine.traversal_syslog_lines(min_deckies):
                typer.echo(line)
|
||||||
@@ -8,13 +8,100 @@ The ingester tails the .json file; rsyslog can consume the .log file independent
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import logging
|
import os
|
||||||
import re
|
import re
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
logger = logging.getLogger("decnet.collector")
|
from decnet.logging import get_logger
|
||||||
|
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer, inject_context as _inject_ctx
|
||||||
|
|
||||||
|
logger = get_logger("collector")
|
||||||
|
|
||||||
|
# ─── Ingestion rate limiter ───────────────────────────────────────────────────
|
||||||
|
#
|
||||||
|
# Rationale: connection-lifecycle events (connect/disconnect/accept/close) are
|
||||||
|
# emitted once per TCP connection. During a portscan or credential-stuffing
|
||||||
|
# run, a single attacker can generate hundreds of these per second from the
|
||||||
|
# honeypot services themselves — each becoming a tiny WAL-write transaction
|
||||||
|
# through the ingester, starving reads until the queue drains.
|
||||||
|
#
|
||||||
|
# The collector still writes every line to the raw .log file (forensic record
|
||||||
|
# for rsyslog/SIEM). Only the .json path — which feeds SQLite — is deduped.
|
||||||
|
#
|
||||||
|
# Dedup key: (attacker_ip, decky, service, event_type)
|
||||||
|
# Window: DECNET_COLLECTOR_RL_WINDOW_SEC seconds (default 1.0)
|
||||||
|
# Scope: DECNET_COLLECTOR_RL_EVENT_TYPES comma list
|
||||||
|
# (default: connect,disconnect,connection,accept,close)
|
||||||
|
# Events outside that set bypass the limiter untouched.
|
||||||
|
|
||||||
|
def _parse_float_env(name: str, default: float) -> float:
|
||||||
|
raw = os.environ.get(name)
|
||||||
|
if raw is None:
|
||||||
|
return default
|
||||||
|
try:
|
||||||
|
value = float(raw)
|
||||||
|
except ValueError:
|
||||||
|
logger.warning("collector: invalid %s=%r, using default %s", name, raw, default)
|
||||||
|
return default
|
||||||
|
return max(0.0, value)
|
||||||
|
|
||||||
|
|
||||||
|
# Dedup window in seconds; 0 disables the limiter entirely (see _should_ingest).
_RL_WINDOW_SEC: float = _parse_float_env("DECNET_COLLECTOR_RL_WINDOW_SEC", 1.0)
# Event types subject to rate limiting; comma-separated, whitespace-tolerant,
# empty items dropped.
_RL_EVENT_TYPES: frozenset[str] = frozenset(
    t.strip()
    for t in os.environ.get(
        "DECNET_COLLECTOR_RL_EVENT_TYPES",
        "connect,disconnect,connection,accept,close",
    ).split(",")
    if t.strip()
)
# Soft cap on the dedup map; crossing it triggers opportunistic GC in
# _should_ingest.
_RL_MAX_ENTRIES: int = 10_000

# Guards _rl_last — container streams run on multiple collector threads.
_rl_lock: threading.Lock = threading.Lock()
# (attacker_ip, decky, service, event_type) -> monotonic time of last emit.
_rl_last: dict[tuple[str, str, str, str], float] = {}
|
||||||
|
|
||||||
|
|
||||||
|
def _should_ingest(parsed: dict[str, Any]) -> bool:
    """Decide whether *parsed* should reach the JSON ingestion stream.

    Connection-lifecycle events are deduped: when another event with the
    same (attacker_ip, decky, service, event_type) key fired inside the
    rate-limit window, this one is dropped. Events outside the configured
    type set — or when the window is zero — always pass.
    """
    event_type = parsed.get("event_type", "")
    if _RL_WINDOW_SEC <= 0.0 or event_type not in _RL_EVENT_TYPES:
        return True

    dedup_key = (
        parsed.get("attacker_ip", "Unknown"),
        parsed.get("decky", ""),
        parsed.get("service", ""),
        event_type,
    )
    now = time.monotonic()
    with _rl_lock:
        if now - _rl_last.get(dedup_key, 0.0) < _RL_WINDOW_SEC:
            return False
        _rl_last[dedup_key] = now
        # Opportunistic GC: once the map outgrows the cap, evict entries
        # older than 60 windows — far beyond any live dedup horizon.
        if len(_rl_last) > _RL_MAX_ENTRIES:
            horizon = now - (_RL_WINDOW_SEC * 60.0)
            for stale_key in [k for k, ts in _rl_last.items() if ts < horizon]:
                del _rl_last[stale_key]
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def _reset_rate_limiter() -> None:
    """Test-only helper — clear dedup state between test cases."""
    # Take the lock so a concurrent _should_ingest cannot interleave with
    # the clear.
    with _rl_lock:
        _rl_last.clear()
|
||||||
|
|
||||||
# ─── RFC 5424 parser ──────────────────────────────────────────────────────────
|
# ─── RFC 5424 parser ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -23,13 +110,22 @@ _RFC5424_RE = re.compile(
|
|||||||
r"(\S+) " # 1: TIMESTAMP
|
r"(\S+) " # 1: TIMESTAMP
|
||||||
r"(\S+) " # 2: HOSTNAME (decky name)
|
r"(\S+) " # 2: HOSTNAME (decky name)
|
||||||
r"(\S+) " # 3: APP-NAME (service)
|
r"(\S+) " # 3: APP-NAME (service)
|
||||||
r"- " # PROCID always NILVALUE
|
r"\S+ " # PROCID — NILVALUE ("-") for syslog_bridge emitters,
|
||||||
|
# real PID for native syslog callers like sshd/sudo
|
||||||
|
# routed through rsyslog. Accept both; we don't consume it.
|
||||||
r"(\S+) " # 4: MSGID (event_type)
|
r"(\S+) " # 4: MSGID (event_type)
|
||||||
r"(.+)$", # 5: SD element + optional MSG
|
r"(.+)$", # 5: SD element + optional MSG
|
||||||
)
|
)
|
||||||
_SD_BLOCK_RE = re.compile(r'\[decnet@55555\s+(.*?)\]', re.DOTALL)
|
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
|
||||||
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
||||||
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "ip")
|
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "remote_addr", "target_ip", "ip")
|
||||||
|
|
||||||
|
# Free-form `key=value` pairs in the MSG body. Used for lines that bypass the
|
||||||
|
# syslog_bridge SD format — e.g. the SSH container's PROMPT_COMMAND which
|
||||||
|
# calls `logger -t bash "CMD uid=0 user=root src=1.2.3.4 pwd=/root cmd=…"`.
|
||||||
|
# Values run until the next whitespace, so `cmd=…` at end-of-line is preserved
|
||||||
|
# as one unit; we only care about IP-shaped fields here anyway.
|
||||||
|
_MSG_KV_RE = re.compile(r'(\w+)=(\S+)')
|
||||||
|
|
||||||
|
|
||||||
def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
|
def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
|
||||||
@@ -64,6 +160,19 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
|
|||||||
attacker_ip = fields[fname]
|
attacker_ip = fields[fname]
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# Fallback for plain `logger` callers that don't use SD params (notably
|
||||||
|
# the SSH container's bash PROMPT_COMMAND: `logger -t bash "CMD … src=IP …"`).
|
||||||
|
# Scan the MSG body for IP-shaped `key=value` tokens ONLY — don't fold
|
||||||
|
# them into `fields`, because the frontend's parseEventBody already
|
||||||
|
# renders kv pairs from the msg and doubling them up produces noisy
|
||||||
|
# duplicate pills. This keeps attacker attribution working without
|
||||||
|
# changing the shape of `fields` for non-SD lines.
|
||||||
|
if attacker_ip == "Unknown" and msg:
|
||||||
|
for k, v in _MSG_KV_RE.findall(msg):
|
||||||
|
if k in _IP_FIELDS:
|
||||||
|
attacker_ip = v
|
||||||
|
break
|
||||||
|
|
||||||
try:
|
try:
|
||||||
ts_formatted = datetime.fromisoformat(ts_raw).strftime("%Y-%m-%d %H:%M:%S")
|
ts_formatted = datetime.fromisoformat(ts_raw).strftime("%Y-%m-%d %H:%M:%S")
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@@ -115,19 +224,37 @@ def is_service_event(attrs: dict) -> bool:
|
|||||||
|
|
||||||
# ─── Blocking stream worker (runs in a thread) ────────────────────────────────
|
# ─── Blocking stream worker (runs in a thread) ────────────────────────────────
|
||||||
|
|
||||||
|
def _reopen_if_needed(path: Path, fh: Optional[Any]) -> Any:
|
||||||
|
"""Return fh if it still points to the same inode as path; otherwise close
|
||||||
|
fh and open a fresh handle. Handles the file being deleted (manual rm) or
|
||||||
|
rotated (logrotate rename + create)."""
|
||||||
|
try:
|
||||||
|
if fh is not None and os.fstat(fh.fileno()).st_ino == os.stat(path).st_ino:
|
||||||
|
return fh
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
# File gone or inode changed — close stale handle and open a new one.
|
||||||
|
if fh is not None:
|
||||||
|
try:
|
||||||
|
fh.close()
|
||||||
|
except Exception: # nosec B110 — best-effort file handle cleanup
|
||||||
|
pass
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
return open(path, "a", encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("collector.stream_container")
|
||||||
def _stream_container(container_id: str, log_path: Path, json_path: Path) -> None:
|
def _stream_container(container_id: str, log_path: Path, json_path: Path) -> None:
|
||||||
"""Stream logs from one container and append to the host log files."""
|
"""Stream logs from one container and append to the host log files."""
|
||||||
import docker # type: ignore[import]
|
import docker # type: ignore[import]
|
||||||
|
|
||||||
|
lf: Optional[Any] = None
|
||||||
|
jf: Optional[Any] = None
|
||||||
try:
|
try:
|
||||||
client = docker.from_env()
|
client = docker.from_env()
|
||||||
container = client.containers.get(container_id)
|
container = client.containers.get(container_id)
|
||||||
log_stream = container.logs(stream=True, follow=True, stdout=True, stderr=False)
|
log_stream = container.logs(stream=True, follow=True, stdout=True, stderr=False)
|
||||||
buf = ""
|
buf = ""
|
||||||
with (
|
|
||||||
open(log_path, "a", encoding="utf-8") as lf,
|
|
||||||
open(json_path, "a", encoding="utf-8") as jf,
|
|
||||||
):
|
|
||||||
for chunk in log_stream:
|
for chunk in log_stream:
|
||||||
buf += chunk.decode("utf-8", errors="replace")
|
buf += chunk.decode("utf-8", errors="replace")
|
||||||
while "\n" in buf:
|
while "\n" in buf:
|
||||||
@@ -135,14 +262,40 @@ def _stream_container(container_id: str, log_path: Path, json_path: Path) -> Non
|
|||||||
line = line.rstrip()
|
line = line.rstrip()
|
||||||
if not line:
|
if not line:
|
||||||
continue
|
continue
|
||||||
|
lf = _reopen_if_needed(log_path, lf)
|
||||||
lf.write(line + "\n")
|
lf.write(line + "\n")
|
||||||
lf.flush()
|
lf.flush()
|
||||||
parsed = parse_rfc5424(line)
|
parsed = parse_rfc5424(line)
|
||||||
if parsed:
|
if parsed:
|
||||||
|
if _should_ingest(parsed):
|
||||||
|
_tracer = _get_tracer("collector")
|
||||||
|
with _tracer.start_as_current_span("collector.event") as _span:
|
||||||
|
_span.set_attribute("decky", parsed.get("decky", ""))
|
||||||
|
_span.set_attribute("service", parsed.get("service", ""))
|
||||||
|
_span.set_attribute("event_type", parsed.get("event_type", ""))
|
||||||
|
_span.set_attribute("attacker_ip", parsed.get("attacker_ip", ""))
|
||||||
|
_inject_ctx(parsed)
|
||||||
|
logger.debug("collector: event written decky=%s type=%s", parsed.get("decky"), parsed.get("event_type"))
|
||||||
|
jf = _reopen_if_needed(json_path, jf)
|
||||||
jf.write(json.dumps(parsed) + "\n")
|
jf.write(json.dumps(parsed) + "\n")
|
||||||
jf.flush()
|
jf.flush()
|
||||||
|
else:
|
||||||
|
logger.debug(
|
||||||
|
"collector: rate-limited decky=%s service=%s type=%s attacker=%s",
|
||||||
|
parsed.get("decky"), parsed.get("service"),
|
||||||
|
parsed.get("event_type"), parsed.get("attacker_ip"),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.debug("collector: malformed RFC5424 line snippet=%r", line[:80])
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("Log stream ended for container %s: %s", container_id, exc)
|
logger.debug("collector: log stream ended container_id=%s reason=%s", container_id, exc)
|
||||||
|
finally:
|
||||||
|
for fh in (lf, jf):
|
||||||
|
if fh is not None:
|
||||||
|
try:
|
||||||
|
fh.close()
|
||||||
|
except Exception: # nosec B110 — best-effort file handle cleanup
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
# ─── Async collector ──────────────────────────────────────────────────────────
|
# ─── Async collector ──────────────────────────────────────────────────────────
|
||||||
@@ -164,15 +317,26 @@ async def log_collector_worker(log_file: str) -> None:
|
|||||||
active: dict[str, asyncio.Task[None]] = {}
|
active: dict[str, asyncio.Task[None]] = {}
|
||||||
loop = asyncio.get_running_loop()
|
loop = asyncio.get_running_loop()
|
||||||
|
|
||||||
|
# Dedicated thread pool so long-running container log streams don't
|
||||||
|
# saturate the default asyncio executor and starve short-lived
|
||||||
|
# to_thread() calls elsewhere (e.g. load_state in the web API).
|
||||||
|
collector_pool = ThreadPoolExecutor(
|
||||||
|
max_workers=64, thread_name_prefix="decnet-collector",
|
||||||
|
)
|
||||||
|
|
||||||
def _spawn(container_id: str, container_name: str) -> None:
|
def _spawn(container_id: str, container_name: str) -> None:
|
||||||
if container_id not in active or active[container_id].done():
|
if container_id not in active or active[container_id].done():
|
||||||
active[container_id] = asyncio.ensure_future(
|
active[container_id] = asyncio.ensure_future(
|
||||||
asyncio.to_thread(_stream_container, container_id, log_path, json_path),
|
loop.run_in_executor(
|
||||||
|
collector_pool, _stream_container,
|
||||||
|
container_id, log_path, json_path,
|
||||||
|
),
|
||||||
loop=loop,
|
loop=loop,
|
||||||
)
|
)
|
||||||
logger.info("Collecting logs from container: %s", container_name)
|
logger.info("collector: streaming container=%s", container_name)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
logger.info("collector started log_path=%s", log_path)
|
||||||
client = docker.from_env()
|
client = docker.from_env()
|
||||||
|
|
||||||
for container in client.containers.list():
|
for container in client.containers.list():
|
||||||
@@ -190,11 +354,15 @@ async def log_collector_worker(log_file: str) -> None:
|
|||||||
if cid and is_service_event(attrs):
|
if cid and is_service_event(attrs):
|
||||||
loop.call_soon_threadsafe(_spawn, cid, name)
|
loop.call_soon_threadsafe(_spawn, cid, name)
|
||||||
|
|
||||||
await asyncio.to_thread(_watch_events)
|
await loop.run_in_executor(collector_pool, _watch_events)
|
||||||
|
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
|
logger.info("collector shutdown requested cancelling %d tasks", len(active))
|
||||||
for task in active.values():
|
for task in active.values():
|
||||||
task.cancel()
|
task.cancel()
|
||||||
|
collector_pool.shutdown(wait=False)
|
||||||
raise
|
raise
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.error("Collector error: %s", exc)
|
logger.error("collector error: %s", exc)
|
||||||
|
finally:
|
||||||
|
collector_pool.shutdown(wait=False)
|
||||||
|
|||||||
@@ -64,6 +64,8 @@ def generate_compose(config: DecnetConfig) -> dict:
|
|||||||
# --- Service containers: share base network namespace ---
|
# --- Service containers: share base network namespace ---
|
||||||
for svc_name in decky.services:
|
for svc_name in decky.services:
|
||||||
svc = get_service(svc_name)
|
svc = get_service(svc_name)
|
||||||
|
if svc.fleet_singleton:
|
||||||
|
continue
|
||||||
svc_cfg = decky.service_config.get(svc_name, {})
|
svc_cfg = decky.service_config.get(svc_name, {})
|
||||||
fragment = svc.compose_fragment(decky.name, service_cfg=svc_cfg)
|
fragment = svc.compose_fragment(decky.name, service_cfg=svc_cfg)
|
||||||
|
|
||||||
|
|||||||
@@ -48,23 +48,53 @@ class Rfc5424Formatter(logging.Formatter):
|
|||||||
msg = record.getMessage()
|
msg = record.getMessage()
|
||||||
if record.exc_info:
|
if record.exc_info:
|
||||||
msg += "\n" + self.formatException(record.exc_info)
|
msg += "\n" + self.formatException(record.exc_info)
|
||||||
|
app = getattr(record, "decnet_component", self._app)
|
||||||
return (
|
return (
|
||||||
f"<{prival}>1 {ts} {self._hostname} {self._app}"
|
f"<{prival}>1 {ts} {self._hostname} {app}"
|
||||||
f" {os.getpid()} {record.name} - {msg}"
|
f" {os.getpid()} {record.name} - {msg}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _configure_logging(dev: bool) -> None:
|
def _configure_logging(dev: bool) -> None:
|
||||||
"""Install the RFC 5424 handler on the root logger (idempotent)."""
|
"""Install RFC 5424 handlers on the root logger (idempotent).
|
||||||
|
|
||||||
|
Always adds a StreamHandler (stderr). Also adds a RotatingFileHandler
|
||||||
|
writing to DECNET_SYSTEM_LOGS (default: decnet.system.log in $PWD) so
|
||||||
|
all microservice daemons — which redirect stderr to /dev/null — still
|
||||||
|
produce readable logs. File handler is skipped under pytest.
|
||||||
|
"""
|
||||||
|
from decnet.logging.inode_aware_handler import InodeAwareRotatingFileHandler
|
||||||
|
|
||||||
root = logging.getLogger()
|
root = logging.getLogger()
|
||||||
# Avoid adding duplicate handlers on re-import (e.g. during testing)
|
# Guard: if our StreamHandler is already installed, all handlers are set.
|
||||||
if any(isinstance(h, logging.StreamHandler) and isinstance(h.formatter, Rfc5424Formatter)
|
if any(isinstance(h, logging.StreamHandler) and isinstance(h.formatter, Rfc5424Formatter)
|
||||||
for h in root.handlers):
|
for h in root.handlers):
|
||||||
return
|
return
|
||||||
handler = logging.StreamHandler()
|
|
||||||
handler.setFormatter(Rfc5424Formatter())
|
fmt = Rfc5424Formatter()
|
||||||
root.setLevel(logging.DEBUG if dev else logging.INFO)
|
root.setLevel(logging.DEBUG if dev else logging.INFO)
|
||||||
root.addHandler(handler)
|
|
||||||
|
stream_handler = logging.StreamHandler()
|
||||||
|
stream_handler.setFormatter(fmt)
|
||||||
|
root.addHandler(stream_handler)
|
||||||
|
|
||||||
|
# Skip the file handler during pytest runs to avoid polluting the test cwd.
|
||||||
|
_in_pytest = any(k.startswith("PYTEST") for k in os.environ)
|
||||||
|
if not _in_pytest:
|
||||||
|
_log_path = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.log")
|
||||||
|
file_handler = InodeAwareRotatingFileHandler(
|
||||||
|
_log_path,
|
||||||
|
mode="a",
|
||||||
|
maxBytes=10 * 1024 * 1024, # 10 MB
|
||||||
|
backupCount=5,
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
file_handler.setFormatter(fmt)
|
||||||
|
root.addHandler(file_handler)
|
||||||
|
# Drop root ownership when invoked via sudo so non-root follow-up
|
||||||
|
# commands (e.g. `decnet api` after `sudo decnet deploy`) can append.
|
||||||
|
from decnet.privdrop import chown_to_invoking_user
|
||||||
|
chown_to_invoking_user(_log_path)
|
||||||
|
|
||||||
|
|
||||||
_dev = os.environ.get("DECNET_DEVELOPER", "").lower() == "true"
|
_dev = os.environ.get("DECNET_DEVELOPER", "").lower() == "true"
|
||||||
|
|||||||
90
decnet/config_ini.py
Normal file
90
decnet/config_ini.py
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
"""Parse /etc/decnet/decnet.ini and seed os.environ defaults.
|
||||||
|
|
||||||
|
The INI file is a convenience layer on top of the existing DECNET_* env
|
||||||
|
vars. It never overrides an explicit environment variable (uses
|
||||||
|
os.environ.setdefault). Call load_ini_config() once, very early, before
|
||||||
|
any decnet.env import, so env.py picks up the seeded values as if they
|
||||||
|
had been exported by the shell.
|
||||||
|
|
||||||
|
Shape::
|
||||||
|
|
||||||
|
[decnet]
|
||||||
|
mode = agent # or "master"
|
||||||
|
log-directory = /var/log/decnet
|
||||||
|
disallow-master = true
|
||||||
|
|
||||||
|
[agent]
|
||||||
|
master-host = 192.168.1.50
|
||||||
|
master-port = 8770
|
||||||
|
agent-port = 8765
|
||||||
|
agent-dir = /home/anti/.decnet/agent
|
||||||
|
...
|
||||||
|
|
||||||
|
[master]
|
||||||
|
api-host = 0.0.0.0
|
||||||
|
swarmctl-port = 8770
|
||||||
|
listener-port = 6514
|
||||||
|
...
|
||||||
|
|
||||||
|
Only the section matching `mode` is loaded. The other section is
|
||||||
|
ignored silently so an agent host never reads master secrets (and
|
||||||
|
vice versa). Keys are converted to SCREAMING_SNAKE_CASE and prefixed
|
||||||
|
with ``DECNET_`` — e.g. ``master-host`` → ``DECNET_MASTER_HOST``.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import configparser
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_CONFIG_PATH = Path("/etc/decnet/decnet.ini")
|
||||||
|
|
||||||
|
# The [decnet] section keys are role-agnostic and always exported.
|
||||||
|
_COMMON_KEYS = frozenset({"mode", "disallow-master", "log-directory"})
|
||||||
|
|
||||||
|
|
||||||
|
def _key_to_env(key: str) -> str:
|
||||||
|
return "DECNET_" + key.replace("-", "_").upper()
|
||||||
|
|
||||||
|
|
||||||
|
def load_ini_config(path: Optional[Path] = None) -> Optional[Path]:
|
||||||
|
"""Seed os.environ defaults from the DECNET INI file.
|
||||||
|
|
||||||
|
Returns the path that was actually loaded (so callers can log it), or
|
||||||
|
None if no file was read. Missing file is a no-op — callers fall back
|
||||||
|
to env vars / CLI flags / hardcoded defaults.
|
||||||
|
|
||||||
|
Precedence: real os.environ > INI > defaults. Real env vars are never
|
||||||
|
overwritten because we use setdefault().
|
||||||
|
"""
|
||||||
|
if path is None:
|
||||||
|
override = os.environ.get("DECNET_CONFIG")
|
||||||
|
path = Path(override) if override else DEFAULT_CONFIG_PATH
|
||||||
|
|
||||||
|
if not path.is_file():
|
||||||
|
return None
|
||||||
|
|
||||||
|
parser = configparser.ConfigParser()
|
||||||
|
parser.read(path)
|
||||||
|
|
||||||
|
# [decnet] first — mode/disallow-master/log-directory. These seed the
|
||||||
|
# mode decision for the section selection below.
|
||||||
|
if parser.has_section("decnet"):
|
||||||
|
for key, value in parser.items("decnet"):
|
||||||
|
os.environ.setdefault(_key_to_env(key), value)
|
||||||
|
|
||||||
|
mode = os.environ.get("DECNET_MODE", "master").lower()
|
||||||
|
if mode not in ("agent", "master"):
|
||||||
|
raise ValueError(
|
||||||
|
f"decnet.ini: [decnet] mode must be 'agent' or 'master', got '{mode}'"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Role-specific section.
|
||||||
|
section = mode
|
||||||
|
if parser.has_section(section):
|
||||||
|
for key, value in parser.items(section):
|
||||||
|
os.environ.setdefault(_key_to_env(key), value)
|
||||||
|
|
||||||
|
return path
|
||||||
@@ -33,6 +33,7 @@ from decnet.logging.syslog_formatter import (
|
|||||||
SEVERITY_WARNING,
|
SEVERITY_WARNING,
|
||||||
format_rfc5424,
|
format_rfc5424,
|
||||||
)
|
)
|
||||||
|
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer
|
||||||
|
|
||||||
|
|
||||||
class CorrelationEngine:
|
class CorrelationEngine:
|
||||||
@@ -64,6 +65,7 @@ class CorrelationEngine:
|
|||||||
self.events_indexed += 1
|
self.events_indexed += 1
|
||||||
return event
|
return event
|
||||||
|
|
||||||
|
@_traced("correlation.ingest_file")
|
||||||
def ingest_file(self, path: Path) -> int:
|
def ingest_file(self, path: Path) -> int:
|
||||||
"""
|
"""
|
||||||
Parse every line of *path* and index it.
|
Parse every line of *path* and index it.
|
||||||
@@ -73,12 +75,18 @@ class CorrelationEngine:
|
|||||||
with open(path) as fh:
|
with open(path) as fh:
|
||||||
for line in fh:
|
for line in fh:
|
||||||
self.ingest(line)
|
self.ingest(line)
|
||||||
|
_tracer = _get_tracer("correlation")
|
||||||
|
with _tracer.start_as_current_span("correlation.ingest_file.summary") as _span:
|
||||||
|
_span.set_attribute("lines_parsed", self.lines_parsed)
|
||||||
|
_span.set_attribute("events_indexed", self.events_indexed)
|
||||||
|
_span.set_attribute("unique_ips", len(self._events))
|
||||||
return self.events_indexed
|
return self.events_indexed
|
||||||
|
|
||||||
# ------------------------------------------------------------------ #
|
# ------------------------------------------------------------------ #
|
||||||
# Query #
|
# Query #
|
||||||
# ------------------------------------------------------------------ #
|
# ------------------------------------------------------------------ #
|
||||||
|
|
||||||
|
@_traced("correlation.traversals")
|
||||||
def traversals(self, min_deckies: int = 2) -> list[AttackerTraversal]:
|
def traversals(self, min_deckies: int = 2) -> list[AttackerTraversal]:
|
||||||
"""
|
"""
|
||||||
Return all attackers that touched at least *min_deckies* distinct
|
Return all attackers that touched at least *min_deckies* distinct
|
||||||
@@ -135,6 +143,7 @@ class CorrelationEngine:
|
|||||||
)
|
)
|
||||||
return table
|
return table
|
||||||
|
|
||||||
|
@_traced("correlation.report_json")
|
||||||
def report_json(self, min_deckies: int = 2) -> dict:
|
def report_json(self, min_deckies: int = 2) -> dict:
|
||||||
"""Serialisable dict representation of all traversals."""
|
"""Serialisable dict representation of all traversals."""
|
||||||
return {
|
return {
|
||||||
@@ -147,6 +156,7 @@ class CorrelationEngine:
|
|||||||
"traversals": [t.to_dict() for t in self.traversals(min_deckies)],
|
"traversals": [t.to_dict() for t in self.traversals(min_deckies)],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@_traced("correlation.traversal_syslog_lines")
|
||||||
def traversal_syslog_lines(self, min_deckies: int = 2) -> list[str]:
|
def traversal_syslog_lines(self, min_deckies: int = 2) -> list[str]:
|
||||||
"""
|
"""
|
||||||
Emit one RFC 5424 syslog line per detected traversal.
|
Emit one RFC 5424 syslog line per detected traversal.
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ the fields needed for cross-decky correlation: attacker IP, decky name,
|
|||||||
service, event type, and timestamp.
|
service, event type, and timestamp.
|
||||||
|
|
||||||
Log format (produced by decnet.logging.syslog_formatter):
|
Log format (produced by decnet.logging.syslog_formatter):
|
||||||
<PRI>1 TIMESTAMP HOSTNAME APP-NAME - MSGID [decnet@55555 k1="v1" k2="v2"] [MSG]
|
<PRI>1 TIMESTAMP HOSTNAME APP-NAME - MSGID [relay@55555 k1="v1" k2="v2"] [MSG]
|
||||||
|
|
||||||
The attacker IP may appear under several field names depending on service:
|
The attacker IP may appear under several field names depending on service:
|
||||||
src_ip — ftp, smtp, http, most services
|
src_ip — ftp, smtp, http, most services
|
||||||
@@ -31,14 +31,14 @@ _RFC5424_RE = re.compile(
|
|||||||
r"(.+)$", # 5: SD element + optional MSG
|
r"(.+)$", # 5: SD element + optional MSG
|
||||||
)
|
)
|
||||||
|
|
||||||
# Structured data block: [decnet@55555 k="v" ...]
|
# Structured data block: [relay@55555 k="v" ...]
|
||||||
_SD_BLOCK_RE = re.compile(r'\[decnet@55555\s+(.*?)\]', re.DOTALL)
|
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
|
||||||
|
|
||||||
# Individual param: key="value" (with escaped chars inside value)
|
# Individual param: key="value" (with escaped chars inside value)
|
||||||
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
||||||
|
|
||||||
# Field names to probe for attacker IP, in priority order
|
# Field names to probe for attacker IP, in priority order
|
||||||
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "ip")
|
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "remote_addr", "target_ip", "ip")
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|||||||
@@ -11,6 +11,8 @@ import docker
|
|||||||
from rich.console import Console
|
from rich.console import Console
|
||||||
from rich.table import Table
|
from rich.table import Table
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.telemetry import traced as _traced
|
||||||
from decnet.config import DecnetConfig, clear_state, load_state, save_state
|
from decnet.config import DecnetConfig, clear_state, load_state, save_state
|
||||||
from decnet.composer import write_compose
|
from decnet.composer import write_compose
|
||||||
from decnet.network import (
|
from decnet.network import (
|
||||||
@@ -26,13 +28,14 @@ from decnet.network import (
|
|||||||
teardown_host_macvlan,
|
teardown_host_macvlan,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
log = get_logger("engine")
|
||||||
console = Console()
|
console = Console()
|
||||||
COMPOSE_FILE = Path("decnet-compose.yml")
|
COMPOSE_FILE = Path("decnet-compose.yml")
|
||||||
_CANONICAL_LOGGING = Path(__file__).parent.parent.parent / "templates" / "decnet_logging.py"
|
_CANONICAL_LOGGING = Path(__file__).parent.parent / "templates" / "syslog_bridge.py"
|
||||||
|
|
||||||
|
|
||||||
def _sync_logging_helper(config: DecnetConfig) -> None:
|
def _sync_logging_helper(config: DecnetConfig) -> None:
|
||||||
"""Copy the canonical decnet_logging.py into every active template build context."""
|
"""Copy the canonical syslog_bridge.py into every active template build context."""
|
||||||
from decnet.services.registry import get_service
|
from decnet.services.registry import get_service
|
||||||
seen: set[Path] = set()
|
seen: set[Path] = set()
|
||||||
for decky in config.deckies:
|
for decky in config.deckies:
|
||||||
@@ -44,16 +47,32 @@ def _sync_logging_helper(config: DecnetConfig) -> None:
|
|||||||
if ctx is None or ctx in seen:
|
if ctx is None or ctx in seen:
|
||||||
continue
|
continue
|
||||||
seen.add(ctx)
|
seen.add(ctx)
|
||||||
dest = ctx / "decnet_logging.py"
|
dest = ctx / "syslog_bridge.py"
|
||||||
if not dest.exists() or dest.read_bytes() != _CANONICAL_LOGGING.read_bytes():
|
if not dest.exists() or dest.read_bytes() != _CANONICAL_LOGGING.read_bytes():
|
||||||
shutil.copy2(_CANONICAL_LOGGING, dest)
|
shutil.copy2(_CANONICAL_LOGGING, dest)
|
||||||
|
|
||||||
|
|
||||||
def _compose(*args: str, compose_file: Path = COMPOSE_FILE, env: dict | None = None) -> None:
|
def _compose(*args: str, compose_file: Path = COMPOSE_FILE, env: dict | None = None) -> None:
|
||||||
import os
|
import os
|
||||||
cmd = ["docker", "compose", "-f", str(compose_file), *args]
|
# -p decnet pins the compose project name. Without it, docker compose
|
||||||
|
# derives the project from basename($PWD); when a daemon (systemd) runs
|
||||||
|
# with WorkingDirectory=/ that basename is empty and compose aborts with
|
||||||
|
# "project name must not be empty".
|
||||||
|
cmd = ["docker", "compose", "-p", "decnet", "-f", str(compose_file), *args]
|
||||||
merged = {**os.environ, **(env or {})}
|
merged = {**os.environ, **(env or {})}
|
||||||
subprocess.run(cmd, check=True, env=merged) # nosec B603
|
result = subprocess.run(cmd, capture_output=True, text=True, env=merged) # nosec B603
|
||||||
|
if result.stdout:
|
||||||
|
print(result.stdout, end="")
|
||||||
|
if result.returncode != 0:
|
||||||
|
# Docker emits the useful detail ("Address already in use", which IP,
|
||||||
|
# which port) on stderr. Surface it to the structured log so the
|
||||||
|
# agent's journal carries it — without this the upstream traceback
|
||||||
|
# just shows the exit code.
|
||||||
|
if result.stderr:
|
||||||
|
log.error("docker compose %s failed: %s", " ".join(args), result.stderr.strip())
|
||||||
|
raise subprocess.CalledProcessError(
|
||||||
|
result.returncode, cmd, result.stdout, result.stderr
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
_PERMANENT_ERRORS = (
|
_PERMANENT_ERRORS = (
|
||||||
@@ -65,6 +84,7 @@ _PERMANENT_ERRORS = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("engine.compose_with_retry")
|
||||||
def _compose_with_retry(
|
def _compose_with_retry(
|
||||||
*args: str,
|
*args: str,
|
||||||
compose_file: Path = COMPOSE_FILE,
|
compose_file: Path = COMPOSE_FILE,
|
||||||
@@ -75,7 +95,11 @@ def _compose_with_retry(
|
|||||||
"""Run a docker compose command, retrying on transient failures."""
|
"""Run a docker compose command, retrying on transient failures."""
|
||||||
import os
|
import os
|
||||||
last_exc: subprocess.CalledProcessError | None = None
|
last_exc: subprocess.CalledProcessError | None = None
|
||||||
cmd = ["docker", "compose", "-f", str(compose_file), *args]
|
# -p decnet pins the compose project name. Without it, docker compose
|
||||||
|
# derives the project from basename($PWD); when a daemon (systemd) runs
|
||||||
|
# with WorkingDirectory=/ that basename is empty and compose aborts with
|
||||||
|
# "project name must not be empty".
|
||||||
|
cmd = ["docker", "compose", "-p", "decnet", "-f", str(compose_file), *args]
|
||||||
merged = {**os.environ, **(env or {})}
|
merged = {**os.environ, **(env or {})}
|
||||||
for attempt in range(1, retries + 1):
|
for attempt in range(1, retries + 1):
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True, env=merged) # nosec B603
|
result = subprocess.run(cmd, capture_output=True, text=True, env=merged) # nosec B603
|
||||||
@@ -102,15 +126,21 @@ def _compose_with_retry(
|
|||||||
else:
|
else:
|
||||||
if result.stderr:
|
if result.stderr:
|
||||||
console.print(f"[red]{result.stderr.strip()}[/]")
|
console.print(f"[red]{result.stderr.strip()}[/]")
|
||||||
|
log.error("docker compose %s failed after %d attempts: %s",
|
||||||
|
" ".join(args), retries, result.stderr.strip())
|
||||||
raise last_exc
|
raise last_exc
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("engine.deploy")
|
||||||
def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False, parallel: bool = False) -> None:
|
def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False, parallel: bool = False) -> None:
|
||||||
|
log.info("deployment started n_deckies=%d interface=%s subnet=%s dry_run=%s", len(config.deckies), config.interface, config.subnet, dry_run)
|
||||||
|
log.debug("deploy: deckies=%s", [d.name for d in config.deckies])
|
||||||
client = docker.from_env()
|
client = docker.from_env()
|
||||||
|
|
||||||
ip_list = [d.ip for d in config.deckies]
|
ip_list = [d.ip for d in config.deckies]
|
||||||
decky_range = ips_to_range(ip_list)
|
decky_range = ips_to_range(ip_list)
|
||||||
host_ip = get_host_ip(config.interface)
|
host_ip = get_host_ip(config.interface)
|
||||||
|
log.debug("deploy: ip_range=%s host_ip=%s", decky_range, host_ip)
|
||||||
|
|
||||||
net_driver = "IPvlan L2" if config.ipvlan else "MACVLAN"
|
net_driver = "IPvlan L2" if config.ipvlan else "MACVLAN"
|
||||||
console.print(f"[bold cyan]Creating {net_driver} network[/] ({MACVLAN_NETWORK_NAME}) on {config.interface}")
|
console.print(f"[bold cyan]Creating {net_driver} network[/] ({MACVLAN_NETWORK_NAME}) on {config.interface}")
|
||||||
@@ -140,11 +170,21 @@ def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False,
|
|||||||
console.print(f"[bold cyan]Compose file written[/] → {compose_path}")
|
console.print(f"[bold cyan]Compose file written[/] → {compose_path}")
|
||||||
|
|
||||||
if dry_run:
|
if dry_run:
|
||||||
|
log.info("deployment dry-run complete compose_path=%s", compose_path)
|
||||||
console.print("[yellow]Dry run — no containers started.[/]")
|
console.print("[yellow]Dry run — no containers started.[/]")
|
||||||
return
|
return
|
||||||
|
|
||||||
save_state(config, compose_path)
|
save_state(config, compose_path)
|
||||||
|
|
||||||
|
# Pre-up cleanup: a prior half-failed `up` can leave containers still
|
||||||
|
# holding the IPs/ports this run wants, which surfaces as the recurring
|
||||||
|
# "Address already in use" from Docker's IPAM. Best-effort — ignore
|
||||||
|
# failure (e.g. nothing to tear down on a clean host).
|
||||||
|
try:
|
||||||
|
_compose("down", "--remove-orphans", compose_file=compose_path)
|
||||||
|
except subprocess.CalledProcessError:
|
||||||
|
log.debug("pre-up cleanup: compose down failed (likely nothing to remove)")
|
||||||
|
|
||||||
build_env = {"DOCKER_BUILDKIT": "1"} if parallel else {}
|
build_env = {"DOCKER_BUILDKIT": "1"} if parallel else {}
|
||||||
|
|
||||||
console.print("[bold cyan]Building images and starting deckies...[/]")
|
console.print("[bold cyan]Building images and starting deckies...[/]")
|
||||||
@@ -161,12 +201,16 @@ def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False,
|
|||||||
_compose_with_retry("build", "--no-cache", compose_file=compose_path)
|
_compose_with_retry("build", "--no-cache", compose_file=compose_path)
|
||||||
_compose_with_retry("up", "--build", "-d", compose_file=compose_path)
|
_compose_with_retry("up", "--build", "-d", compose_file=compose_path)
|
||||||
|
|
||||||
|
log.info("deployment complete n_deckies=%d", len(config.deckies))
|
||||||
_print_status(config)
|
_print_status(config)
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("engine.teardown")
|
||||||
def teardown(decky_id: str | None = None) -> None:
|
def teardown(decky_id: str | None = None) -> None:
|
||||||
|
log.info("teardown requested decky_id=%s", decky_id or "all")
|
||||||
state = load_state()
|
state = load_state()
|
||||||
if state is None:
|
if state is None:
|
||||||
|
log.warning("teardown: no active deployment found")
|
||||||
console.print("[red]No active deployment found (no decnet-state.json).[/]")
|
console.print("[red]No active deployment found (no decnet-state.json).[/]")
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -174,10 +218,14 @@ def teardown(decky_id: str | None = None) -> None:
|
|||||||
client = docker.from_env()
|
client = docker.from_env()
|
||||||
|
|
||||||
if decky_id:
|
if decky_id:
|
||||||
svc_names = [f"{decky_id}-{svc}" for svc in [d.services for d in config.deckies if d.name == decky_id]]
|
decky = next((d for d in config.deckies if d.name == decky_id), None)
|
||||||
if not svc_names:
|
if decky is None:
|
||||||
console.print(f"[red]Decky '{decky_id}' not found in current deployment.[/]")
|
console.print(f"[red]Decky '{decky_id}' not found in current deployment.[/]")
|
||||||
return
|
return
|
||||||
|
svc_names = [f"{decky_id}-{svc}" for svc in decky.services]
|
||||||
|
if not svc_names:
|
||||||
|
log.warning("teardown: decky %s has no services to stop", decky_id)
|
||||||
|
return
|
||||||
_compose("stop", *svc_names, compose_file=compose_path)
|
_compose("stop", *svc_names, compose_file=compose_path)
|
||||||
_compose("rm", "-f", *svc_names, compose_file=compose_path)
|
_compose("rm", "-f", *svc_names, compose_file=compose_path)
|
||||||
else:
|
else:
|
||||||
@@ -193,6 +241,7 @@ def teardown(decky_id: str | None = None) -> None:
|
|||||||
clear_state()
|
clear_state()
|
||||||
|
|
||||||
net_driver = "IPvlan" if config.ipvlan else "MACVLAN"
|
net_driver = "IPvlan" if config.ipvlan else "MACVLAN"
|
||||||
|
log.info("teardown complete all deckies removed network_driver=%s", net_driver)
|
||||||
console.print(f"[green]All deckies torn down. {net_driver} network removed.[/]")
|
console.print(f"[green]All deckies torn down. {net_driver} network removed.[/]")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -6,9 +6,14 @@ from dotenv import load_dotenv
|
|||||||
# Calculate absolute path to the project root
|
# Calculate absolute path to the project root
|
||||||
_ROOT: Path = Path(__file__).parent.parent.absolute()
|
_ROOT: Path = Path(__file__).parent.parent.absolute()
|
||||||
|
|
||||||
# Load .env.local first, then fallback to .env
|
# Load .env.local first, then fallback to .env.
|
||||||
|
# Also check CWD so deployments that install into site-packages (e.g. the
|
||||||
|
# self-updater's release slots) can ship a per-host .env.local at the
|
||||||
|
# process's working directory without having to edit site-packages.
|
||||||
load_dotenv(_ROOT / ".env.local")
|
load_dotenv(_ROOT / ".env.local")
|
||||||
load_dotenv(_ROOT / ".env")
|
load_dotenv(_ROOT / ".env")
|
||||||
|
load_dotenv(Path.cwd() / ".env.local")
|
||||||
|
load_dotenv(Path.cwd() / ".env")
|
||||||
|
|
||||||
|
|
||||||
def _port(name: str, default: int) -> int:
|
def _port(name: str, default: int) -> int:
|
||||||
@@ -40,30 +45,109 @@ def _require_env(name: str) -> str:
|
|||||||
f"Environment variable '{name}' is set to an insecure default ('{value}'). "
|
f"Environment variable '{name}' is set to an insecure default ('{value}'). "
|
||||||
f"Choose a strong, unique value before starting DECNET."
|
f"Choose a strong, unique value before starting DECNET."
|
||||||
)
|
)
|
||||||
|
if name == "DECNET_JWT_SECRET" and len(value) < 32:
|
||||||
|
_developer = os.environ.get("DECNET_DEVELOPER", "False").lower() == "true"
|
||||||
|
if not _developer:
|
||||||
|
raise ValueError(
|
||||||
|
f"DECNET_JWT_SECRET is too short ({len(value)} bytes). "
|
||||||
|
f"Use at least 32 characters to satisfy HS256 requirements (RFC 7518 §3.2)."
|
||||||
|
)
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
# System logging — all microservice daemons append here.
|
||||||
|
DECNET_SYSTEM_LOGS: str = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.log")
|
||||||
|
|
||||||
|
# Set to "true" to embed the profiler inside the API process.
|
||||||
|
# Leave unset (default) when the standalone `decnet profiler --daemon` is
|
||||||
|
# running — embedding both produces two workers sharing the same DB cursor,
|
||||||
|
# which causes events to be skipped or processed twice.
|
||||||
|
DECNET_EMBED_PROFILER: bool = os.environ.get("DECNET_EMBED_PROFILER", "").lower() == "true"
|
||||||
|
|
||||||
|
# Set to "true" to embed the MACVLAN sniffer inside the API process.
|
||||||
|
# Leave unset (default) when the standalone `decnet sniffer --daemon` is
|
||||||
|
# running (which `decnet deploy` always does). Embedding both produces two
|
||||||
|
# workers sniffing the same interface — duplicated events and wasted CPU.
|
||||||
|
DECNET_EMBED_SNIFFER: bool = os.environ.get("DECNET_EMBED_SNIFFER", "").lower() == "true"
|
||||||
|
|
||||||
|
# Set to "true" to mount the Pyinstrument ASGI middleware on the FastAPI app.
|
||||||
|
# Produces per-request HTML flamegraphs under ./profiles/. Off by default so
|
||||||
|
# production and normal dev runs pay zero profiling overhead.
|
||||||
|
DECNET_PROFILE_REQUESTS: bool = os.environ.get("DECNET_PROFILE_REQUESTS", "").lower() == "true"
|
||||||
|
DECNET_PROFILE_DIR: str = os.environ.get("DECNET_PROFILE_DIR", "profiles")
|
||||||
|
|
||||||
# API Options
|
# API Options
|
||||||
DECNET_API_HOST: str = os.environ.get("DECNET_API_HOST", "0.0.0.0") # nosec B104
|
DECNET_API_HOST: str = os.environ.get("DECNET_API_HOST", "127.0.0.1")
|
||||||
DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000)
|
DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000)
|
||||||
DECNET_JWT_SECRET: str = _require_env("DECNET_JWT_SECRET")
|
# DECNET_JWT_SECRET is resolved lazily via module __getattr__ so that agent /
|
||||||
|
# updater / swarmctl subcommands (which never touch auth) can start without
|
||||||
|
# the master's JWT secret being present in the environment.
|
||||||
DECNET_INGEST_LOG_FILE: str | None = os.environ.get("DECNET_INGEST_LOG_FILE", "/var/log/decnet/decnet.log")
|
DECNET_INGEST_LOG_FILE: str | None = os.environ.get("DECNET_INGEST_LOG_FILE", "/var/log/decnet/decnet.log")
|
||||||
|
|
||||||
|
# SWARM log pipeline — RFC 5425 syslog-over-TLS between worker forwarders
|
||||||
|
# and the master listener. Plaintext syslog across hosts is forbidden.
|
||||||
|
DECNET_SWARM_SYSLOG_PORT: int = _port("DECNET_SWARM_SYSLOG_PORT", 6514)
|
||||||
|
DECNET_SWARM_MASTER_HOST: str | None = os.environ.get("DECNET_SWARM_MASTER_HOST")
|
||||||
|
|
||||||
|
# Worker-side identity + swarmctl locator, seeded by the enroll bundle's
|
||||||
|
# /etc/decnet/decnet.ini ([agent] host-uuid / master-host / swarmctl-port).
|
||||||
|
# The agent heartbeat loop uses these to self-identify to the master.
|
||||||
|
DECNET_HOST_UUID: str | None = os.environ.get("DECNET_HOST_UUID")
|
||||||
|
DECNET_MASTER_HOST: str | None = os.environ.get("DECNET_MASTER_HOST")
|
||||||
|
DECNET_SWARMCTL_PORT: int = _port("DECNET_SWARMCTL_PORT", 8770)
|
||||||
|
|
||||||
|
# Ingester batching: how many log rows to accumulate per commit, and the
|
||||||
|
# max wait (ms) before flushing a partial batch. Larger batches reduce
|
||||||
|
# SQLite write-lock contention; the timeout keeps latency bounded during
|
||||||
|
# low-traffic periods.
|
||||||
|
DECNET_BATCH_SIZE: int = int(os.environ.get("DECNET_BATCH_SIZE", "100"))
|
||||||
|
DECNET_BATCH_MAX_WAIT_MS: int = int(os.environ.get("DECNET_BATCH_MAX_WAIT_MS", "250"))
|
||||||
|
|
||||||
# Web Dashboard Options
|
# Web Dashboard Options
|
||||||
DECNET_WEB_HOST: str = os.environ.get("DECNET_WEB_HOST", "0.0.0.0") # nosec B104
|
DECNET_WEB_HOST: str = os.environ.get("DECNET_WEB_HOST", "127.0.0.1")
|
||||||
DECNET_WEB_PORT: int = _port("DECNET_WEB_PORT", 8080)
|
DECNET_WEB_PORT: int = _port("DECNET_WEB_PORT", 8080)
|
||||||
DECNET_ADMIN_USER: str = os.environ.get("DECNET_ADMIN_USER", "admin")
|
DECNET_ADMIN_USER: str = os.environ.get("DECNET_ADMIN_USER", "admin")
|
||||||
DECNET_ADMIN_PASSWORD: str = os.environ.get("DECNET_ADMIN_PASSWORD", "admin")
|
DECNET_ADMIN_PASSWORD: str = os.environ.get("DECNET_ADMIN_PASSWORD", "admin")
|
||||||
DECNET_DEVELOPER: bool = os.environ.get("DECNET_DEVELOPER", "False").lower() == "true"
|
DECNET_DEVELOPER: bool = os.environ.get("DECNET_DEVELOPER", "False").lower() == "true"
|
||||||
|
|
||||||
|
# Host role — seeded by /etc/decnet/decnet.ini or exported directly.
|
||||||
|
# "master" = the central server (api, web, swarmctl, listener).
|
||||||
|
# "agent" = a worker node (agent, forwarder, updater). Workers gate their
|
||||||
|
# Typer CLI to hide master-only commands (see decnet/cli.py).
|
||||||
|
DECNET_MODE: str = os.environ.get("DECNET_MODE", "master").lower()
|
||||||
|
# When mode=agent, hide master-only Typer commands. Set to "false" for dual-
|
||||||
|
# role dev hosts where a single machine plays both sides.
|
||||||
|
DECNET_DISALLOW_MASTER: bool = (
|
||||||
|
os.environ.get("DECNET_DISALLOW_MASTER", "true").lower() == "true"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Tracing — set to "true" to enable OpenTelemetry distributed tracing.
|
||||||
|
# Separate from DECNET_DEVELOPER so tracing can be toggled independently.
|
||||||
|
DECNET_DEVELOPER_TRACING: bool = os.environ.get("DECNET_DEVELOPER_TRACING", "").lower() == "true"
|
||||||
|
DECNET_OTEL_ENDPOINT: str = os.environ.get("DECNET_OTEL_ENDPOINT", "http://localhost:4317")
|
||||||
|
|
||||||
# Database Options
|
# Database Options
|
||||||
DECNET_DB_TYPE: str = os.environ.get("DECNET_DB_TYPE", "sqlite").lower()
|
DECNET_DB_TYPE: str = os.environ.get("DECNET_DB_TYPE", "sqlite").lower()
|
||||||
DECNET_DB_URL: Optional[str] = os.environ.get("DECNET_DB_URL")
|
DECNET_DB_URL: Optional[str] = os.environ.get("DECNET_DB_URL")
|
||||||
|
# MySQL component vars (used only when DECNET_DB_URL is not set)
|
||||||
|
DECNET_DB_HOST: str = os.environ.get("DECNET_DB_HOST", "localhost")
|
||||||
|
DECNET_DB_PORT: int = _port("DECNET_DB_PORT", 3306) if os.environ.get("DECNET_DB_PORT") else 3306
|
||||||
|
DECNET_DB_NAME: str = os.environ.get("DECNET_DB_NAME", "decnet")
|
||||||
|
DECNET_DB_USER: str = os.environ.get("DECNET_DB_USER", "decnet")
|
||||||
|
DECNET_DB_PASSWORD: Optional[str] = os.environ.get("DECNET_DB_PASSWORD")
|
||||||
|
|
||||||
# CORS — comma-separated list of allowed origins for the web dashboard API.
|
# CORS — comma-separated list of allowed origins for the web dashboard API.
|
||||||
# Defaults to the configured web host/port. Override with DECNET_CORS_ORIGINS if needed.
|
# Defaults to the configured web host/port. Override with DECNET_CORS_ORIGINS if needed.
|
||||||
# Example: DECNET_CORS_ORIGINS=http://192.168.1.50:9090,https://dashboard.example.com
|
# Example: DECNET_CORS_ORIGINS=http://192.168.1.50:9090,https://dashboard.example.com
|
||||||
_web_hostname: str = "localhost" if DECNET_WEB_HOST in ("0.0.0.0", "127.0.0.1", "::") else DECNET_WEB_HOST # nosec B104
|
_WILDCARD_ADDRS = {"0.0.0.0", "127.0.0.1", "::"} # nosec B104 — comparison only, not a bind
|
||||||
|
_web_hostname: str = "localhost" if DECNET_WEB_HOST in _WILDCARD_ADDRS else DECNET_WEB_HOST
|
||||||
_cors_default: str = f"http://{_web_hostname}:{DECNET_WEB_PORT}"
|
_cors_default: str = f"http://{_web_hostname}:{DECNET_WEB_PORT}"
|
||||||
_cors_raw: str = os.environ.get("DECNET_CORS_ORIGINS", _cors_default)
|
_cors_raw: str = os.environ.get("DECNET_CORS_ORIGINS", _cors_default)
|
||||||
DECNET_CORS_ORIGINS: list[str] = [o.strip() for o in _cors_raw.split(",") if o.strip()]
|
DECNET_CORS_ORIGINS: list[str] = [o.strip() for o in _cors_raw.split(",") if o.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
def __getattr__(name: str) -> str:
|
||||||
|
"""Lazy resolution for secrets only the master web/api process needs."""
|
||||||
|
if name == "DECNET_JWT_SECRET":
|
||||||
|
return _require_env("DECNET_JWT_SECRET")
|
||||||
|
raise AttributeError(f"module 'decnet.env' has no attribute {name!r}")
|
||||||
|
|||||||
@@ -17,8 +17,11 @@ from decnet.services.registry import all_services
|
|||||||
|
|
||||||
|
|
||||||
def all_service_names() -> list[str]:
|
def all_service_names() -> list[str]:
|
||||||
"""Return all registered service names from the live plugin registry."""
|
"""Return all registered per-decky service names (excludes fleet singletons)."""
|
||||||
return sorted(all_services().keys())
|
return sorted(
|
||||||
|
name for name, svc in all_services().items()
|
||||||
|
if not svc.fleet_singleton
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def resolve_distros(
|
def resolve_distros(
|
||||||
|
|||||||
@@ -0,0 +1,92 @@
|
|||||||
|
"""
|
||||||
|
DECNET application logging helpers.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
log = get_logger("engine") # APP-NAME in RFC 5424 output becomes "engine"
|
||||||
|
|
||||||
|
The returned logger propagates to the root logger (configured in config.py with
|
||||||
|
Rfc5424Formatter), so level control via DECNET_DEVELOPER still applies globally.
|
||||||
|
|
||||||
|
When ``DECNET_DEVELOPER_TRACING`` is active, every LogRecord is enriched with
|
||||||
|
``otel_trace_id`` and ``otel_span_id`` from the current OTEL span context.
|
||||||
|
This lets you correlate log lines with Jaeger traces — click a log entry and
|
||||||
|
jump straight to the span that produced it.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
class _ComponentFilter(logging.Filter):
|
||||||
|
"""Injects *decnet_component* onto every LogRecord so Rfc5424Formatter can
|
||||||
|
use it as the RFC 5424 APP-NAME field instead of the hardcoded "decnet"."""
|
||||||
|
|
||||||
|
def __init__(self, component: str) -> None:
|
||||||
|
super().__init__()
|
||||||
|
self.component = component
|
||||||
|
|
||||||
|
def filter(self, record: logging.LogRecord) -> bool:
|
||||||
|
record.decnet_component = self.component # type: ignore[attr-defined]
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
class _TraceContextFilter(logging.Filter):
|
||||||
|
"""Injects ``otel_trace_id`` and ``otel_span_id`` onto every LogRecord
|
||||||
|
from the active OTEL span context.
|
||||||
|
|
||||||
|
Installed once by ``enable_trace_context()`` on the root ``decnet`` logger
|
||||||
|
so all child loggers inherit the enrichment via propagation.
|
||||||
|
|
||||||
|
When no span is active, both fields are set to ``"0"`` (cheap string
|
||||||
|
comparison downstream, no None-checks needed).
|
||||||
|
"""
|
||||||
|
|
||||||
|
def filter(self, record: logging.LogRecord) -> bool:
|
||||||
|
try:
|
||||||
|
from opentelemetry import trace
|
||||||
|
span = trace.get_current_span()
|
||||||
|
ctx = span.get_span_context()
|
||||||
|
if ctx and ctx.trace_id:
|
||||||
|
record.otel_trace_id = format(ctx.trace_id, "032x") # type: ignore[attr-defined]
|
||||||
|
record.otel_span_id = format(ctx.span_id, "016x") # type: ignore[attr-defined]
|
||||||
|
else:
|
||||||
|
record.otel_trace_id = "0" # type: ignore[attr-defined]
|
||||||
|
record.otel_span_id = "0" # type: ignore[attr-defined]
|
||||||
|
except Exception:
|
||||||
|
record.otel_trace_id = "0" # type: ignore[attr-defined]
|
||||||
|
record.otel_span_id = "0" # type: ignore[attr-defined]
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
_trace_filter_installed: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
def enable_trace_context() -> None:
|
||||||
|
"""Install the OTEL trace-context filter on the root ``decnet`` logger.
|
||||||
|
|
||||||
|
Called once from ``decnet.telemetry.setup_tracing()`` after the
|
||||||
|
TracerProvider is initialised. Safe to call multiple times (idempotent).
|
||||||
|
"""
|
||||||
|
global _trace_filter_installed
|
||||||
|
if _trace_filter_installed:
|
||||||
|
return
|
||||||
|
root = logging.getLogger("decnet")
|
||||||
|
root.addFilter(_TraceContextFilter())
|
||||||
|
_trace_filter_installed = True
|
||||||
|
|
||||||
|
|
||||||
|
def get_logger(component: str) -> logging.Logger:
    """Return a named logger that self-identifies as *component* in RFC 5424.

    Valid components: cli, engine, api, mutator, collector.

    The logger is named ``decnet.<component>`` and propagates normally, so the
    root handler (Rfc5424Formatter + level gate from DECNET_DEVELOPER) handles
    output. Calling this function multiple times for the same component is safe.
    """
    logger = logging.getLogger(f"decnet.{component}")
    already_tagged = any(
        isinstance(existing, _ComponentFilter) for existing in logger.filters
    )
    if not already_tagged:
        logger.addFilter(_ComponentFilter(component))
    return logger
|
||||||
|
|||||||
@@ -13,29 +13,37 @@ import logging.handlers
|
|||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from decnet.logging.inode_aware_handler import InodeAwareRotatingFileHandler
|
||||||
|
from decnet.privdrop import chown_to_invoking_user, chown_tree_to_invoking_user
|
||||||
|
from decnet.telemetry import traced as _traced
|
||||||
|
|
||||||
_LOG_FILE_ENV = "DECNET_LOG_FILE"
|
_LOG_FILE_ENV = "DECNET_LOG_FILE"
|
||||||
_DEFAULT_LOG_FILE = "/var/log/decnet/decnet.log"
|
_DEFAULT_LOG_FILE = "/var/log/decnet/decnet.log"
|
||||||
_MAX_BYTES = 10 * 1024 * 1024 # 10 MB
|
_MAX_BYTES = 10 * 1024 * 1024 # 10 MB
|
||||||
_BACKUP_COUNT = 5
|
_BACKUP_COUNT = 5
|
||||||
|
|
||||||
_handler: logging.handlers.RotatingFileHandler | None = None
|
_handler: InodeAwareRotatingFileHandler | None = None
|
||||||
_logger: logging.Logger | None = None
|
_logger: logging.Logger | None = None
|
||||||
|
|
||||||
|
|
||||||
def _get_logger() -> logging.Logger:
|
@_traced("logging.init_file_handler")
|
||||||
|
def _init_file_handler() -> logging.Logger:
|
||||||
|
"""One-time initialisation of the rotating file handler."""
|
||||||
global _handler, _logger
|
global _handler, _logger
|
||||||
if _logger is not None:
|
|
||||||
return _logger
|
|
||||||
|
|
||||||
log_path = Path(os.environ.get(_LOG_FILE_ENV, _DEFAULT_LOG_FILE))
|
log_path = Path(os.environ.get(_LOG_FILE_ENV, _DEFAULT_LOG_FILE))
|
||||||
log_path.parent.mkdir(parents=True, exist_ok=True)
|
log_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
# When running under sudo, hand the parent dir back to the invoking user
|
||||||
|
# so a subsequent non-root `decnet api` can also write to it.
|
||||||
|
chown_tree_to_invoking_user(log_path.parent)
|
||||||
|
|
||||||
_handler = logging.handlers.RotatingFileHandler(
|
_handler = InodeAwareRotatingFileHandler(
|
||||||
log_path,
|
log_path,
|
||||||
maxBytes=_MAX_BYTES,
|
maxBytes=_MAX_BYTES,
|
||||||
backupCount=_BACKUP_COUNT,
|
backupCount=_BACKUP_COUNT,
|
||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
)
|
)
|
||||||
|
chown_to_invoking_user(log_path)
|
||||||
_handler.setFormatter(logging.Formatter("%(message)s"))
|
_handler.setFormatter(logging.Formatter("%(message)s"))
|
||||||
|
|
||||||
_logger = logging.getLogger("decnet.syslog")
|
_logger = logging.getLogger("decnet.syslog")
|
||||||
@@ -46,6 +54,12 @@ def _get_logger() -> logging.Logger:
|
|||||||
return _logger
|
return _logger
|
||||||
|
|
||||||
|
|
||||||
|
def _get_logger() -> logging.Logger:
    """Return the module's syslog logger, initialising the file handler on
    first use (subsequent calls hit the cached ``_logger``)."""
    if _logger is None:
        return _init_file_handler()
    return _logger
|
||||||
|
|
||||||
|
|
||||||
def write_syslog(line: str) -> None:
|
def write_syslog(line: str) -> None:
|
||||||
"""Write a single RFC 5424 syslog line to the rotating log file."""
|
"""Write a single RFC 5424 syslog line to the rotating log file."""
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -11,6 +11,8 @@ shared utilities for validating and parsing the log_target string.
|
|||||||
|
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
|
from decnet.telemetry import traced as _traced
|
||||||
|
|
||||||
|
|
||||||
def parse_log_target(log_target: str) -> tuple[str, int]:
|
def parse_log_target(log_target: str) -> tuple[str, int]:
|
||||||
"""
|
"""
|
||||||
@@ -23,6 +25,7 @@ def parse_log_target(log_target: str) -> tuple[str, int]:
|
|||||||
return parts[0], int(parts[1])
|
return parts[0], int(parts[1])
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("logging.probe_log_target")
|
||||||
def probe_log_target(log_target: str, timeout: float = 2.0) -> bool:
|
def probe_log_target(log_target: str, timeout: float = 2.0) -> bool:
|
||||||
"""
|
"""
|
||||||
Return True if the log target is reachable (TCP connect succeeds).
|
Return True if the log target is reachable (TCP connect succeeds).
|
||||||
|
|||||||
60
decnet/logging/inode_aware_handler.py
Normal file
60
decnet/logging/inode_aware_handler.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
"""
|
||||||
|
RotatingFileHandler that detects external deletion or rotation.
|
||||||
|
|
||||||
|
Stdlib ``RotatingFileHandler`` holds an open file descriptor for the
|
||||||
|
lifetime of the handler. If the target file is deleted (``rm``) or
|
||||||
|
rotated out (``logrotate`` without ``copytruncate``), the handler keeps
|
||||||
|
writing to the now-orphaned inode until its own size-based rotation
|
||||||
|
finally triggers — silently losing every line in between.
|
||||||
|
|
||||||
|
Stdlib ``WatchedFileHandler`` solves exactly this problem but doesn't
|
||||||
|
rotate by size. This subclass combines both: before each emit we stat
|
||||||
|
the configured path and compare its inode/device to the currently open
|
||||||
|
file; on mismatch we close and reopen.
|
||||||
|
|
||||||
|
Cheap: one ``os.stat`` per log record. Matches the pattern used by
|
||||||
|
``decnet/collector/worker.py:_reopen_if_needed``.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import logging.handlers
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class InodeAwareRotatingFileHandler(logging.handlers.RotatingFileHandler):
    """RotatingFileHandler that reopens the target on external rotation/deletion."""

    def _should_reopen(self) -> bool:
        """Return True when the path on disk no longer matches the open stream.

        One ``os.stat`` + one ``os.fstat`` per call; inode/device mismatch
        means the file was rotated or deleted behind our back.
        """
        if self.stream is None:
            return True
        try:
            disk_stat = os.stat(self.baseFilename)
        except FileNotFoundError:
            # Target was deleted out from under us — must reopen.
            return True
        except OSError:
            # Transient stat failure: keep writing to the fd we already hold.
            return False
        try:
            open_stat = os.fstat(self.stream.fileno())
        except OSError:
            return True
        return (disk_stat.st_ino != open_stat.st_ino
                or disk_stat.st_dev != open_stat.st_dev)

    def emit(self, record: logging.LogRecord) -> None:
        """Emit *record*, transparently reopening the file if it was rotated
        or deleted externally since the last write.

        Fix vs. previous revision: close only the *stream* (mirroring stdlib
        ``WatchedFileHandler.reopenIfNeeded``). Calling ``self.close()`` here
        would also deregister the handler from logging's internal handler
        list, so the handler would miss its final flush/close at interpreter
        shutdown after the first external rotation.
        """
        if self._should_reopen():
            if self.stream is not None:
                try:
                    self.stream.flush()
                    self.stream.close()
                except Exception:  # nosec B110
                    pass
                self.stream = None
            try:
                self.stream = self._open()
            except OSError:
                # A logging handler MUST NOT crash its caller. If we can't
                # reopen (e.g. file is root-owned after `sudo decnet deploy`
                # and the current process is non-root), defer to the stdlib
                # error path, which just prints a traceback to stderr.
                self.handleError(record)
                return
        super().emit(record)
|
||||||
@@ -5,7 +5,7 @@ Produces fully-compliant syslog messages:
|
|||||||
<PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG
|
<PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG
|
||||||
|
|
||||||
Facility: local0 (16)
|
Facility: local0 (16)
|
||||||
PEN for structured data: decnet@55555
|
PEN for structured data: relay@55555
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@@ -16,7 +16,7 @@ from typing import Any
|
|||||||
|
|
||||||
FACILITY_LOCAL0 = 16
|
FACILITY_LOCAL0 = 16
|
||||||
NILVALUE = "-"
|
NILVALUE = "-"
|
||||||
_SD_ID = "decnet@55555"
|
_SD_ID = "relay@55555"
|
||||||
|
|
||||||
SEVERITY_INFO = 6
|
SEVERITY_INFO = 6
|
||||||
SEVERITY_WARNING = 4
|
SEVERITY_WARNING = 4
|
||||||
|
|||||||
@@ -99,6 +99,9 @@ class DeckyConfig(BaseModel):
|
|||||||
mutate_interval: int | None = None # automatic rotation interval in minutes
|
mutate_interval: int | None = None # automatic rotation interval in minutes
|
||||||
last_mutated: float = 0.0 # timestamp of last mutation
|
last_mutated: float = 0.0 # timestamp of last mutation
|
||||||
last_login_attempt: float = 0.0 # timestamp of most recent interaction
|
last_login_attempt: float = 0.0 # timestamp of most recent interaction
|
||||||
|
# SWARM: the SwarmHost.uuid that runs this decky. None in unihost mode
|
||||||
|
# so existing state files deserialize unchanged.
|
||||||
|
host_uuid: str | None = None
|
||||||
|
|
||||||
@field_validator("services")
|
@field_validator("services")
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -14,22 +14,28 @@ from decnet.fleet import all_service_names
|
|||||||
from decnet.composer import write_compose
|
from decnet.composer import write_compose
|
||||||
from decnet.config import DeckyConfig, DecnetConfig
|
from decnet.config import DeckyConfig, DecnetConfig
|
||||||
from decnet.engine import _compose_with_retry
|
from decnet.engine import _compose_with_retry
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.telemetry import traced as _traced
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import anyio
|
import anyio
|
||||||
import asyncio
|
import asyncio
|
||||||
from decnet.web.db.repository import BaseRepository
|
from decnet.web.db.repository import BaseRepository
|
||||||
|
|
||||||
|
log = get_logger("mutator")
|
||||||
console = Console()
|
console = Console()
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("mutator.mutate_decky")
|
||||||
async def mutate_decky(decky_name: str, repo: BaseRepository) -> bool:
|
async def mutate_decky(decky_name: str, repo: BaseRepository) -> bool:
|
||||||
"""
|
"""
|
||||||
Perform an Intra-Archetype Shuffle for a specific decky.
|
Perform an Intra-Archetype Shuffle for a specific decky.
|
||||||
Returns True if mutation succeeded, False otherwise.
|
Returns True if mutation succeeded, False otherwise.
|
||||||
"""
|
"""
|
||||||
|
log.debug("mutate_decky: start decky=%s", decky_name)
|
||||||
state_dict = await repo.get_state("deployment")
|
state_dict = await repo.get_state("deployment")
|
||||||
if state_dict is None:
|
if state_dict is None:
|
||||||
|
log.error("mutate_decky: no active deployment found in database")
|
||||||
console.print("[red]No active deployment found in database.[/]")
|
console.print("[red]No active deployment found in database.[/]")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -73,25 +79,30 @@ async def mutate_decky(decky_name: str, repo: BaseRepository) -> bool:
|
|||||||
# Still writes files for Docker to use
|
# Still writes files for Docker to use
|
||||||
write_compose(config, compose_path)
|
write_compose(config, compose_path)
|
||||||
|
|
||||||
|
log.info("mutation applied decky=%s services=%s", decky_name, ",".join(decky.services))
|
||||||
console.print(f"[cyan]Mutating '{decky_name}' to services: {', '.join(decky.services)}[/]")
|
console.print(f"[cyan]Mutating '{decky_name}' to services: {', '.join(decky.services)}[/]")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Wrap blocking call in thread
|
# Wrap blocking call in thread
|
||||||
await anyio.to_thread.run_sync(_compose_with_retry, "up", "-d", "--remove-orphans", compose_path)
|
await anyio.to_thread.run_sync(_compose_with_retry, "up", "-d", "--remove-orphans", compose_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
log.error("mutation failed decky=%s error=%s", decky_name, e)
|
||||||
console.print(f"[red]Failed to mutate '{decky_name}': {e}[/]")
|
console.print(f"[red]Failed to mutate '{decky_name}': {e}[/]")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("mutator.mutate_all")
|
||||||
async def mutate_all(repo: BaseRepository, force: bool = False) -> None:
|
async def mutate_all(repo: BaseRepository, force: bool = False) -> None:
|
||||||
"""
|
"""
|
||||||
Check all deckies and mutate those that are due.
|
Check all deckies and mutate those that are due.
|
||||||
If force=True, mutates all deckies regardless of schedule.
|
If force=True, mutates all deckies regardless of schedule.
|
||||||
"""
|
"""
|
||||||
|
log.debug("mutate_all: start force=%s", force)
|
||||||
state_dict = await repo.get_state("deployment")
|
state_dict = await repo.get_state("deployment")
|
||||||
if state_dict is None:
|
if state_dict is None:
|
||||||
|
log.error("mutate_all: no active deployment found")
|
||||||
console.print("[red]No active deployment found.[/]")
|
console.print("[red]No active deployment found.[/]")
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -116,15 +127,21 @@ async def mutate_all(repo: BaseRepository, force: bool = False) -> None:
|
|||||||
mutated_count += 1
|
mutated_count += 1
|
||||||
|
|
||||||
if mutated_count == 0 and not force:
|
if mutated_count == 0 and not force:
|
||||||
|
log.debug("mutate_all: no deckies due for mutation")
|
||||||
console.print("[dim]No deckies are due for mutation.[/]")
|
console.print("[dim]No deckies are due for mutation.[/]")
|
||||||
|
else:
|
||||||
|
log.info("mutate_all: complete mutated_count=%d", mutated_count)
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("mutator.watch_loop")
|
||||||
async def run_watch_loop(repo: BaseRepository, poll_interval_secs: int = 10) -> None:
|
async def run_watch_loop(repo: BaseRepository, poll_interval_secs: int = 10) -> None:
|
||||||
"""Run an infinite loop checking for deckies that need mutation."""
|
"""Run an infinite loop checking for deckies that need mutation."""
|
||||||
|
log.info("mutator watch loop started poll_interval_secs=%d", poll_interval_secs)
|
||||||
console.print(f"[green]DECNET Mutator Watcher started (polling every {poll_interval_secs}s).[/]")
|
console.print(f"[green]DECNET Mutator Watcher started (polling every {poll_interval_secs}s).[/]")
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
await mutate_all(force=False, repo=repo)
|
await mutate_all(force=False, repo=repo)
|
||||||
await asyncio.sleep(poll_interval_secs)
|
await asyncio.sleep(poll_interval_secs)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
|
log.info("mutator watch loop stopped")
|
||||||
console.print("\n[dim]Mutator watcher stopped.[/]")
|
console.print("\n[dim]Mutator watcher stopped.[/]")
|
||||||
|
|||||||
@@ -126,22 +126,57 @@ def allocate_ips(
|
|||||||
# Docker MACVLAN network
|
# Docker MACVLAN network
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def create_macvlan_network(
|
def _ensure_network(
|
||||||
client: docker.DockerClient,
|
client: docker.DockerClient,
|
||||||
|
*,
|
||||||
|
driver: str,
|
||||||
interface: str,
|
interface: str,
|
||||||
subnet: str,
|
subnet: str,
|
||||||
gateway: str,
|
gateway: str,
|
||||||
ip_range: str,
|
ip_range: str,
|
||||||
|
extra_options: dict | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Create the MACVLAN Docker network. No-op if it already exists."""
|
"""Create the decnet docker network with ``driver``, replacing any
|
||||||
existing = [n.name for n in client.networks.list()]
|
existing network of the same name that was built with a different driver.
|
||||||
if MACVLAN_NETWORK_NAME in existing:
|
|
||||||
return
|
Why the replace-on-driver-mismatch: macvlan and ipvlan slaves can't
|
||||||
|
coexist on the same parent interface. If an earlier run left behind a
|
||||||
|
macvlan-driver network and we're now asked for ipvlan (or vice versa),
|
||||||
|
short-circuiting on name alone leaves Docker attaching new containers
|
||||||
|
to the old driver and the host NIC ends up EBUSY on the next port
|
||||||
|
create. So: when driver disagrees, disconnect everything and DROP it.
|
||||||
|
"""
|
||||||
|
options = {"parent": interface}
|
||||||
|
if extra_options:
|
||||||
|
options.update(extra_options)
|
||||||
|
|
||||||
|
for net in client.networks.list(names=[MACVLAN_NETWORK_NAME]):
|
||||||
|
if net.attrs.get("Driver") == driver:
|
||||||
|
# Same driver — but if the IPAM pool drifted (different subnet,
|
||||||
|
# gateway, or ip-range than this deploy asks for), reusing it
|
||||||
|
# hands out addresses from the old pool and we race the real LAN.
|
||||||
|
# Compare and rebuild on mismatch.
|
||||||
|
pools = (net.attrs.get("IPAM") or {}).get("Config") or []
|
||||||
|
cur = pools[0] if pools else {}
|
||||||
|
if (
|
||||||
|
cur.get("Subnet") == subnet
|
||||||
|
and cur.get("Gateway") == gateway
|
||||||
|
and cur.get("IPRange") == ip_range
|
||||||
|
):
|
||||||
|
return # right driver AND matching pool, leave it alone
|
||||||
|
# Driver mismatch OR IPAM drift — tear it down. Disconnect any live
|
||||||
|
# containers first so `remove()` doesn't refuse with ErrNetworkInUse.
|
||||||
|
for cid in (net.attrs.get("Containers") or {}):
|
||||||
|
try:
|
||||||
|
net.disconnect(cid, force=True)
|
||||||
|
except docker.errors.APIError:
|
||||||
|
pass
|
||||||
|
net.remove()
|
||||||
|
|
||||||
client.networks.create(
|
client.networks.create(
|
||||||
name=MACVLAN_NETWORK_NAME,
|
name=MACVLAN_NETWORK_NAME,
|
||||||
driver="macvlan",
|
driver=driver,
|
||||||
options={"parent": interface},
|
options=options,
|
||||||
ipam=docker.types.IPAMConfig(
|
ipam=docker.types.IPAMConfig(
|
||||||
driver="default",
|
driver="default",
|
||||||
pool_configs=[
|
pool_configs=[
|
||||||
@@ -155,6 +190,21 @@ def create_macvlan_network(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def create_macvlan_network(
    client: docker.DockerClient,
    interface: str,
    subnet: str,
    gateway: str,
    ip_range: str,
) -> None:
    """Create the MACVLAN Docker network, replacing an ipvlan-driver one of
    the same name if necessary (parent-NIC can't host both drivers)."""
    _ensure_network(
        client,
        driver="macvlan",
        interface=interface,
        subnet=subnet,
        gateway=gateway,
        ip_range=ip_range,
    )
|
||||||
|
|
||||||
|
|
||||||
def create_ipvlan_network(
    client: docker.DockerClient,
    interface: str,
    subnet: str,
    gateway: str,
    ip_range: str,
) -> None:
    """Create an IPvlan L2 Docker network, replacing a macvlan-driver one of
    the same name if necessary (parent-NIC can't host both drivers)."""
    _ensure_network(
        client,
        driver="ipvlan",
        interface=interface,
        subnet=subnet,
        gateway=gateway,
        ip_range=ip_range,
        extra_options={"ipvlan_mode": "l2"},
    )
|
||||||
|
|
||||||
|
|
||||||
@@ -204,10 +241,14 @@ def _require_root() -> None:
|
|||||||
def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str) -> None:
|
def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str) -> None:
|
||||||
"""
|
"""
|
||||||
Create a macvlan interface on the host so the deployer can reach deckies.
|
Create a macvlan interface on the host so the deployer can reach deckies.
|
||||||
Idempotent — skips steps that are already done.
|
Idempotent — skips steps that are already done. Drops a stale ipvlan
|
||||||
|
host-helper first: the two drivers can share a parent NIC on paper but
|
||||||
|
leaving the opposite helper in place is just cruft after a driver swap.
|
||||||
"""
|
"""
|
||||||
_require_root()
|
_require_root()
|
||||||
|
|
||||||
|
_run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
|
||||||
|
|
||||||
# Check if interface already exists
|
# Check if interface already exists
|
||||||
result = _run(["ip", "link", "show", HOST_MACVLAN_IFACE], check=False)
|
result = _run(["ip", "link", "show", HOST_MACVLAN_IFACE], check=False)
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
@@ -227,10 +268,14 @@ def teardown_host_macvlan(decky_ip_range: str) -> None:
|
|||||||
def setup_host_ipvlan(interface: str, host_ipvlan_ip: str, decky_ip_range: str) -> None:
|
def setup_host_ipvlan(interface: str, host_ipvlan_ip: str, decky_ip_range: str) -> None:
|
||||||
"""
|
"""
|
||||||
Create an IPvlan interface on the host so the deployer can reach deckies.
|
Create an IPvlan interface on the host so the deployer can reach deckies.
|
||||||
Idempotent — skips steps that are already done.
|
Idempotent — skips steps that are already done. Drops a stale macvlan
|
||||||
|
host-helper first so a prior macvlan deploy doesn't leave its slave
|
||||||
|
dangling on the parent NIC after the driver swap.
|
||||||
"""
|
"""
|
||||||
_require_root()
|
_require_root()
|
||||||
|
|
||||||
|
_run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
|
||||||
|
|
||||||
result = _run(["ip", "link", "show", HOST_IPVLAN_IFACE], check=False)
|
result = _run(["ip", "link", "show", HOST_IPVLAN_IFACE], check=False)
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
_run(["ip", "link", "add", HOST_IPVLAN_IFACE, "link", interface, "type", "ipvlan", "mode", "l2"])
|
_run(["ip", "link", "add", HOST_IPVLAN_IFACE, "link", interface, "type", "ipvlan", "mode", "l2"])
|
||||||
|
|||||||
67
decnet/privdrop.py
Normal file
67
decnet/privdrop.py
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
"""
|
||||||
|
Helpers for dropping root ownership on files created during privileged
|
||||||
|
operations (e.g. `sudo decnet deploy` needs root for MACVLAN, but its log
|
||||||
|
files should be owned by the invoking user so a subsequent non-root
|
||||||
|
`decnet api` can append to them).
|
||||||
|
|
||||||
|
When sudo invokes a process, it sets SUDO_UID / SUDO_GID in the
|
||||||
|
environment to the original user's IDs. We use those to chown files
|
||||||
|
back after creation.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
def _sudo_ids() -> Optional[tuple[int, int]]:
|
||||||
|
"""Return (uid, gid) of the sudo-invoking user, or None when the
|
||||||
|
process was not launched via sudo / the env vars are missing."""
|
||||||
|
raw_uid = os.environ.get("SUDO_UID")
|
||||||
|
raw_gid = os.environ.get("SUDO_GID")
|
||||||
|
if not raw_uid or not raw_gid:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return int(raw_uid), int(raw_gid)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def chown_to_invoking_user(path: str | os.PathLike[str]) -> None:
    """Best-effort chown of *path* to the sudo-invoking user.

    No-op when:
      * not running as root (nothing to drop),
      * not launched via sudo (no SUDO_UID/SUDO_GID),
      * the path does not exist,
      * chown fails (logged-only — never raises).
    """
    if os.geteuid() != 0:
        return
    ids = _sudo_ids()
    if ids is None:
        return
    target = Path(path)
    if not target.exists():
        return
    try:
        os.chown(target, *ids)
    except OSError:
        # Best-effort; a failed chown is not fatal to logging.
        pass
|
||||||
|
|
||||||
|
|
||||||
|
def chown_tree_to_invoking_user(root: str | os.PathLike[str]) -> None:
    """Apply :func:`chown_to_invoking_user` to *root* and every file/dir
    beneath it. Used for parent directories that we just created with
    ``mkdir(parents=True)`` as root."""
    if os.geteuid() != 0 or _sudo_ids() is None:
        return
    base = Path(root)
    if not base.exists():
        return
    # Chown the root itself first, then everything underneath it.
    for entry in (base, *base.rglob("*")):
        chown_to_invoking_user(entry)
|
||||||
13
decnet/prober/__init__.py
Normal file
13
decnet/prober/__init__.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
"""
|
||||||
|
DECNET-PROBER — standalone active network probing service.
|
||||||
|
|
||||||
|
Runs as a detached host-level process (no container). Sends crafted TLS
|
||||||
|
probes to discover C2 frameworks and other attacker infrastructure via
|
||||||
|
JARM fingerprinting. Results are written as RFC 5424 syslog + JSON to the
|
||||||
|
same log file the collector uses, so the existing ingestion pipeline picks
|
||||||
|
them up automatically.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from decnet.prober.worker import prober_worker
|
||||||
|
|
||||||
|
__all__ = ["prober_worker"]
|
||||||
252
decnet/prober/hassh.py
Normal file
252
decnet/prober/hassh.py
Normal file
@@ -0,0 +1,252 @@
|
|||||||
|
"""
|
||||||
|
HASSHServer — SSH server fingerprinting via KEX_INIT algorithm ordering.
|
||||||
|
|
||||||
|
Connects to an SSH server, completes the version exchange, captures the
|
||||||
|
server's SSH_MSG_KEXINIT message, and hashes the server-to-client algorithm
|
||||||
|
fields (kex, encryption, MAC, compression) into a 32-character MD5 digest.
|
||||||
|
|
||||||
|
This is the *server* variant of HASSH (HASSHServer). It fingerprints what
|
||||||
|
the server *offers*, which identifies the SSH implementation (OpenSSH,
|
||||||
|
Paramiko, libssh, Cobalt Strike SSH, etc.).
|
||||||
|
|
||||||
|
Stdlib only (socket, struct, hashlib) plus decnet.telemetry for tracing (zero-cost when disabled).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import socket
|
||||||
|
import struct
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from decnet.telemetry import traced as _traced
|
||||||
|
|
||||||
|
# SSH protocol constants
|
||||||
|
_SSH_MSG_KEXINIT = 20
|
||||||
|
_KEX_INIT_COOKIE_LEN = 16
|
||||||
|
_KEX_INIT_NAME_LISTS = 10 # 10 name-list fields in KEX_INIT
|
||||||
|
|
||||||
|
# Blend in as a normal OpenSSH client
|
||||||
|
_CLIENT_BANNER = b"SSH-2.0-OpenSSH_9.6\r\n"
|
||||||
|
|
||||||
|
# Max bytes to read for server banner
|
||||||
|
_MAX_BANNER_LEN = 256
|
||||||
|
|
||||||
|
# Max bytes for a single SSH packet (KEX_INIT is typically < 2KB)
|
||||||
|
_MAX_PACKET_LEN = 35000
|
||||||
|
|
||||||
|
|
||||||
|
# ─── SSH connection + KEX_INIT capture ──────────────────────────────────────
|
||||||
|
|
||||||
|
@_traced("prober.hassh_ssh_connect")
|
||||||
|
def _ssh_connect(
|
||||||
|
host: str,
|
||||||
|
port: int,
|
||||||
|
timeout: float,
|
||||||
|
) -> tuple[str, bytes] | None:
|
||||||
|
"""
|
||||||
|
TCP connect, exchange version strings, read server's KEX_INIT.
|
||||||
|
|
||||||
|
Returns (server_banner, kex_init_payload) or None on failure.
|
||||||
|
The kex_init_payload starts at the SSH_MSG_KEXINIT type byte.
|
||||||
|
"""
|
||||||
|
sock = None
|
||||||
|
try:
|
||||||
|
sock = socket.create_connection((host, port), timeout=timeout)
|
||||||
|
sock.settimeout(timeout)
|
||||||
|
|
||||||
|
# 1. Read server banner (line ending \r\n or \n)
|
||||||
|
banner = _read_banner(sock)
|
||||||
|
if banner is None or not banner.startswith("SSH-"):
|
||||||
|
return None
|
||||||
|
|
||||||
|
# 2. Send our client version string
|
||||||
|
sock.sendall(_CLIENT_BANNER)
|
||||||
|
|
||||||
|
# 3. Read the server's first binary packet (should be KEX_INIT)
|
||||||
|
payload = _read_ssh_packet(sock)
|
||||||
|
if payload is None or len(payload) < 1:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if payload[0] != _SSH_MSG_KEXINIT:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return (banner, payload)
|
||||||
|
|
||||||
|
except (OSError, socket.timeout, TimeoutError, ConnectionError):
|
||||||
|
return None
|
||||||
|
finally:
|
||||||
|
if sock is not None:
|
||||||
|
try:
|
||||||
|
sock.close()
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _read_banner(sock: socket.socket) -> str | None:
    """Read the SSH version banner line from the socket."""
    chunks: list[bytes] = []
    while len(chunks) < _MAX_BANNER_LEN:
        try:
            byte = sock.recv(1)
        except (OSError, socket.timeout, TimeoutError):
            return None
        if not byte:
            # Peer closed before sending a full banner line.
            return None
        chunks.append(byte)
        if byte == b"\n":
            break

    try:
        return b"".join(chunks).decode("utf-8", errors="replace").rstrip("\r\n")
    except Exception:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _read_ssh_packet(sock: socket.socket) -> bytes | None:
    """
    Read a single SSH binary packet and return its payload.

    SSH binary packet format:
        uint32  packet_length  (not including itself or MAC)
        byte    padding_length
        byte[]  payload        (packet_length - padding_length - 1)
        byte[]  padding
    """
    header = _recv_exact(sock, 4)
    if header is None:
        return None

    (packet_length,) = struct.unpack("!I", header)
    if not 2 <= packet_length <= _MAX_PACKET_LEN:
        return None

    body = _recv_exact(sock, packet_length)
    if body is None:
        return None

    padding = body[0]
    payload_len = packet_length - padding - 1
    if payload_len < 1 or payload_len > len(body) - 1:
        return None

    return body[1 : 1 + payload_len]
|
||||||
|
|
||||||
|
|
||||||
|
def _recv_exact(sock: socket.socket, n: int) -> bytes | None:
|
||||||
|
"""Read exactly n bytes from socket, or None on failure."""
|
||||||
|
buf = b""
|
||||||
|
while len(buf) < n:
|
||||||
|
try:
|
||||||
|
chunk = sock.recv(n - len(buf))
|
||||||
|
except (OSError, socket.timeout, TimeoutError):
|
||||||
|
return None
|
||||||
|
if not chunk:
|
||||||
|
return None
|
||||||
|
buf += chunk
|
||||||
|
return buf
|
||||||
|
|
||||||
|
|
||||||
|
# ─── KEX_INIT parsing ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _parse_kex_init(payload: bytes) -> dict[str, str] | None:
    """
    Parse SSH_MSG_KEXINIT payload and extract the 10 name-list fields.

    Payload layout:
        byte       SSH_MSG_KEXINIT (20)
        byte[16]   cookie
        10 × name-list:
            uint32  length
            byte[]  utf-8 string (comma-separated algorithm names)
        bool       first_kex_packet_follows
        uint32     reserved

    Returns dict with keys: kex_algorithms, server_host_key_algorithms,
    encryption_client_to_server, encryption_server_to_client,
    mac_client_to_server, mac_server_to_client,
    compression_client_to_server, compression_server_to_client,
    languages_client_to_server, languages_server_to_client.
    """
    if len(payload) < 1 + _KEX_INIT_COOKIE_LEN + 4:
        return None

    pos = 1 + _KEX_INIT_COOKIE_LEN  # skip type byte + cookie

    names = (
        "kex_algorithms",
        "server_host_key_algorithms",
        "encryption_client_to_server",
        "encryption_server_to_client",
        "mac_client_to_server",
        "mac_server_to_client",
        "compression_client_to_server",
        "compression_server_to_client",
        "languages_client_to_server",
        "languages_server_to_client",
    )

    parsed: dict[str, str] = {}
    for field in names:
        if pos + 4 > len(payload):
            return None
        (size,) = struct.unpack("!I", payload[pos : pos + 4])
        pos += 4
        if pos + size > len(payload):
            return None
        parsed[field] = payload[pos : pos + size].decode("utf-8", errors="replace")
        pos += size

    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
# ─── HASSH computation ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _compute_hassh(kex: str, enc: str, mac: str, comp: str) -> str:
|
||||||
|
"""
|
||||||
|
Compute HASSHServer hash: MD5 of "kex;enc_s2c;mac_s2c;comp_s2c".
|
||||||
|
|
||||||
|
Returns 32-character lowercase hex digest.
|
||||||
|
"""
|
||||||
|
raw = f"{kex};{enc};{mac};{comp}"
|
||||||
|
return hashlib.md5(raw.encode("utf-8"), usedforsecurity=False).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Public API ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@_traced("prober.hassh_server")
def hassh_server(
    host: str,
    port: int,
    timeout: float = 5.0,
) -> dict[str, Any] | None:
    """
    Connect to an SSH server and compute its HASSHServer fingerprint.

    Returns a dict with the hash, banner, and raw algorithm fields,
    or None if the host is not running an SSH server on the given port.
    """
    conn = _ssh_connect(host, port, timeout)
    if conn is None:
        return None

    banner, kex_payload = conn
    algos = _parse_kex_init(kex_payload)
    if algos is None:
        return None

    # HASSHServer hashes only the server-to-client direction lists.
    kex = algos["kex_algorithms"]
    enc_s2c = algos["encryption_server_to_client"]
    mac_s2c = algos["mac_server_to_client"]
    comp_s2c = algos["compression_server_to_client"]

    return {
        "hassh_server": _compute_hassh(kex, enc_s2c, mac_s2c, comp_s2c),
        "banner": banner,
        "kex_algorithms": kex,
        "encryption_s2c": enc_s2c,
        "mac_s2c": mac_s2c,
        "compression_s2c": comp_s2c,
    }
|
||||||
506
decnet/prober/jarm.py
Normal file
506
decnet/prober/jarm.py
Normal file
@@ -0,0 +1,506 @@
|
|||||||
|
"""
|
||||||
|
JARM TLS fingerprinting — pure stdlib implementation.
|
||||||
|
|
||||||
|
JARM sends 10 crafted TLS ClientHello packets to a target, each varying
|
||||||
|
TLS version, cipher suite order, extensions, and ALPN values. The
|
||||||
|
ServerHello responses are parsed and hashed to produce a 62-character
|
||||||
|
fingerprint that identifies the TLS server implementation.
|
||||||
|
|
||||||
|
Reference: https://github.com/salesforce/jarm
|
||||||
|
|
||||||
|
Only DECNET import is decnet.telemetry for tracing (zero-cost when disabled).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import socket
|
||||||
|
import struct
|
||||||
|
import time
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from decnet.telemetry import traced as _traced
|
||||||
|
|
||||||
|
# ─── Constants ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
JARM_EMPTY_HASH = "0" * 62
|
||||||
|
|
||||||
|
_INTER_PROBE_DELAY = 0.1 # seconds between probes to avoid IDS triggers
|
||||||
|
|
||||||
|
# TLS version bytes
|
||||||
|
_TLS_1_0 = b"\x03\x01"
|
||||||
|
_TLS_1_1 = b"\x03\x02"
|
||||||
|
_TLS_1_2 = b"\x03\x03"
|
||||||
|
_TLS_1_3 = b"\x03\x03" # TLS 1.3 uses 0x0303 in record layer
|
||||||
|
|
||||||
|
# TLS record types
|
||||||
|
_CONTENT_HANDSHAKE = 0x16
|
||||||
|
_HANDSHAKE_CLIENT_HELLO = 0x01
|
||||||
|
_HANDSHAKE_SERVER_HELLO = 0x02
|
||||||
|
|
||||||
|
# Extension types
|
||||||
|
_EXT_SERVER_NAME = 0x0000
|
||||||
|
_EXT_EC_POINT_FORMATS = 0x000B
|
||||||
|
_EXT_SUPPORTED_GROUPS = 0x000A
|
||||||
|
_EXT_SESSION_TICKET = 0x0023
|
||||||
|
_EXT_ENCRYPT_THEN_MAC = 0x0016
|
||||||
|
_EXT_EXTENDED_MASTER_SECRET = 0x0017
|
||||||
|
_EXT_SIGNATURE_ALGORITHMS = 0x000D
|
||||||
|
_EXT_SUPPORTED_VERSIONS = 0x002B
|
||||||
|
_EXT_PSK_KEY_EXCHANGE_MODES = 0x002D
|
||||||
|
_EXT_KEY_SHARE = 0x0033
|
||||||
|
_EXT_ALPN = 0x0010
|
||||||
|
_EXT_PADDING = 0x0015
|
||||||
|
|
||||||
|
# ─── Cipher suite lists per JARM spec ────────────────────────────────────────
|
||||||
|
|
||||||
|
# Forward cipher order (standard)
|
||||||
|
_CIPHERS_FORWARD = [
|
||||||
|
0x0016, 0x0033, 0x0067, 0xC09E, 0xC0A2, 0x009E, 0x0039, 0x006B,
|
||||||
|
0xC09F, 0xC0A3, 0x009F, 0x0045, 0x00BE, 0x0088, 0x00C4, 0x009A,
|
||||||
|
0xC008, 0xC009, 0xC023, 0xC0AC, 0xC0AE, 0xC02B, 0xC00A, 0xC024,
|
||||||
|
0xC0AD, 0xC0AF, 0xC02C, 0xC072, 0xC073, 0xCCA8, 0x1301, 0x1302,
|
||||||
|
0x1303, 0xC013, 0xC014, 0xC02F, 0x009C, 0xC02E, 0x002F, 0x0035,
|
||||||
|
0x000A, 0x0005, 0x0004,
|
||||||
|
]
|
||||||
|
|
||||||
|
# Reverse cipher order
|
||||||
|
_CIPHERS_REVERSE = list(reversed(_CIPHERS_FORWARD))
|
||||||
|
|
||||||
|
# TLS 1.3-only ciphers
|
||||||
|
_CIPHERS_TLS13 = [0x1301, 0x1302, 0x1303]
|
||||||
|
|
||||||
|
# Middle-out cipher order (interleaved from center)
|
||||||
|
def _middle_out(lst: list[int]) -> list[int]:
|
||||||
|
result: list[int] = []
|
||||||
|
mid = len(lst) // 2
|
||||||
|
for i in range(mid + 1):
|
||||||
|
if mid + i < len(lst):
|
||||||
|
result.append(lst[mid + i])
|
||||||
|
if mid - i >= 0 and mid - i != mid + i:
|
||||||
|
result.append(lst[mid - i])
|
||||||
|
return result
|
||||||
|
|
||||||
|
_CIPHERS_MIDDLE_OUT = _middle_out(_CIPHERS_FORWARD)
|
||||||
|
|
||||||
|
# Rare/uncommon extensions cipher list
|
||||||
|
_CIPHERS_RARE = [
|
||||||
|
0x0016, 0x0033, 0xC011, 0xC012, 0x0067, 0xC09E, 0xC0A2, 0x009E,
|
||||||
|
0x0039, 0x006B, 0xC09F, 0xC0A3, 0x009F, 0x0045, 0x00BE, 0x0088,
|
||||||
|
0x00C4, 0x009A, 0xC008, 0xC009, 0xC023, 0xC0AC, 0xC0AE, 0xC02B,
|
||||||
|
0xC00A, 0xC024, 0xC0AD, 0xC0AF, 0xC02C, 0xC072, 0xC073, 0xCCA8,
|
||||||
|
0x1301, 0x1302, 0x1303, 0xC013, 0xC014, 0xC02F, 0x009C, 0xC02E,
|
||||||
|
0x002F, 0x0035, 0x000A, 0x0005, 0x0004,
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Probe definitions ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Each probe: (tls_version, cipher_list, tls13_support, alpn, extensions_style)
|
||||||
|
# tls_version: record-layer version bytes
|
||||||
|
# cipher_list: which cipher suite ordering to use
|
||||||
|
# tls13_support: whether to include TLS 1.3 extensions (supported_versions, key_share, psk)
|
||||||
|
# alpn: ALPN protocol string or None
|
||||||
|
# extensions_style: "standard", "rare", or "no_extensions"
|
||||||
|
|
||||||
|
_PROBE_CONFIGS: list[dict[str, Any]] = [
|
||||||
|
# 0: TLS 1.2 forward
|
||||||
|
{"version": _TLS_1_2, "ciphers": _CIPHERS_FORWARD, "tls13": False, "alpn": None, "style": "standard"},
|
||||||
|
# 1: TLS 1.2 reverse
|
||||||
|
{"version": _TLS_1_2, "ciphers": _CIPHERS_REVERSE, "tls13": False, "alpn": None, "style": "standard"},
|
||||||
|
# 2: TLS 1.1 forward
|
||||||
|
{"version": _TLS_1_1, "ciphers": _CIPHERS_FORWARD, "tls13": False, "alpn": None, "style": "standard"},
|
||||||
|
# 3: TLS 1.3 forward
|
||||||
|
{"version": _TLS_1_2, "ciphers": _CIPHERS_FORWARD, "tls13": True, "alpn": "h2", "style": "standard"},
|
||||||
|
# 4: TLS 1.3 reverse
|
||||||
|
{"version": _TLS_1_2, "ciphers": _CIPHERS_REVERSE, "tls13": True, "alpn": "h2", "style": "standard"},
|
||||||
|
# 5: TLS 1.3 invalid (advertise 1.3 support but no key_share)
|
||||||
|
{"version": _TLS_1_2, "ciphers": _CIPHERS_FORWARD, "tls13": "no_key_share", "alpn": None, "style": "standard"},
|
||||||
|
# 6: TLS 1.3 middle-out
|
||||||
|
{"version": _TLS_1_2, "ciphers": _CIPHERS_MIDDLE_OUT, "tls13": True, "alpn": None, "style": "standard"},
|
||||||
|
# 7: TLS 1.0 forward
|
||||||
|
{"version": _TLS_1_0, "ciphers": _CIPHERS_FORWARD, "tls13": False, "alpn": None, "style": "standard"},
|
||||||
|
# 8: TLS 1.2 middle-out
|
||||||
|
{"version": _TLS_1_2, "ciphers": _CIPHERS_MIDDLE_OUT, "tls13": False, "alpn": None, "style": "standard"},
|
||||||
|
# 9: TLS 1.2 with rare extensions
|
||||||
|
{"version": _TLS_1_2, "ciphers": _CIPHERS_RARE, "tls13": False, "alpn": "http/1.1", "style": "rare"},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Extension builders ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _ext(ext_type: int, data: bytes) -> bytes:
|
||||||
|
return struct.pack("!HH", ext_type, len(data)) + data
|
||||||
|
|
||||||
|
|
||||||
|
def _ext_sni(host: str) -> bytes:
    """Build a server_name (SNI) extension for *host*."""
    name = host.encode("ascii")
    # ServerNameList: list length (2) + entry: type=0 host_name (1)
    # + name length (2) + the name itself.
    entry = struct.pack("!HBH", len(name) + 3, 0, len(name)) + name
    return _ext(_EXT_SERVER_NAME, entry)
|
||||||
|
|
||||||
|
|
||||||
|
def _ext_supported_groups() -> bytes:
    """Build the supported_groups extension (fixed curve/FFDHE list)."""
    # secp256r1, secp384r1, secp521r1, x25519, ffdhe2048, ffdhe3072
    groups = (0x0017, 0x0018, 0x0019, 0x001D, 0x0100, 0x0101)
    body = struct.pack("!H", 2 * len(groups))
    for group in groups:
        body += struct.pack("!H", group)
    return _ext(_EXT_SUPPORTED_GROUPS, body)
|
||||||
|
|
||||||
|
|
||||||
|
def _ext_ec_point_formats() -> bytes:
    """Build the ec_point_formats extension offering uncompressed only."""
    fmts = b"\x00"  # 0 = uncompressed
    body = struct.pack("B", len(fmts)) + fmts
    return _ext(_EXT_EC_POINT_FORMATS, body)
|
||||||
|
|
||||||
|
|
||||||
|
def _ext_signature_algorithms() -> bytes:
    """Build the signature_algorithms extension with a broad algorithm list."""
    algos = (
        0x0401, 0x0501, 0x0601,  # RSA PKCS1 SHA256/384/512
        0x0201,                  # RSA PKCS1 SHA1
        0x0403, 0x0503, 0x0603,  # ECDSA SHA256/384/512
        0x0203,                  # ECDSA SHA1
        0x0804, 0x0805, 0x0806,  # RSA-PSS SHA256/384/512
    )
    payload = struct.pack("!H", 2 * len(algos))
    payload += b"".join(struct.pack("!H", a) for a in algos)
    return _ext(_EXT_SIGNATURE_ALGORITHMS, payload)
|
||||||
|
|
||||||
|
|
||||||
|
def _ext_supported_versions_13() -> bytes:
    """Build supported_versions advertising TLS 1.3 then TLS 1.2."""
    versions = (0x0304, 0x0303)
    body = struct.pack("B", 2 * len(versions))
    body += b"".join(struct.pack("!H", v) for v in versions)
    return _ext(_EXT_SUPPORTED_VERSIONS, body)
|
||||||
|
|
||||||
|
|
||||||
|
def _ext_psk_key_exchange_modes() -> bytes:
    """Build psk_key_exchange_modes offering psk_dhe_ke only."""
    modes = b"\x01"  # 1 = psk_dhe_ke
    body = struct.pack("B", len(modes)) + modes
    return _ext(_EXT_PSK_KEY_EXCHANGE_MODES, body)
|
||||||
|
|
||||||
|
|
||||||
|
def _ext_key_share() -> bytes:
    """Build a key_share extension with a fixed (all-zero) x25519 share."""
    # The share only needs to be well-formed for fingerprinting, not secure.
    share = struct.pack("!HH", 0x001D, 32) + bytes(32)  # x25519 group
    return _ext(_EXT_KEY_SHARE, struct.pack("!H", len(share)) + share)
|
||||||
|
|
||||||
|
|
||||||
|
def _ext_alpn(protocol: str) -> bytes:
    """Build an ALPN extension offering a single protocol name."""
    name = protocol.encode("ascii")
    entries = bytes([len(name)]) + name
    return _ext(_EXT_ALPN, struct.pack("!H", len(entries)) + entries)
|
||||||
|
|
||||||
|
|
||||||
|
def _ext_session_ticket() -> bytes:
    """Build an empty session_ticket extension (requests a new ticket)."""
    return _ext(_EXT_SESSION_TICKET, bytes())
|
||||||
|
|
||||||
|
|
||||||
|
def _ext_encrypt_then_mac() -> bytes:
    """Build an empty encrypt_then_mac extension."""
    return _ext(_EXT_ENCRYPT_THEN_MAC, bytes())
|
||||||
|
|
||||||
|
|
||||||
|
def _ext_extended_master_secret() -> bytes:
    """Build an empty extended_master_secret extension."""
    return _ext(_EXT_EXTENDED_MASTER_SECRET, bytes())
|
||||||
|
|
||||||
|
|
||||||
|
def _ext_padding(target_length: int, current_length: int) -> bytes:
|
||||||
|
pad_needed = target_length - current_length - 4 # 4 bytes for ext type + length
|
||||||
|
if pad_needed < 0:
|
||||||
|
return b""
|
||||||
|
return _ext(_EXT_PADDING, b"\x00" * pad_needed)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── ClientHello builder ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _build_client_hello(probe_index: int, host: str = "localhost") -> bytes:
    """
    Construct one of 10 JARM-specified ClientHello packets.

    Args:
        probe_index: 0-9, selects the probe configuration
        host: target hostname for SNI extension

    Returns:
        Complete TLS record bytes ready to send on the wire.
    """
    cfg = _PROBE_CONFIGS[probe_index]
    version: bytes = cfg["version"]
    ciphers: list[int] = cfg["ciphers"]
    tls13 = cfg["tls13"]  # tri-state: True, False, or "no_key_share"
    alpn: str | None = cfg["alpn"]

    # Random (32 bytes) — fixed zeros keep the probes deterministic
    random_bytes = b"\x00" * 32

    # Session ID (32 bytes, all zeros)
    session_id = b"\x00" * 32

    # Cipher suites: uint16 byte-length prefix + big-endian suite IDs
    cipher_bytes = b"".join(struct.pack("!H", c) for c in ciphers)
    cipher_data = struct.pack("!H", len(cipher_bytes)) + cipher_bytes

    # Compression methods (null only)
    compression = b"\x01\x00"

    # Extensions
    extensions = b""
    extensions += _ext_sni(host)
    extensions += _ext_supported_groups()
    extensions += _ext_ec_point_formats()
    extensions += _ext_session_ticket()
    extensions += _ext_encrypt_then_mac()
    extensions += _ext_extended_master_secret()
    extensions += _ext_signature_algorithms()

    # tls13 is a tri-state flag, so test identity with True rather than
    # equality (the former `== True  # noqa: E712` lint suppression is gone).
    if tls13 is True:
        extensions += _ext_supported_versions_13()
        extensions += _ext_psk_key_exchange_modes()
        extensions += _ext_key_share()
    elif tls13 == "no_key_share":
        extensions += _ext_supported_versions_13()
        extensions += _ext_psk_key_exchange_modes()
        # Intentionally omit key_share — probes the server's error path.

    if alpn:
        extensions += _ext_alpn(alpn)

    ext_data = struct.pack("!H", len(extensions)) + extensions

    # ClientHello body
    body = (
        version  # client_version (2)
        + random_bytes  # random (32)
        + struct.pack("B", len(session_id)) + session_id  # session_id
        + cipher_data  # cipher_suites
        + compression  # compression_methods
        + ext_data  # extensions
    )

    # Handshake header: type(1) + length(3) — length is a uint24, taken
    # as the low 3 bytes of a packed uint32.
    handshake = struct.pack("B", _HANDSHAKE_CLIENT_HELLO) + struct.pack("!I", len(body))[1:] + body

    # TLS record header: type(1) + version(2) + length(2). The record-layer
    # version is pinned to TLS 1.0 for maximum middlebox compatibility.
    record = struct.pack("B", _CONTENT_HANDSHAKE) + _TLS_1_0 + struct.pack("!H", len(handshake)) + handshake

    return record
|
||||||
|
|
||||||
|
|
||||||
|
# ─── ServerHello parser ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _parse_server_hello(data: bytes) -> str:
    """
    Extract cipher suite and TLS version from a ServerHello response.

    Returns a pipe-delimited string "cipher|version|extensions" that forms
    one component of the JARM hash, or "|||" on parse failure.
    """
    try:
        # Need at least the 5-byte record header + 1 byte of handshake.
        if len(data) < 6:
            return "|||"

        # TLS record header: must be a handshake record (0x16).
        if data[0] != _CONTENT_HANDSHAKE:
            return "|||"

        struct.unpack_from("!H", data, 1)[0]  # record_version (unused)
        record_len = struct.unpack_from("!H", data, 3)[0]
        hs = data[5: 5 + record_len]

        if len(hs) < 4:
            return "|||"

        # Handshake header: must be ServerHello (0x02); alerts etc. fail here.
        if hs[0] != _HANDSHAKE_SERVER_HELLO:
            return "|||"

        # Handshake length is uint24; widen to uint32 with a zero byte.
        hs_len = struct.unpack_from("!I", b"\x00" + hs[1:4])[0]
        body = hs[4: 4 + hs_len]

        # 2 (version) + 32 (random) is the minimum ServerHello body.
        if len(body) < 34:
            return "|||"

        pos = 0
        # Server version (may be overridden below by supported_versions)
        server_version = struct.unpack_from("!H", body, pos)[0]
        pos += 2

        # Random (32 bytes) — skipped, not part of the fingerprint
        pos += 32

        # Session ID (variable length, 1-byte length prefix)
        if pos >= len(body):
            return "|||"
        sid_len = body[pos]
        pos += 1 + sid_len

        # Cipher suite selected by the server
        if pos + 2 > len(body):
            return "|||"
        cipher = struct.unpack_from("!H", body, pos)[0]
        pos += 2

        # Compression method (1 byte, skipped)
        if pos >= len(body):
            return "|||"
        pos += 1

        # Parse extensions for supported_versions (to detect actual TLS 1.3;
        # TLS 1.3 servers put 0x0303 in the legacy version field).
        actual_version = server_version
        extensions_str = ""
        if pos + 2 <= len(body):
            ext_total = struct.unpack_from("!H", body, pos)[0]
            pos += 2
            ext_end = pos + ext_total
            ext_types: list[str] = []
            while pos + 4 <= ext_end and pos + 4 <= len(body):
                ext_type = struct.unpack_from("!H", body, pos)[0]
                ext_len = struct.unpack_from("!H", body, pos + 2)[0]
                ext_types.append(f"{ext_type:04x}")

                if ext_type == _EXT_SUPPORTED_VERSIONS and ext_len >= 2:
                    actual_version = struct.unpack_from("!H", body, pos + 4)[0]

                pos += 4 + ext_len
            # Extension types in wire order, hex, dash-joined.
            extensions_str = "-".join(ext_types)

        version_str = _version_to_str(actual_version)
        cipher_str = f"{cipher:04x}"

        return f"{cipher_str}|{version_str}|{extensions_str}"

    # Broad catch is deliberate: any malformed response maps to the
    # "no usable answer" sentinel rather than crashing a probe run.
    except Exception:
        return "|||"
|
||||||
|
|
||||||
|
|
||||||
|
def _version_to_str(version: int) -> str:
|
||||||
|
return {
|
||||||
|
0x0304: "tls13",
|
||||||
|
0x0303: "tls12",
|
||||||
|
0x0302: "tls11",
|
||||||
|
0x0301: "tls10",
|
||||||
|
0x0300: "ssl30",
|
||||||
|
}.get(version, f"{version:04x}")
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Probe sender ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@_traced("prober.jarm_send_probe")
def _send_probe(host: str, port: int, hello: bytes, timeout: float = 5.0) -> bytes | None:
    """
    Open a TCP connection, send the ClientHello, and read the ServerHello.

    Returns raw response bytes or None on any failure (connect error,
    timeout, or empty response).
    """
    try:
        # socket objects are context managers; replaces try/finally close.
        with socket.create_connection((host, port), timeout=timeout) as sock:
            sock.sendall(hello)
            sock.settimeout(timeout)
            response = b""
            while True:
                chunk = sock.recv(1484)
                if not chunk:
                    break
                response += chunk
                # We only need the first TLS record (ServerHello)
                if len(response) >= 5:
                    record_len = struct.unpack_from("!H", response, 3)[0]
                    if len(response) >= 5 + record_len:
                        break
            return response if response else None
    except OSError:
        # socket.error is an alias of OSError and socket.timeout is a
        # subclass (TimeoutError), so one clause covers the old tuple.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
# ─── JARM hash computation ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _compute_jarm(responses: list[str]) -> str:
|
||||||
|
"""
|
||||||
|
Compute the final 62-character JARM hash from 10 probe response strings.
|
||||||
|
|
||||||
|
The first 30 characters are the raw cipher/version concatenation.
|
||||||
|
The remaining 32 characters are a truncated SHA256 of the extensions.
|
||||||
|
"""
|
||||||
|
if all(r == "|||" for r in responses):
|
||||||
|
return JARM_EMPTY_HASH
|
||||||
|
|
||||||
|
# Build the fuzzy hash
|
||||||
|
raw_parts: list[str] = []
|
||||||
|
ext_parts: list[str] = []
|
||||||
|
|
||||||
|
for r in responses:
|
||||||
|
parts = r.split("|")
|
||||||
|
if len(parts) >= 3 and parts[0] != "":
|
||||||
|
cipher = parts[0]
|
||||||
|
version = parts[1]
|
||||||
|
extensions = parts[2] if len(parts) > 2 else ""
|
||||||
|
|
||||||
|
# Map version to single char
|
||||||
|
ver_char = {
|
||||||
|
"tls13": "d", "tls12": "c", "tls11": "b",
|
||||||
|
"tls10": "a", "ssl30": "0",
|
||||||
|
}.get(version, "0")
|
||||||
|
|
||||||
|
raw_parts.append(f"{cipher}{ver_char}")
|
||||||
|
ext_parts.append(extensions)
|
||||||
|
else:
|
||||||
|
raw_parts.append("000")
|
||||||
|
ext_parts.append("")
|
||||||
|
|
||||||
|
# First 30 chars: cipher(4) + version(1) = 5 chars * 10 probes = 50... no
|
||||||
|
# JARM spec: first part is c|v per probe joined, then SHA256 of extensions
|
||||||
|
# Actual format: each response contributes 3 chars (cipher_first2 + ver_char)
|
||||||
|
# to the first 30, then all extensions hashed for the remaining 32.
|
||||||
|
|
||||||
|
fuzzy_raw = ""
|
||||||
|
for r in responses:
|
||||||
|
parts = r.split("|")
|
||||||
|
if len(parts) >= 3 and parts[0] != "":
|
||||||
|
cipher = parts[0] # 4-char hex
|
||||||
|
version = parts[1]
|
||||||
|
ver_char = {
|
||||||
|
"tls13": "d", "tls12": "c", "tls11": "b",
|
||||||
|
"tls10": "a", "ssl30": "0",
|
||||||
|
}.get(version, "0")
|
||||||
|
fuzzy_raw += f"{cipher[0:2]}{ver_char}"
|
||||||
|
else:
|
||||||
|
fuzzy_raw += "000"
|
||||||
|
|
||||||
|
# fuzzy_raw is 30 chars (3 * 10)
|
||||||
|
ext_str = ",".join(ext_parts)
|
||||||
|
ext_hash = hashlib.sha256(ext_str.encode()).hexdigest()[:32]
|
||||||
|
|
||||||
|
return fuzzy_raw + ext_hash
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Public API ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@_traced("prober.jarm_hash")
def jarm_hash(host: str, port: int, timeout: float = 5.0) -> str:
    """
    Compute the JARM fingerprint for a TLS server.

    Sends 10 crafted ClientHello packets and hashes the responses.

    Args:
        host: target IP or hostname
        port: target port
        timeout: per-probe TCP timeout in seconds

    Returns:
        62-character JARM hash string, or all-zeros on total failure.
    """
    results: list[str] = []

    for idx in range(10):
        probe = _build_client_hello(idx, host=host)
        reply = _send_probe(host, port, probe, timeout=timeout)
        results.append(_parse_server_hello(reply) if reply is not None else "|||")

        # Pace the probes; no delay needed after the last one.
        if idx < 9:
            time.sleep(_INTER_PROBE_DELAY)

    return _compute_jarm(results)
|
||||||
227
decnet/prober/tcpfp.py
Normal file
227
decnet/prober/tcpfp.py
Normal file
@@ -0,0 +1,227 @@
|
|||||||
|
"""
|
||||||
|
TCP/IP stack fingerprinting via SYN-ACK analysis.
|
||||||
|
|
||||||
|
Sends a crafted TCP SYN packet to a target host:port, captures the
|
||||||
|
SYN-ACK response, and extracts OS/tool-identifying characteristics:
|
||||||
|
TTL, window size, DF bit, MSS, window scale, SACK support, timestamps,
|
||||||
|
and TCP options ordering.
|
||||||
|
|
||||||
|
Uses scapy for packet crafting and parsing. Requires root/CAP_NET_RAW.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import random
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from decnet.telemetry import traced as _traced
|
||||||
|
|
||||||
|
# Lazy-import scapy to avoid breaking non-root usage of HASSH/JARM.
|
||||||
|
# The actual import happens inside functions that need it.
|
||||||
|
|
||||||
|
# ─── TCP option short codes ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
_OPT_CODES: dict[str, str] = {
|
||||||
|
"MSS": "M",
|
||||||
|
"WScale": "W",
|
||||||
|
"SAckOK": "S",
|
||||||
|
"SAck": "S",
|
||||||
|
"Timestamp": "T",
|
||||||
|
"NOP": "N",
|
||||||
|
"EOL": "E",
|
||||||
|
"AltChkSum": "A",
|
||||||
|
"AltChkSumOpt": "A",
|
||||||
|
"UTO": "U",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Packet construction ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@_traced("prober.tcpfp_send_syn")
def _send_syn(
    host: str,
    port: int,
    timeout: float,
) -> Any | None:
    """
    Craft a TCP SYN with common options and send it. Returns the
    SYN-ACK response packet or None on timeout/failure.

    Requires root/CAP_NET_RAW; a PermissionError is mapped to None.
    """
    from scapy.all import IP, TCP, conf, sr1

    # Suppress scapy's noisy output
    conf.verb = 0

    src_port = random.randint(49152, 65535)  # nosec B311 — ephemeral port, not crypto

    pkt = (
        IP(dst=host)
        / TCP(
            sport=src_port,
            dport=port,
            flags="S",
            options=[
                ("MSS", 1460),
                ("NOP", None),
                ("WScale", 7),
                ("NOP", None),
                ("NOP", None),
                ("Timestamp", (0, 0)),
                ("SAckOK", b""),
                ("EOL", None),
            ],
        )
    )

    try:
        resp = sr1(pkt, timeout=timeout, verbose=0)
    except (OSError, PermissionError):
        return None

    if resp is None:
        return None

    # Verify it's a SYN-ACK (flags == 0x12). TCP is already in scope from
    # the import at the top of the function — the former redundant
    # `from scapy.all import TCP as TCPLayer` re-import is removed.
    if not resp.haslayer(TCP):
        return None
    if resp[TCP].flags != 0x12:  # SYN-ACK
        return None

    # Send RST to clean up half-open connection
    _send_rst(host, port, src_port, resp)

    return resp
|
||||||
|
|
||||||
|
|
||||||
|
def _send_rst(
    host: str,
    dport: int,
    sport: int,
    resp: Any,
) -> None:
    """Send RST to clean up the half-open connection (best effort)."""
    try:
        from scapy.all import IP, TCP, send

        # Sequence number continues from the peer's ACK so the RST is valid.
        reset_pkt = IP(dst=host) / TCP(
            sport=sport,
            dport=dport,
            flags="R",
            seq=resp.ack,
        )
        send(reset_pkt, verbose=0)
    except Exception:  # nosec B110 — best-effort RST cleanup
        pass
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Response parsing ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _parse_synack(resp: Any) -> dict[str, Any]:
    """
    Extract fingerprint fields from a scapy SYN-ACK response packet.
    """
    from scapy.all import IP, TCP

    ip = resp[IP]
    tcp = resp[TCP]

    # Seed the result with IP-layer fields and option defaults; the loop
    # below overwrites the defaults for options actually present.
    result: dict[str, Any] = {
        "ttl": ip.ttl,
        "window_size": tcp.window,
        "df_bit": 1 if (ip.flags & 0x2) else 0,  # DF = bit 1
        "ip_id": ip.id,
        "mss": 0,
        "window_scale": -1,  # -1 = window scaling not offered
        "sack_ok": 0,
        "timestamp": 0,
        "options_order": _extract_options_order(tcp.options),
    }

    for name, value in tcp.options:
        if name == "MSS":
            result["mss"] = value
        elif name == "WScale":
            result["window_scale"] = value
        elif name in ("SAckOK", "SAck"):
            result["sack_ok"] = 1
        elif name == "Timestamp":
            result["timestamp"] = 1

    return result
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_options_order(options: list[tuple[str, Any]]) -> str:
    """
    Map scapy TCP option tuples to a short-code string.

    E.g. [("MSS", 1460), ("NOP", None), ("WScale", 7)] → "M,N,W"
    Unknown option names map to "?".
    """
    return ",".join(_OPT_CODES.get(name, "?") for name, _ in options)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Fingerprint computation ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _compute_fingerprint(fields: dict[str, Any]) -> tuple[str, str]:
|
||||||
|
"""
|
||||||
|
Compute fingerprint raw string and SHA256 hash from parsed fields.
|
||||||
|
|
||||||
|
Returns (raw_string, hash_hex_32).
|
||||||
|
"""
|
||||||
|
raw = (
|
||||||
|
f"{fields['ttl']}:{fields['window_size']}:{fields['df_bit']}:"
|
||||||
|
f"{fields['mss']}:{fields['window_scale']}:{fields['sack_ok']}:"
|
||||||
|
f"{fields['timestamp']}:{fields['options_order']}"
|
||||||
|
)
|
||||||
|
h = hashlib.sha256(raw.encode("utf-8")).hexdigest()[:32]
|
||||||
|
return raw, h
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Public API ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@_traced("prober.tcp_fingerprint")
def tcp_fingerprint(
    host: str,
    port: int,
    timeout: float = 5.0,
) -> dict[str, Any] | None:
    """
    Send a TCP SYN to host:port and fingerprint the SYN-ACK response.

    Returns a dict with the hash, raw fingerprint string, and individual
    fields, or None if no SYN-ACK was received.

    Requires root/CAP_NET_RAW.
    """
    synack = _send_syn(host, port, timeout)
    if synack is None:
        return None

    parsed = _parse_synack(synack)
    raw, digest = _compute_fingerprint(parsed)

    result: dict[str, Any] = {"tcpfp_hash": digest, "tcpfp_raw": raw}
    result.update(parsed)
    return result
|
||||||
478
decnet/prober/worker.py
Normal file
478
decnet/prober/worker.py
Normal file
@@ -0,0 +1,478 @@
|
|||||||
|
"""
|
||||||
|
DECNET-PROBER standalone worker.
|
||||||
|
|
||||||
|
Runs as a detached host-level process. Discovers attacker IPs by tailing the
|
||||||
|
collector's JSON log file, then fingerprints them via multiple active probes:
|
||||||
|
- JARM (TLS server fingerprinting)
|
||||||
|
- HASSHServer (SSH server fingerprinting)
|
||||||
|
- TCP/IP stack fingerprinting (OS/tool identification)
|
||||||
|
|
||||||
|
Results are written as RFC 5424 syslog + JSON to the same log files.
|
||||||
|
|
||||||
|
Target discovery is fully automatic — every unique attacker IP seen in the
|
||||||
|
log stream gets probed. No manual target list required.
|
||||||
|
|
||||||
|
Tech debt: writing directly to the collector's log files couples the
|
||||||
|
prober to the collector's file format. A future refactor should introduce
|
||||||
|
a shared log-sink abstraction.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.prober.hassh import hassh_server
|
||||||
|
from decnet.prober.jarm import JARM_EMPTY_HASH, jarm_hash
|
||||||
|
from decnet.prober.tcpfp import tcp_fingerprint
|
||||||
|
from decnet.telemetry import traced as _traced
|
||||||
|
|
||||||
|
logger = get_logger("prober")
|
||||||
|
|
||||||
|
# ─── Default ports per probe type ───────────────────────────────────────────
|
||||||
|
|
||||||
|
# JARM: common C2 callback / TLS server ports
|
||||||
|
DEFAULT_PROBE_PORTS: list[int] = [
|
||||||
|
443, 8443, 8080, 4443, 50050, 2222, 993, 995, 8888, 9001,
|
||||||
|
]
|
||||||
|
|
||||||
|
# HASSHServer: common SSH server ports
|
||||||
|
DEFAULT_SSH_PORTS: list[int] = [22, 2222, 22222, 2022]
|
||||||
|
|
||||||
|
# TCP/IP stack: probe on ports commonly open on attacker machines.
|
||||||
|
# Wide spread gives the best chance of a SYN-ACK for TTL/fingerprint extraction.
|
||||||
|
DEFAULT_TCPFP_PORTS: list[int] = [22, 80, 443, 8080, 8443, 445, 3389]
|
||||||
|
|
||||||
|
# ─── RFC 5424 formatting (inline, mirrors templates/*/decnet_logging.py) ─────
|
||||||
|
|
||||||
|
_FACILITY_LOCAL0 = 16
|
||||||
|
_SD_ID = "relay@55555"
|
||||||
|
_SEVERITY_INFO = 6
|
||||||
|
_SEVERITY_WARNING = 4
|
||||||
|
|
||||||
|
_MAX_HOSTNAME = 255
|
||||||
|
_MAX_APPNAME = 48
|
||||||
|
_MAX_MSGID = 32
|
||||||
|
|
||||||
|
|
||||||
|
def _sd_escape(value: str) -> str:
|
||||||
|
return value.replace("\\", "\\\\").replace('"', '\\"').replace("]", "\\]")
|
||||||
|
|
||||||
|
|
||||||
|
def _sd_element(fields: dict[str, Any]) -> str:
    """Render *fields* as one RFC 5424 STRUCTURED-DATA element ("-" when empty)."""
    if not fields:
        return "-"
    rendered = [f'{key}="{_sd_escape(str(val))}"' for key, val in fields.items()]
    return "[" + _SD_ID + " " + " ".join(rendered) + "]"
|
||||||
|
|
||||||
|
|
||||||
|
def _syslog_line(
    event_type: str,
    severity: int = _SEVERITY_INFO,
    msg: str | None = None,
    **fields: Any,
) -> str:
    """
    Format a single RFC 5424 syslog line for the prober.

    PRI combines the local0 facility with *severity*; PROCID is always "-";
    MSGID is the (truncated) event type; *fields* become the SD element.
    """
    priority = _FACILITY_LOCAL0 * 8 + severity
    timestamp = datetime.now(timezone.utc).isoformat()
    msgid = (event_type or "-")[:_MAX_MSGID]
    structured = _sd_element(fields)
    suffix = f" {msg}" if msg else ""
    return (
        f"<{priority}>1 {timestamp} decnet-prober prober - {msgid} "
        f"{structured}{suffix}"
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ─── RFC 5424 parser (subset of collector's, for JSON generation) ─────────────
|
||||||
|
|
||||||
|
_RFC5424_RE = re.compile(
|
||||||
|
r"^<\d+>1 "
|
||||||
|
r"(\S+) " # 1: TIMESTAMP
|
||||||
|
r"(\S+) " # 2: HOSTNAME
|
||||||
|
r"(\S+) " # 3: APP-NAME
|
||||||
|
r"- " # PROCID
|
||||||
|
r"(\S+) " # 4: MSGID (event_type)
|
||||||
|
r"(.+)$", # 5: SD + MSG
|
||||||
|
)
|
||||||
|
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
|
||||||
|
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
||||||
|
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "ip", "target_ip")
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_to_json(line: str) -> dict[str, Any] | None:
    """
    Parse one RFC 5424 line (as produced by `_syslog_line`) into a JSON record.

    Returns None when the line does not match the expected layout. Extracts
    the SD-PARAM fields, the free-text MSG, and the first attacker-IP field
    found (in `_IP_FIELDS` priority order), defaulting to "Unknown".
    """
    m = _RFC5424_RE.match(line)
    if not m:
        return None
    ts_raw, decky, service, event_type, sd_rest = m.groups()

    fields: dict[str, str] = {}
    msg = ""

    if sd_rest.startswith("["):
        block = _SD_BLOCK_RE.search(sd_rest)
        if block:
            for k, v in _PARAM_RE.findall(block.group(1)):
                # Undo the escaping applied by _sd_escape.
                fields[k] = v.replace('\\"', '"').replace("\\\\", "\\").replace("\\]", "]")
        msg_match = re.search(r'\]\s+(.+)$', sd_rest)
        if msg_match:
            msg = msg_match.group(1).strip()
    elif sd_rest.startswith("-"):
        # Fix: a line with an empty SD element ("-") previously lost its
        # free-text MSG entirely; recover everything after the dash.
        msg = sd_rest[1:].strip()

    attacker_ip = "Unknown"
    for fname in _IP_FIELDS:
        if fname in fields:
            attacker_ip = fields[fname]
            break

    # Best effort: fall back to the raw timestamp when it is not ISO-8601.
    try:
        ts_formatted = datetime.fromisoformat(ts_raw).strftime("%Y-%m-%d %H:%M:%S")
    except ValueError:
        ts_formatted = ts_raw

    return {
        "timestamp": ts_formatted,
        "decky": decky,
        "service": service,
        "event_type": event_type,
        "attacker_ip": attacker_ip,
        "fields": fields,
        "msg": msg,
        "raw_line": line,
    }
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Log writer ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _write_event(
    log_path: Path,
    json_path: Path,
    event_type: str,
    severity: int = _SEVERITY_INFO,
    msg: str | None = None,
    **fields: Any,
) -> None:
    """Append one event to the RFC 5424 log and, when parseable, the JSON log."""
    syslog_line = _syslog_line(event_type, severity=severity, msg=msg, **fields)

    with open(log_path, "a", encoding="utf-8") as log_fh:
        log_fh.write(syslog_line + "\n")
        log_fh.flush()

    record = _parse_to_json(syslog_line)
    if record:
        with open(json_path, "a", encoding="utf-8") as json_fh:
            json_fh.write(json.dumps(record) + "\n")
            json_fh.flush()
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Target discovery from log stream ────────────────────────────────────────
|
||||||
|
|
||||||
|
@_traced("prober.discover_attackers")
|
||||||
|
def _discover_attackers(json_path: Path, position: int) -> tuple[set[str], int]:
|
||||||
|
"""
|
||||||
|
Read new JSON log lines from the given position and extract unique
|
||||||
|
attacker IPs. Returns (new_ips, new_position).
|
||||||
|
|
||||||
|
Only considers IPs that are not "Unknown" and come from events that
|
||||||
|
indicate real attacker interaction (not prober's own events).
|
||||||
|
"""
|
||||||
|
new_ips: set[str] = set()
|
||||||
|
|
||||||
|
if not json_path.exists():
|
||||||
|
return new_ips, position
|
||||||
|
|
||||||
|
size = json_path.stat().st_size
|
||||||
|
if size < position:
|
||||||
|
position = 0 # file rotated
|
||||||
|
|
||||||
|
if size == position:
|
||||||
|
return new_ips, position
|
||||||
|
|
||||||
|
with open(json_path, "r", encoding="utf-8", errors="replace") as f:
|
||||||
|
f.seek(position)
|
||||||
|
while True:
|
||||||
|
line = f.readline()
|
||||||
|
if not line:
|
||||||
|
break
|
||||||
|
if not line.endswith("\n"):
|
||||||
|
break # partial line
|
||||||
|
|
||||||
|
try:
|
||||||
|
record = json.loads(line.strip())
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
position = f.tell()
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Skip our own events
|
||||||
|
if record.get("service") == "prober":
|
||||||
|
position = f.tell()
|
||||||
|
continue
|
||||||
|
|
||||||
|
ip = record.get("attacker_ip", "Unknown")
|
||||||
|
if ip != "Unknown" and ip:
|
||||||
|
new_ips.add(ip)
|
||||||
|
|
||||||
|
position = f.tell()
|
||||||
|
|
||||||
|
return new_ips, position
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Probe cycle ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@_traced("prober.probe_cycle")
|
||||||
|
def _probe_cycle(
|
||||||
|
targets: set[str],
|
||||||
|
probed: dict[str, dict[str, set[int]]],
|
||||||
|
jarm_ports: list[int],
|
||||||
|
ssh_ports: list[int],
|
||||||
|
tcpfp_ports: list[int],
|
||||||
|
log_path: Path,
|
||||||
|
json_path: Path,
|
||||||
|
timeout: float = 5.0,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Probe all known attacker IPs with JARM, HASSH, and TCP/IP fingerprinting.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
targets: set of attacker IPs to probe
|
||||||
|
probed: dict mapping IP -> {probe_type -> set of ports already probed}
|
||||||
|
jarm_ports: TLS ports for JARM fingerprinting
|
||||||
|
ssh_ports: SSH ports for HASSHServer fingerprinting
|
||||||
|
tcpfp_ports: ports for TCP/IP stack fingerprinting
|
||||||
|
log_path: RFC 5424 log file
|
||||||
|
json_path: JSON log file
|
||||||
|
timeout: per-probe TCP timeout
|
||||||
|
"""
|
||||||
|
for ip in sorted(targets):
|
||||||
|
ip_probed = probed.setdefault(ip, {})
|
||||||
|
|
||||||
|
# Phase 1: JARM (TLS fingerprinting)
|
||||||
|
_jarm_phase(ip, ip_probed, jarm_ports, log_path, json_path, timeout)
|
||||||
|
|
||||||
|
# Phase 2: HASSHServer (SSH fingerprinting)
|
||||||
|
_hassh_phase(ip, ip_probed, ssh_ports, log_path, json_path, timeout)
|
||||||
|
|
||||||
|
# Phase 3: TCP/IP stack fingerprinting
|
||||||
|
_tcpfp_phase(ip, ip_probed, tcpfp_ports, log_path, json_path, timeout)
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("prober.jarm_phase")
|
||||||
|
def _jarm_phase(
|
||||||
|
ip: str,
|
||||||
|
ip_probed: dict[str, set[int]],
|
||||||
|
ports: list[int],
|
||||||
|
log_path: Path,
|
||||||
|
json_path: Path,
|
||||||
|
timeout: float,
|
||||||
|
) -> None:
|
||||||
|
"""JARM-fingerprint an IP on the given TLS ports."""
|
||||||
|
done = ip_probed.setdefault("jarm", set())
|
||||||
|
for port in ports:
|
||||||
|
if port in done:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
h = jarm_hash(ip, port, timeout=timeout)
|
||||||
|
done.add(port)
|
||||||
|
if h == JARM_EMPTY_HASH:
|
||||||
|
continue
|
||||||
|
_write_event(
|
||||||
|
log_path, json_path,
|
||||||
|
"jarm_fingerprint",
|
||||||
|
target_ip=ip,
|
||||||
|
target_port=str(port),
|
||||||
|
jarm_hash=h,
|
||||||
|
msg=f"JARM {ip}:{port} = {h}",
|
||||||
|
)
|
||||||
|
logger.info("prober: JARM %s:%d = %s", ip, port, h)
|
||||||
|
except Exception as exc:
|
||||||
|
done.add(port)
|
||||||
|
_write_event(
|
||||||
|
log_path, json_path,
|
||||||
|
"prober_error",
|
||||||
|
severity=_SEVERITY_WARNING,
|
||||||
|
target_ip=ip,
|
||||||
|
target_port=str(port),
|
||||||
|
error=str(exc),
|
||||||
|
msg=f"JARM probe failed for {ip}:{port}: {exc}",
|
||||||
|
)
|
||||||
|
logger.warning("prober: JARM probe failed %s:%d: %s", ip, port, exc)
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("prober.hassh_phase")
|
||||||
|
def _hassh_phase(
|
||||||
|
ip: str,
|
||||||
|
ip_probed: dict[str, set[int]],
|
||||||
|
ports: list[int],
|
||||||
|
log_path: Path,
|
||||||
|
json_path: Path,
|
||||||
|
timeout: float,
|
||||||
|
) -> None:
|
||||||
|
"""HASSHServer-fingerprint an IP on the given SSH ports."""
|
||||||
|
done = ip_probed.setdefault("hassh", set())
|
||||||
|
for port in ports:
|
||||||
|
if port in done:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
result = hassh_server(ip, port, timeout=timeout)
|
||||||
|
done.add(port)
|
||||||
|
if result is None:
|
||||||
|
continue
|
||||||
|
_write_event(
|
||||||
|
log_path, json_path,
|
||||||
|
"hassh_fingerprint",
|
||||||
|
target_ip=ip,
|
||||||
|
target_port=str(port),
|
||||||
|
hassh_server_hash=result["hassh_server"],
|
||||||
|
ssh_banner=result["banner"],
|
||||||
|
kex_algorithms=result["kex_algorithms"],
|
||||||
|
encryption_s2c=result["encryption_s2c"],
|
||||||
|
mac_s2c=result["mac_s2c"],
|
||||||
|
compression_s2c=result["compression_s2c"],
|
||||||
|
msg=f"HASSH {ip}:{port} = {result['hassh_server']}",
|
||||||
|
)
|
||||||
|
logger.info("prober: HASSH %s:%d = %s", ip, port, result["hassh_server"])
|
||||||
|
except Exception as exc:
|
||||||
|
done.add(port)
|
||||||
|
_write_event(
|
||||||
|
log_path, json_path,
|
||||||
|
"prober_error",
|
||||||
|
severity=_SEVERITY_WARNING,
|
||||||
|
target_ip=ip,
|
||||||
|
target_port=str(port),
|
||||||
|
error=str(exc),
|
||||||
|
msg=f"HASSH probe failed for {ip}:{port}: {exc}",
|
||||||
|
)
|
||||||
|
logger.warning("prober: HASSH probe failed %s:%d: %s", ip, port, exc)
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("prober.tcpfp_phase")
|
||||||
|
def _tcpfp_phase(
|
||||||
|
ip: str,
|
||||||
|
ip_probed: dict[str, set[int]],
|
||||||
|
ports: list[int],
|
||||||
|
log_path: Path,
|
||||||
|
json_path: Path,
|
||||||
|
timeout: float,
|
||||||
|
) -> None:
|
||||||
|
"""TCP/IP stack fingerprint an IP on the given ports."""
|
||||||
|
done = ip_probed.setdefault("tcpfp", set())
|
||||||
|
for port in ports:
|
||||||
|
if port in done:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
result = tcp_fingerprint(ip, port, timeout=timeout)
|
||||||
|
done.add(port)
|
||||||
|
if result is None:
|
||||||
|
continue
|
||||||
|
_write_event(
|
||||||
|
log_path, json_path,
|
||||||
|
"tcpfp_fingerprint",
|
||||||
|
target_ip=ip,
|
||||||
|
target_port=str(port),
|
||||||
|
tcpfp_hash=result["tcpfp_hash"],
|
||||||
|
tcpfp_raw=result["tcpfp_raw"],
|
||||||
|
ttl=str(result["ttl"]),
|
||||||
|
window_size=str(result["window_size"]),
|
||||||
|
df_bit=str(result["df_bit"]),
|
||||||
|
mss=str(result["mss"]),
|
||||||
|
window_scale=str(result["window_scale"]),
|
||||||
|
sack_ok=str(result["sack_ok"]),
|
||||||
|
timestamp=str(result["timestamp"]),
|
||||||
|
options_order=result["options_order"],
|
||||||
|
msg=f"TCPFP {ip}:{port} = {result['tcpfp_hash']}",
|
||||||
|
)
|
||||||
|
logger.info("prober: TCPFP %s:%d = %s", ip, port, result["tcpfp_hash"])
|
||||||
|
except Exception as exc:
|
||||||
|
done.add(port)
|
||||||
|
_write_event(
|
||||||
|
log_path, json_path,
|
||||||
|
"prober_error",
|
||||||
|
severity=_SEVERITY_WARNING,
|
||||||
|
target_ip=ip,
|
||||||
|
target_port=str(port),
|
||||||
|
error=str(exc),
|
||||||
|
msg=f"TCPFP probe failed for {ip}:{port}: {exc}",
|
||||||
|
)
|
||||||
|
logger.warning("prober: TCPFP probe failed %s:%d: %s", ip, port, exc)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Main worker ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@_traced("prober.worker")
|
||||||
|
async def prober_worker(
|
||||||
|
log_file: str,
|
||||||
|
interval: int = 300,
|
||||||
|
timeout: float = 5.0,
|
||||||
|
ports: list[int] | None = None,
|
||||||
|
ssh_ports: list[int] | None = None,
|
||||||
|
tcpfp_ports: list[int] | None = None,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Main entry point for the standalone prober process.
|
||||||
|
|
||||||
|
Discovers attacker IPs automatically by tailing the JSON log file,
|
||||||
|
then fingerprints each IP via JARM, HASSH, and TCP/IP stack probes.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
log_file: base path for log files (RFC 5424 to .log, JSON to .json)
|
||||||
|
interval: seconds between probe cycles
|
||||||
|
timeout: per-probe TCP timeout
|
||||||
|
ports: JARM TLS ports (defaults to DEFAULT_PROBE_PORTS)
|
||||||
|
ssh_ports: HASSH SSH ports (defaults to DEFAULT_SSH_PORTS)
|
||||||
|
tcpfp_ports: TCP fingerprint ports (defaults to DEFAULT_TCPFP_PORTS)
|
||||||
|
"""
|
||||||
|
jarm_ports = ports or DEFAULT_PROBE_PORTS
|
||||||
|
hassh_ports = ssh_ports or DEFAULT_SSH_PORTS
|
||||||
|
tcp_ports = tcpfp_ports or DEFAULT_TCPFP_PORTS
|
||||||
|
|
||||||
|
all_ports_str = (
|
||||||
|
f"jarm={','.join(str(p) for p in jarm_ports)} "
|
||||||
|
f"ssh={','.join(str(p) for p in hassh_ports)} "
|
||||||
|
f"tcpfp={','.join(str(p) for p in tcp_ports)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
log_path = Path(log_file)
|
||||||
|
json_path = log_path.with_suffix(".json")
|
||||||
|
log_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"prober started interval=%ds %s log=%s",
|
||||||
|
interval, all_ports_str, log_path,
|
||||||
|
)
|
||||||
|
|
||||||
|
_write_event(
|
||||||
|
log_path, json_path,
|
||||||
|
"prober_startup",
|
||||||
|
interval=str(interval),
|
||||||
|
probe_ports=all_ports_str,
|
||||||
|
msg=f"DECNET-PROBER started, interval {interval}s, {all_ports_str}",
|
||||||
|
)
|
||||||
|
|
||||||
|
known_attackers: set[str] = set()
|
||||||
|
probed: dict[str, dict[str, set[int]]] = {} # IP -> {type -> ports}
|
||||||
|
log_position: int = 0
|
||||||
|
|
||||||
|
while True:
|
||||||
|
# Discover new attacker IPs from the log stream
|
||||||
|
new_ips, log_position = await asyncio.to_thread(
|
||||||
|
_discover_attackers, json_path, log_position,
|
||||||
|
)
|
||||||
|
|
||||||
|
if new_ips - known_attackers:
|
||||||
|
fresh = new_ips - known_attackers
|
||||||
|
known_attackers.update(fresh)
|
||||||
|
logger.info(
|
||||||
|
"prober: discovered %d new attacker(s), total=%d",
|
||||||
|
len(fresh), len(known_attackers),
|
||||||
|
)
|
||||||
|
|
||||||
|
if known_attackers:
|
||||||
|
await asyncio.to_thread(
|
||||||
|
_probe_cycle, known_attackers, probed,
|
||||||
|
jarm_ports, hassh_ports, tcp_ports,
|
||||||
|
log_path, json_path, timeout,
|
||||||
|
)
|
||||||
|
|
||||||
|
await asyncio.sleep(interval)
|
||||||
5
decnet/profiler/__init__.py
Normal file
5
decnet/profiler/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
"""DECNET profiler — standalone attacker profile builder worker."""
|
||||||
|
|
||||||
|
from decnet.profiler.worker import attacker_profile_worker
|
||||||
|
|
||||||
|
__all__ = ["attacker_profile_worker"]
|
||||||
602
decnet/profiler/behavioral.py
Normal file
602
decnet/profiler/behavioral.py
Normal file
@@ -0,0 +1,602 @@
|
|||||||
|
"""
|
||||||
|
Behavioral and timing analysis for DECNET attacker profiles.
|
||||||
|
|
||||||
|
Consumes the chronological `LogEvent` stream already built by
|
||||||
|
`decnet.correlation.engine.CorrelationEngine` and derives per-IP metrics:
|
||||||
|
|
||||||
|
- Inter-event timing statistics (mean / median / stdev / min / max)
|
||||||
|
- Coefficient-of-variation (jitter metric)
|
||||||
|
- Beaconing vs. interactive vs. scanning vs. brute_force vs. slow_scan
|
||||||
|
classification
|
||||||
|
- Tool attribution against known C2 frameworks (Cobalt Strike, Sliver,
|
||||||
|
Havoc, Mythic) using default beacon/jitter profiles — returns a list,
|
||||||
|
since multiple tools can be in use simultaneously
|
||||||
|
- Header-based tool detection (Nmap NSE, Gophish, Nikto, sqlmap, etc.)
|
||||||
|
from HTTP request events
|
||||||
|
- Recon → exfil phase sequencing (latency between the last recon event
|
||||||
|
and the first exfil-like event)
|
||||||
|
- OS / TCP fingerprint + retransmit rollup from sniffer-emitted events,
|
||||||
|
with TTL-based fallback when p0f returns no match
|
||||||
|
|
||||||
|
Pure-Python; no external dependencies. All functions are safe to call from
|
||||||
|
both sync and async contexts.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import statistics
|
||||||
|
from collections import Counter
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from decnet.correlation.parser import LogEvent
|
||||||
|
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer
|
||||||
|
|
||||||
|
# ─── Event-type taxonomy ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Sniffer-emitted packet events that feed into fingerprint rollup.
|
||||||
|
_SNIFFER_SYN_EVENT: str = "tcp_syn_fingerprint"
|
||||||
|
_SNIFFER_FLOW_EVENT: str = "tcp_flow_timing"
|
||||||
|
# Prober-emitted active-probe result (SYN-ACK fingerprint of attacker machine).
|
||||||
|
_PROBER_TCPFP_EVENT: str = "tcpfp_fingerprint"
|
||||||
|
|
||||||
|
# Canonical initial TTL for each coarse OS bucket. Used to derive hop
|
||||||
|
# distance when only the observed TTL is available (prober path).
|
||||||
|
_INITIAL_TTL: dict[str, int] = {
|
||||||
|
"linux": 64,
|
||||||
|
"windows": 128,
|
||||||
|
"embedded": 255,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Events that signal "recon" phase (scans, probes, auth attempts).
|
||||||
|
_RECON_EVENT_TYPES: frozenset[str] = frozenset({
|
||||||
|
"scan", "connection", "banner", "probe",
|
||||||
|
"login_attempt", "auth", "auth_failure",
|
||||||
|
})
|
||||||
|
|
||||||
|
# Events that signal "exfil" / action-on-objective phase.
|
||||||
|
_EXFIL_EVENT_TYPES: frozenset[str] = frozenset({
|
||||||
|
"download", "upload", "file_transfer", "data_exfil",
|
||||||
|
"command", "exec", "query", "shell_input",
|
||||||
|
})
|
||||||
|
|
||||||
|
# Fields carrying payload byte counts (for "large payload" detection).
|
||||||
|
_PAYLOAD_SIZE_FIELDS: tuple[str, ...] = ("bytes", "size", "content_length")
|
||||||
|
|
||||||
|
# ─── C2 tool attribution signatures (beacon timing) ─────────────────────────
|
||||||
|
#
|
||||||
|
# Each entry lists the default beacon cadence profile of a popular C2.
|
||||||
|
# A profile *matches* an attacker when:
|
||||||
|
# - mean inter-event time is within ±`interval_tolerance` seconds, AND
|
||||||
|
# - jitter (cv = stdev / mean) is within ±`jitter_tolerance`
|
||||||
|
#
|
||||||
|
# Multiple matches are all returned (attacker may run multiple implants).
|
||||||
|
|
||||||
|
_TOOL_SIGNATURES: tuple[dict[str, Any], ...] = (
|
||||||
|
{
|
||||||
|
"name": "cobalt_strike",
|
||||||
|
"interval_s": 60.0,
|
||||||
|
"interval_tolerance_s": 8.0,
|
||||||
|
"jitter_cv": 0.20,
|
||||||
|
"jitter_tolerance": 0.05,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "sliver",
|
||||||
|
"interval_s": 60.0,
|
||||||
|
"interval_tolerance_s": 10.0,
|
||||||
|
"jitter_cv": 0.30,
|
||||||
|
"jitter_tolerance": 0.08,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "havoc",
|
||||||
|
"interval_s": 45.0,
|
||||||
|
"interval_tolerance_s": 8.0,
|
||||||
|
"jitter_cv": 0.10,
|
||||||
|
"jitter_tolerance": 0.03,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "mythic",
|
||||||
|
"interval_s": 30.0,
|
||||||
|
"interval_tolerance_s": 6.0,
|
||||||
|
"jitter_cv": 0.15,
|
||||||
|
"jitter_tolerance": 0.03,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# ─── Header-based tool signatures ───────────────────────────────────────────
|
||||||
|
#
|
||||||
|
# Scanned against HTTP `request` events. `pattern` is a case-insensitive
|
||||||
|
# substring (or a regex anchored with ^ if it starts with that character).
|
||||||
|
# `header` is matched case-insensitively against the event's headers dict.
|
||||||
|
|
||||||
|
_HEADER_TOOL_SIGNATURES: tuple[dict[str, str], ...] = (
|
||||||
|
{"name": "nmap", "header": "user-agent", "pattern": "Nmap Scripting Engine"},
|
||||||
|
{"name": "gophish", "header": "x-mailer", "pattern": "gophish"},
|
||||||
|
{"name": "nikto", "header": "user-agent", "pattern": "Nikto"},
|
||||||
|
{"name": "sqlmap", "header": "user-agent", "pattern": "sqlmap"},
|
||||||
|
{"name": "nuclei", "header": "user-agent", "pattern": "Nuclei"},
|
||||||
|
{"name": "masscan", "header": "user-agent", "pattern": "masscan"},
|
||||||
|
{"name": "zgrab", "header": "user-agent", "pattern": "zgrab"},
|
||||||
|
{"name": "metasploit", "header": "user-agent", "pattern": "Metasploit"},
|
||||||
|
{"name": "curl", "header": "user-agent", "pattern": "^curl/"},
|
||||||
|
{"name": "python_requests", "header": "user-agent", "pattern": "python-requests"},
|
||||||
|
{"name": "gobuster", "header": "user-agent", "pattern": "gobuster"},
|
||||||
|
{"name": "dirbuster", "header": "user-agent", "pattern": "DirBuster"},
|
||||||
|
{"name": "hydra", "header": "user-agent", "pattern": "hydra"},
|
||||||
|
{"name": "wfuzz", "header": "user-agent", "pattern": "Wfuzz"},
|
||||||
|
)
|
||||||
|
|
||||||
|
# ─── TTL → coarse OS bucket (fallback when p0f returns nothing) ─────────────
|
||||||
|
|
||||||
|
def _os_from_ttl(ttl_str: str | None) -> str | None:
|
||||||
|
"""Derive a coarse OS guess from observed TTL when p0f has no match."""
|
||||||
|
if not ttl_str:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
ttl = int(ttl_str)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return None
|
||||||
|
if 55 <= ttl <= 70:
|
||||||
|
return "linux"
|
||||||
|
if 115 <= ttl <= 135:
|
||||||
|
return "windows"
|
||||||
|
if 235 <= ttl <= 255:
|
||||||
|
return "embedded"
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Timing stats ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@_traced("profiler.timing_stats")
|
||||||
|
def timing_stats(events: list[LogEvent]) -> dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Compute inter-arrival-time statistics across *events* (sorted by ts).
|
||||||
|
|
||||||
|
Returns a dict with:
|
||||||
|
mean_iat_s, median_iat_s, stdev_iat_s, min_iat_s, max_iat_s, cv,
|
||||||
|
event_count, duration_s
|
||||||
|
|
||||||
|
For n < 2 events the interval-based fields are None/0.
|
||||||
|
"""
|
||||||
|
if not events:
|
||||||
|
return {
|
||||||
|
"event_count": 0,
|
||||||
|
"duration_s": 0.0,
|
||||||
|
"mean_iat_s": None,
|
||||||
|
"median_iat_s": None,
|
||||||
|
"stdev_iat_s": None,
|
||||||
|
"min_iat_s": None,
|
||||||
|
"max_iat_s": None,
|
||||||
|
"cv": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
sorted_events = sorted(events, key=lambda e: e.timestamp)
|
||||||
|
duration_s = (sorted_events[-1].timestamp - sorted_events[0].timestamp).total_seconds()
|
||||||
|
|
||||||
|
if len(sorted_events) < 2:
|
||||||
|
return {
|
||||||
|
"event_count": len(sorted_events),
|
||||||
|
"duration_s": round(duration_s, 3),
|
||||||
|
"mean_iat_s": None,
|
||||||
|
"median_iat_s": None,
|
||||||
|
"stdev_iat_s": None,
|
||||||
|
"min_iat_s": None,
|
||||||
|
"max_iat_s": None,
|
||||||
|
"cv": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
iats = [
|
||||||
|
(sorted_events[i].timestamp - sorted_events[i - 1].timestamp).total_seconds()
|
||||||
|
for i in range(1, len(sorted_events))
|
||||||
|
]
|
||||||
|
# Exclude spuriously-negative (clock-skew) intervals.
|
||||||
|
iats = [v for v in iats if v >= 0]
|
||||||
|
if not iats:
|
||||||
|
return {
|
||||||
|
"event_count": len(sorted_events),
|
||||||
|
"duration_s": round(duration_s, 3),
|
||||||
|
"mean_iat_s": None,
|
||||||
|
"median_iat_s": None,
|
||||||
|
"stdev_iat_s": None,
|
||||||
|
"min_iat_s": None,
|
||||||
|
"max_iat_s": None,
|
||||||
|
"cv": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
mean = statistics.fmean(iats)
|
||||||
|
median = statistics.median(iats)
|
||||||
|
stdev = statistics.pstdev(iats) if len(iats) > 1 else 0.0
|
||||||
|
cv = (stdev / mean) if mean > 0 else None
|
||||||
|
|
||||||
|
return {
|
||||||
|
"event_count": len(sorted_events),
|
||||||
|
"duration_s": round(duration_s, 3),
|
||||||
|
"mean_iat_s": round(mean, 3),
|
||||||
|
"median_iat_s": round(median, 3),
|
||||||
|
"stdev_iat_s": round(stdev, 3),
|
||||||
|
"min_iat_s": round(min(iats), 3),
|
||||||
|
"max_iat_s": round(max(iats), 3),
|
||||||
|
"cv": round(cv, 4) if cv is not None else None,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Behavior classification ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@_traced("profiler.classify_behavior")
|
||||||
|
def classify_behavior(stats: dict[str, Any], services_count: int) -> str:
|
||||||
|
"""
|
||||||
|
Coarse behavior bucket:
|
||||||
|
beaconing | interactive | scanning | brute_force | slow_scan | mixed | unknown
|
||||||
|
|
||||||
|
Heuristics (evaluated in priority order):
|
||||||
|
* `scanning` — ≥ 3 services touched OR mean IAT < 2 s, ≥ 3 events
|
||||||
|
* `brute_force` — 1 service, n ≥ 8, mean IAT < 5 s, CV < 0.6
|
||||||
|
* `beaconing` — CV < 0.35, mean IAT ≥ 5 s, ≥ 4 events
|
||||||
|
* `slow_scan` — ≥ 2 services, mean IAT ≥ 10 s, ≥ 4 events
|
||||||
|
* `interactive` — mean IAT < 5 s AND CV ≥ 0.5, ≥ 6 events
|
||||||
|
* `mixed` — catch-all for sessions with enough data
|
||||||
|
* `unknown` — too few data points
|
||||||
|
"""
|
||||||
|
n = stats.get("event_count") or 0
|
||||||
|
mean = stats.get("mean_iat_s")
|
||||||
|
cv = stats.get("cv")
|
||||||
|
|
||||||
|
if n < 3 or mean is None:
|
||||||
|
return "unknown"
|
||||||
|
|
||||||
|
# Slow scan / low-and-slow: multiple services with long gaps.
|
||||||
|
# Must be checked before generic scanning so slow multi-service sessions
|
||||||
|
# don't get mis-bucketed as a fast sweep.
|
||||||
|
if services_count >= 2 and mean >= 10.0 and n >= 4:
|
||||||
|
return "slow_scan"
|
||||||
|
|
||||||
|
# Scanning: broad service sweep (multi-service) or very rapid single-service bursts.
|
||||||
|
if n >= 3 and (
|
||||||
|
(services_count >= 3 and mean < 10.0)
|
||||||
|
or (services_count >= 2 and mean < 2.0)
|
||||||
|
):
|
||||||
|
return "scanning"
|
||||||
|
|
||||||
|
# Brute force: hammering one service rapidly and repeatedly.
|
||||||
|
if services_count == 1 and n >= 8 and mean < 5.0 and cv is not None and cv < 0.6:
|
||||||
|
return "brute_force"
|
||||||
|
|
||||||
|
# Beaconing: regular cadence over multiple events.
|
||||||
|
if cv is not None and cv < 0.35 and mean >= 5.0 and n >= 4:
|
||||||
|
return "beaconing"
|
||||||
|
|
||||||
|
# Interactive: short but irregular bursts (human or tool with think time).
|
||||||
|
if cv is not None and cv >= 0.5 and mean < 5.0 and n >= 6:
|
||||||
|
return "interactive"
|
||||||
|
|
||||||
|
return "mixed"
|
||||||
|
|
||||||
|
|
||||||
|
# ─── C2 tool attribution (beacon timing) ────────────────────────────────────
|
||||||
|
|
||||||
|
def guess_tools(mean_iat_s: float | None, cv: float | None) -> list[str]:
    """
    Match (mean_iat, cv) against known C2 default beacon profiles.

    A signature matches when both the mean interval and the jitter (CV)
    fall within that tool's tolerance band. All matches are returned —
    possibly an empty list — because an attacker can run several implants.
    """
    if mean_iat_s is None or cv is None:
        return []

    return [
        sig["name"]
        for sig in _TOOL_SIGNATURES
        if abs(mean_iat_s - sig["interval_s"]) <= sig["interval_tolerance_s"]
        and abs(cv - sig["jitter_cv"]) <= sig["jitter_tolerance"]
    ]
|
||||||
|
|
||||||
|
|
||||||
|
# Keep the old name as an alias so callers that expected a single string still
# compile, but mark it deprecated. Returns the single unambiguous match, or
# None when zero — or more than one — signature matched.
def guess_tool(mean_iat_s: float | None, cv: float | None) -> str | None:
    """
    Deprecated: use guess_tools() instead.

    Returns the matched tool name only when exactly one signature matched;
    ambiguous (multiple-match) results yield None rather than an arbitrary
    pick.
    """
    hits = guess_tools(mean_iat_s, cv)
    if len(hits) == 1:
        return hits[0]
    return None
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Header-based tool detection ────────────────────────────────────────────

@_traced("profiler.detect_tools_from_headers")
def detect_tools_from_headers(events: list[LogEvent]) -> list[str]:
    """
    Scan HTTP `request` events for tool-identifying headers.

    Checks User-Agent, X-Mailer, and other headers case-insensitively
    against `_HEADER_TOOL_SIGNATURES`. Returns a deduplicated list of
    matched tool names in detection order.
    """
    found: list[str] = []
    # Names already matched — prevents duplicates across events.
    seen: set[str] = set()

    for e in events:
        if e.event_type != "request":
            continue

        raw_headers = e.fields.get("headers")
        if not raw_headers:
            continue

        # headers may arrive as a JSON string, a Python-repr string (legacy),
        # or a dict already (in-memory / test paths).
        if isinstance(raw_headers, str):
            try:
                # ValueError is listed for safety; json.JSONDecodeError is a
                # subclass of it, so the tuple is belt-and-braces.
                headers: dict[str, str] = json.loads(raw_headers)
            except (json.JSONDecodeError, ValueError):
                # Backward-compat: events written before the JSON-encode fix
                # were serialized as Python repr via str(dict). ast.literal_eval
                # handles that safely (no arbitrary code execution).
                try:
                    import ast as _ast
                    _parsed = _ast.literal_eval(raw_headers)
                    if isinstance(_parsed, dict):
                        headers = _parsed
                    else:
                        continue
                except Exception:  # nosec B112 — skip unparseable header values
                    continue
        elif isinstance(raw_headers, dict):
            headers = raw_headers
        else:
            # Unsupported type (e.g. list/int) — ignore this event.
            continue

        # Normalise header keys to lowercase for matching.
        lc_headers: dict[str, str] = {k.lower(): str(v) for k, v in headers.items()}

        for sig in _HEADER_TOOL_SIGNATURES:
            name = sig["name"]
            if name in seen:
                continue
            # NOTE(review): assumes signature "header" values are declared
            # lowercase in _HEADER_TOOL_SIGNATURES — confirm when editing.
            value = lc_headers.get(sig["header"])
            if value is None:
                continue
            pattern = sig["pattern"]
            # Patterns anchored with '^' are case-insensitive regexes;
            # anything else is a case-insensitive substring test.
            if pattern.startswith("^"):
                if re.match(pattern, value, re.IGNORECASE):
                    found.append(name)
                    seen.add(name)
            else:
                if pattern.lower() in value.lower():
                    found.append(name)
                    seen.add(name)

    return found
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Phase sequencing ───────────────────────────────────────────────────────

@_traced("profiler.phase_sequence")
def phase_sequence(events: list[LogEvent]) -> dict[str, Any]:
    """
    Derive recon→exfil phase transition info.

    Returns:
        recon_end_ts : ISO timestamp of last recon-class event (or None)
        exfil_start_ts : ISO timestamp of first exfil-class event (or None)
        exfil_latency_s : seconds between them (None if not both present)
        large_payload_count: count of events whose *fields* report a payload
                             ≥ 1 MiB (heuristic for bulk data transfer)
    """
    def _reports_large_payload(fields) -> bool:
        # True if any known payload-size field parses to ≥ 1 MiB;
        # each event is counted at most once.
        for key in _PAYLOAD_SIZE_FIELDS:
            candidate = fields.get(key)
            if candidate is None:
                continue
            try:
                if int(candidate) >= 1_048_576:
                    return True
            except (TypeError, ValueError):
                pass
        return False

    last_recon_ts = None
    first_exfil_ts = None
    bulk_transfer_events = 0

    # Walk events chronologically so "last recon" / "first exfil" are
    # meaningful regardless of ingestion order.
    for ev in sorted(events, key=lambda item: item.timestamp):
        if ev.event_type in _RECON_EVENT_TYPES:
            last_recon_ts = ev.timestamp
        elif first_exfil_ts is None and ev.event_type in _EXFIL_EVENT_TYPES:
            first_exfil_ts = ev.timestamp

        if _reports_large_payload(ev.fields):
            bulk_transfer_events += 1

    # Latency is only defined when exfil follows (or coincides with) recon.
    gap_s: float | None = None
    if last_recon_ts is not None and first_exfil_ts is not None:
        if first_exfil_ts >= last_recon_ts:
            gap_s = round((first_exfil_ts - last_recon_ts).total_seconds(), 3)

    return {
        "recon_end_ts": last_recon_ts.isoformat() if last_recon_ts else None,
        "exfil_start_ts": first_exfil_ts.isoformat() if first_exfil_ts else None,
        "exfil_latency_s": gap_s,
        "large_payload_count": bulk_transfer_events,
    }
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Sniffer rollup (OS fingerprint + retransmits) ──────────────────────────

@_traced("profiler.sniffer_rollup")
def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
    """
    Roll up sniffer-emitted `tcp_syn_fingerprint` and `tcp_flow_timing`
    events into a per-attacker summary.

    OS guess priority:
        1. Modal p0f label from os_guess field (if not "unknown"/empty).
        2. TTL-based coarse bucket (linux / windows / embedded) as fallback.
    Hop distance: median of non-zero reported values only.
    """
    os_guesses: list[str] = []
    # Raw TTL field values (kept as strings for modal counting).
    ttl_values: list[str] = []
    hops: list[int] = []
    # Latest fingerprint snapshot seen — later events overwrite earlier ones.
    tcp_fp: dict[str, Any] | None = None
    retransmits = 0

    for e in events:
        if e.event_type == _SNIFFER_SYN_EVENT:
            og = e.fields.get("os_guess")
            if og and og != "unknown":
                os_guesses.append(og)

            # Collect raw TTL for fallback OS derivation.
            ttl_raw = e.fields.get("ttl") or e.fields.get("initial_ttl")
            if ttl_raw:
                ttl_values.append(ttl_raw)

            # Only include hop distances that are valid and non-zero.
            hop_raw = e.fields.get("hop_distance")
            if hop_raw:
                try:
                    hop_val = int(hop_raw)
                    if hop_val > 0:
                        hops.append(hop_val)
                except (TypeError, ValueError):
                    pass

            # Keep the latest fingerprint snapshot.
            # NOTE(review): sniffer booleans are compared against the string
            # "true" — assumes fields arrive as strings; confirm at emitter.
            tcp_fp = {
                "window": _int_or_none(e.fields.get("window")),
                "wscale": _int_or_none(e.fields.get("wscale")),
                "mss": _int_or_none(e.fields.get("mss")),
                "options_sig": e.fields.get("options_sig", ""),
                "has_sack": e.fields.get("has_sack") == "true",
                "has_timestamps": e.fields.get("has_timestamps") == "true",
            }

        elif e.event_type == _SNIFFER_FLOW_EVENT:
            # Accumulate retransmit counts across all flow-timing events.
            try:
                retransmits += int(e.fields.get("retransmits", "0"))
            except (TypeError, ValueError):
                pass

        elif e.event_type == _PROBER_TCPFP_EVENT:
            # Active-probe result: prober sent SYN to attacker, got SYN-ACK back.
            # Field names differ from the passive sniffer (different emitter).
            ttl_raw = e.fields.get("ttl")
            if ttl_raw:
                ttl_values.append(ttl_raw)

                # Derive hop distance from observed TTL vs canonical initial TTL.
                os_hint = _os_from_ttl(ttl_raw)
                if os_hint:
                    initial = _INITIAL_TTL.get(os_hint)
                    if initial:
                        try:
                            hop_val = initial - int(ttl_raw)
                            if hop_val > 0:
                                hops.append(hop_val)
                        except (TypeError, ValueError):
                            pass

            # Prober uses window_size/window_scale/options_order instead of
            # the sniffer's window/wscale/options_sig.
            # NOTE(review): prober booleans use "1" (vs sniffer's "true").
            tcp_fp = {
                "window": _int_or_none(e.fields.get("window_size")),
                "wscale": _int_or_none(e.fields.get("window_scale")),
                "mss": _int_or_none(e.fields.get("mss")),
                "options_sig": e.fields.get("options_order", ""),
                "has_sack": e.fields.get("sack_ok") == "1",
                "has_timestamps": e.fields.get("timestamp") == "1",
            }

    # Mode for the OS bucket — most frequently observed label.
    os_guess: str | None = None
    if os_guesses:
        os_guess = Counter(os_guesses).most_common(1)[0][0]
    else:
        # TTL-based fallback: use the most common observed TTL value.
        if ttl_values:
            modal_ttl = Counter(ttl_values).most_common(1)[0][0]
            os_guess = _os_from_ttl(modal_ttl)

    # Median hop distance (robust to the occasional weird TTL).
    hop_distance: int | None = None
    if hops:
        hop_distance = int(statistics.median(hops))

    return {
        "os_guess": os_guess,
        "hop_distance": hop_distance,
        "tcp_fingerprint": tcp_fp or {},
        "retransmit_count": retransmits,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _int_or_none(v: Any) -> int | None:
|
||||||
|
if v is None or v == "":
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return int(v)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Composite: build the full AttackerBehavior record ──────────────────────

@_traced("profiler.build_behavior_record")
def build_behavior_record(events: list[LogEvent]) -> dict[str, Any]:
    """
    Build the dict to persist in the `attacker_behavior` table.

    JSON-typed fields (tcp_fingerprint, tool_guesses, timing_stats,
    phase_sequence) are encoded to JSON strings here, keeping the repo
    layer schema-agnostic.
    """
    # Timing stats are computed across *all* events (not filtered), because
    # a C2 beacon often reuses the same "connection" event_type on each
    # check-in. Filtering would throw that signal away.
    stats = timing_stats(events)
    services = {e.service for e in events}
    behavior = classify_behavior(stats, len(services))
    rollup = sniffer_rollup(events)
    phase = phase_sequence(events)

    # Combine beacon-timing tool matches with header-based detections.
    beacon_tools = guess_tools(stats.get("mean_iat_s"), stats.get("cv"))
    header_tools = detect_tools_from_headers(events)
    all_tools: list[str] = list(dict.fromkeys(beacon_tools + header_tools))  # dedup, preserve order

    # Promote TCP-level scanner identification to tool_guesses.
    # p0f fingerprints nmap from the TCP handshake alone — this fires even
    # when no HTTP service is present, making it far more reliable than the
    # header-based path for raw port scans.
    if rollup["os_guess"] == "nmap" and "nmap" not in all_tools:
        all_tools.insert(0, "nmap")

    # Beacon-specific projection: only surface interval/jitter when we've
    # classified the flow as beaconing (otherwise these numbers are noise).
    beacon_interval_s: float | None = None
    beacon_jitter_pct: float | None = None
    if behavior == "beaconing":
        beacon_interval_s = stats.get("mean_iat_s")
        cv = stats.get("cv")
        # Coefficient of variation expressed as a percentage.
        beacon_jitter_pct = round(cv * 100, 2) if cv is not None else None

    # Emit a summary span so one trace row captures the whole verdict.
    _tracer = _get_tracer("profiler")
    with _tracer.start_as_current_span("profiler.behavior_summary") as _span:
        _span.set_attribute("behavior_class", behavior)
        _span.set_attribute("os_guess", rollup["os_guess"] or "unknown")
        _span.set_attribute("tool_count", len(all_tools))
        _span.set_attribute("event_count", stats.get("event_count", 0))
        if all_tools:
            _span.set_attribute("tools", ",".join(all_tools))

    return {
        "os_guess": rollup["os_guess"],
        "hop_distance": rollup["hop_distance"],
        "tcp_fingerprint": json.dumps(rollup["tcp_fingerprint"]),
        "retransmit_count": rollup["retransmit_count"],
        "behavior_class": behavior,
        "beacon_interval_s": beacon_interval_s,
        "beacon_jitter_pct": beacon_jitter_pct,
        "tool_guesses": json.dumps(all_tools),
        "timing_stats": json.dumps(stats),
        "phase_sequence": json.dumps(phase),
    }
|
||||||
215
decnet/profiler/worker.py
Normal file
215
decnet/profiler/worker.py
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
"""
|
||||||
|
Attacker profile builder — incremental background worker.
|
||||||
|
|
||||||
|
Maintains a persistent CorrelationEngine and a log-ID cursor across cycles.
|
||||||
|
On cold start (first cycle or process restart), performs one full build from
|
||||||
|
all stored logs. Subsequent cycles fetch only new logs via the cursor,
|
||||||
|
ingest them into the existing engine, and rebuild profiles for affected IPs
|
||||||
|
only.
|
||||||
|
|
||||||
|
Complexity per cycle: O(new_logs + affected_ips) instead of O(total_logs²).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from decnet.correlation.engine import CorrelationEngine
|
||||||
|
from decnet.correlation.parser import LogEvent
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.profiler.behavioral import build_behavior_record
|
||||||
|
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer
|
||||||
|
from decnet.web.db.repository import BaseRepository
|
||||||
|
|
||||||
|
logger = get_logger("attacker_worker")

# Rows fetched per repository query; a batch shorter than this signals that
# the log table is drained for the current cycle (see _incremental_update).
_BATCH_SIZE = 500
# Persistent-state key under which the worker's log-ID cursor is stored.
_STATE_KEY = "attacker_worker_cursor"

# Event types that indicate active command/query execution (not just connection/scan)
_COMMAND_EVENT_TYPES = frozenset({
    "command", "exec", "query", "input", "shell_input",
    "execute", "run", "sql_query", "redis_command",
})

# Fields that carry the executed command/query text
_COMMAND_FIELDS = ("command", "query", "input", "line", "sql", "cmd")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class _WorkerState:
    """In-process worker state carried across update cycles."""

    # Persistent correlation engine; accumulates parsed events across cycles.
    engine: CorrelationEngine = field(default_factory=CorrelationEngine)
    # Highest log row id ingested so far (0 = nothing ingested yet).
    last_log_id: int = 0
    # True once a full pass has completed (or a saved cursor was restored).
    initialized: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
async def attacker_profile_worker(repo: BaseRepository, *, interval: int = 30) -> None:
    """
    Periodically update the Attacker table incrementally.

    Designed to run as a long-lived asyncio Task. On startup, restores the
    log-ID cursor from persistent state (if present) so a process restart
    resumes ingestion rather than re-reading all stored logs.

    Args:
        repo:     repository used for log reads and profile/state writes.
        interval: seconds to sleep between update cycles (keyword-only).
    """
    logger.info("attacker profile worker started interval=%ds", interval)
    state = _WorkerState()

    # Resume from a persisted cursor when available — skips the cold-start
    # full rebuild after a restart.
    _saved_cursor = await repo.get_state(_STATE_KEY)
    if _saved_cursor:
        state.last_log_id = _saved_cursor.get("last_log_id", 0)
        state.initialized = True
        logger.info("attacker worker: resumed from cursor last_log_id=%d", state.last_log_id)

    while True:
        await asyncio.sleep(interval)
        try:
            await _incremental_update(repo, state)
        except Exception:
            # logger.exception keeps the traceback (the previous
            # logger.error("...: %s", exc) discarded it), and the loop keeps
            # running so one bad cycle never kills the worker task.
            # asyncio.CancelledError is a BaseException, so cancellation
            # still propagates for clean shutdown.
            logger.exception("attacker worker: update failed")
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("profiler.incremental_update")
async def _incremental_update(repo: BaseRepository, state: _WorkerState) -> None:
    """
    Drain new log rows into the correlation engine, then rebuild profiles
    for the attacker IPs touched by those rows.

    The cursor (state.last_log_id) advances in memory per row but is only
    persisted via repo.set_state() at the end of the cycle, so a crash
    mid-cycle re-ingests at most one cycle's worth of logs.
    """
    was_cold = not state.initialized
    affected_ips: set[str] = set()

    # Page through all rows newer than the cursor, _BATCH_SIZE at a time.
    while True:
        batch = await repo.get_logs_after_id(state.last_log_id, limit=_BATCH_SIZE)
        if not batch:
            break

        for row in batch:
            # ingest() may not yield an event for every raw line; only
            # events carrying an attacker IP mark a profile as dirty.
            event = state.engine.ingest(row["raw_line"])
            if event and event.attacker_ip:
                affected_ips.add(event.attacker_ip)
            state.last_log_id = row["id"]

        await asyncio.sleep(0)  # yield to event loop after each batch

        # A short batch means no more rows remain — avoid one extra query.
        if len(batch) < _BATCH_SIZE:
            break

    state.initialized = True

    if not affected_ips:
        # Nothing to rebuild — still persist the advanced cursor.
        await repo.set_state(_STATE_KEY, {"last_log_id": state.last_log_id})
        return

    await _update_profiles(repo, state, affected_ips)
    # Cursor is persisted only after profiles were updated, so a failure in
    # _update_profiles leaves this cycle's rows to be reprocessed next time.
    await repo.set_state(_STATE_KEY, {"last_log_id": state.last_log_id})

    if was_cold:
        logger.info("attacker worker: cold start rebuilt %d profiles", len(affected_ips))
    else:
        logger.info("attacker worker: updated %d profiles (incremental)", len(affected_ips))
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("profiler.update_profiles")
async def _update_profiles(
    repo: BaseRepository,
    state: _WorkerState,
    ips: set[str],
) -> None:
    """
    Rebuild and upsert the attacker profile (plus behavior rollup) for each
    affected IP.

    Traversals and bounties are fetched in bulk before the per-IP loop to
    avoid issuing one query per IP.
    """
    traversal_map = {t.attacker_ip: t for t in state.engine.traversals(min_deckies=2)}
    bounties_map = await repo.get_bounties_for_ips(ips)

    _tracer = _get_tracer("profiler")
    for ip in ips:
        # NOTE(review): reaches into the engine's private _events mapping —
        # consider adding a public accessor on CorrelationEngine.
        events = state.engine._events.get(ip, [])
        if not events:
            continue

        with _tracer.start_as_current_span("profiler.process_ip") as _span:
            _span.set_attribute("attacker_ip", ip)
            _span.set_attribute("event_count", len(events))

            traversal = traversal_map.get(ip)
            bounties = bounties_map.get(ip, [])
            commands = _extract_commands_from_events(events)

            record = _build_record(ip, events, traversal, bounties, commands)
            attacker_uuid = await repo.upsert_attacker(record)

            _span.set_attribute("is_traversal", traversal is not None)
            _span.set_attribute("bounty_count", len(bounties))
            _span.set_attribute("command_count", len(commands))

            # Behavioral / fingerprint rollup lives in a sibling table so failures
            # here never block the core attacker profile upsert.
            try:
                behavior = build_behavior_record(events)
                await repo.upsert_attacker_behavior(attacker_uuid, behavior)
            except Exception as exc:
                _span.record_exception(exc)
                logger.error("attacker worker: behavior upsert failed for %s: %s", ip, exc)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_record(
|
||||||
|
ip: str,
|
||||||
|
events: list[LogEvent],
|
||||||
|
traversal: Any,
|
||||||
|
bounties: list[dict[str, Any]],
|
||||||
|
commands: list[dict[str, Any]],
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
services = sorted({e.service for e in events})
|
||||||
|
deckies = (
|
||||||
|
traversal.deckies
|
||||||
|
if traversal
|
||||||
|
else _first_contact_deckies(events)
|
||||||
|
)
|
||||||
|
fingerprints = [b for b in bounties if b.get("bounty_type") == "fingerprint"]
|
||||||
|
credential_count = sum(1 for b in bounties if b.get("bounty_type") == "credential")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"ip": ip,
|
||||||
|
"first_seen": min(e.timestamp for e in events),
|
||||||
|
"last_seen": max(e.timestamp for e in events),
|
||||||
|
"event_count": len(events),
|
||||||
|
"service_count": len(services),
|
||||||
|
"decky_count": len({e.decky for e in events}),
|
||||||
|
"services": json.dumps(services),
|
||||||
|
"deckies": json.dumps(deckies),
|
||||||
|
"traversal_path": traversal.path if traversal else None,
|
||||||
|
"is_traversal": traversal is not None,
|
||||||
|
"bounty_count": len(bounties),
|
||||||
|
"credential_count": credential_count,
|
||||||
|
"fingerprints": json.dumps(fingerprints),
|
||||||
|
"commands": json.dumps(commands),
|
||||||
|
"updated_at": datetime.now(timezone.utc),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _first_contact_deckies(events: list[LogEvent]) -> list[str]:
|
||||||
|
"""Return unique deckies in first-contact order (for non-traversal attackers)."""
|
||||||
|
seen: list[str] = []
|
||||||
|
for e in sorted(events, key=lambda x: x.timestamp):
|
||||||
|
if e.decky not in seen:
|
||||||
|
seen.append(e.decky)
|
||||||
|
return seen
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_commands_from_events(events: list[LogEvent]) -> list[dict[str, Any]]:
    """
    Extract executed commands from LogEvent objects.

    Works directly on LogEvent.fields (already a dict), so no JSON parsing
    is needed. Only events whose type is in _COMMAND_EVENT_TYPES are
    considered; the first populated field from _COMMAND_FIELDS wins.
    """
    extracted: list[dict[str, Any]] = []
    for ev in events:
        if ev.event_type not in _COMMAND_EVENT_TYPES:
            continue

        # First truthy command-text field, stringified; None if none found.
        text = next(
            (str(v) for v in (ev.fields.get(k) for k in _COMMAND_FIELDS) if v),
            None,
        )
        # Skip events with no command text (or text that stringifies empty).
        if not text:
            continue

        extracted.append({
            "service": ev.service,
            "decky": ev.decky,
            "command": text,
            "timestamp": ev.timestamp.isoformat(),
        })

    return extracted
|
||||||
@@ -13,6 +13,7 @@ class BaseService(ABC):
|
|||||||
name: str # unique slug, e.g. "ssh", "smb"
|
name: str # unique slug, e.g. "ssh", "smb"
|
||||||
ports: list[int] # ports this service listens on inside the container
|
ports: list[int] # ports this service listens on inside the container
|
||||||
default_image: str # Docker image tag, or "build" if a Dockerfile is needed
|
default_image: str # Docker image tag, or "build" if a Dockerfile is needed
|
||||||
|
fleet_singleton: bool = False # True = runs once fleet-wide, not per-decky
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def compose_fragment(
|
def compose_fragment(
|
||||||
|
|||||||
@@ -32,4 +32,4 @@ class ConpotService(BaseService):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def dockerfile_context(self):
|
def dockerfile_context(self):
|
||||||
return Path(__file__).parent.parent.parent / "templates" / "conpot"
|
return Path(__file__).parent.parent / "templates" / "conpot"
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "docker_api"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "docker_api"
|
||||||
|
|
||||||
|
|
||||||
class DockerAPIService(BaseService):
|
class DockerAPIService(BaseService):
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "elasticsearch"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "elasticsearch"
|
||||||
|
|
||||||
|
|
||||||
class ElasticsearchService(BaseService):
|
class ElasticsearchService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "ftp"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "ftp"
|
||||||
|
|
||||||
|
|
||||||
class FTPService(BaseService):
|
class FTPService(BaseService):
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import json
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "http"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "http"
|
||||||
|
|
||||||
|
|
||||||
class HTTPService(BaseService):
|
class HTTPService(BaseService):
|
||||||
|
|||||||
59
decnet/services/https.py
Normal file
59
decnet/services/https.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "https"
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPSService(BaseService):
    """Decoy HTTPS service on port 443, built from the local template dir.

    Persona details (server header, fake app, TLS cert material, …) are
    injected via environment variables from the per-service config.
    """

    name = "https"
    ports = [443]
    default_image = "build"

    def compose_fragment(
        self,
        decky_name: str,
        log_target: str | None = None,
        service_cfg: dict | None = None,
    ) -> dict:
        """Return the docker-compose service fragment for this decky."""
        cfg = service_cfg or {}
        env: dict = {"NODE_NAME": decky_name}
        fragment: dict = {
            "build": {"context": str(TEMPLATES_DIR)},
            "container_name": f"{decky_name}-https",
            "restart": "unless-stopped",
            "environment": env,
        }
        if log_target:
            env["LOG_TARGET"] = log_target

        # Optional persona overrides — only injected when explicitly set.
        if "server_header" in cfg:
            env["SERVER_HEADER"] = cfg["server_header"]
        if "response_code" in cfg:
            env["RESPONSE_CODE"] = str(cfg["response_code"])
        if "fake_app" in cfg:
            env["FAKE_APP"] = cfg["fake_app"]
        if "extra_headers" in cfg:
            headers = cfg["extra_headers"]
            # Dicts are serialized to JSON; pre-serialized strings pass through.
            env["EXTRA_HEADERS"] = json.dumps(headers) if isinstance(headers, dict) else headers
        if "custom_body" in cfg:
            env["CUSTOM_BODY"] = cfg["custom_body"]
        if "files" in cfg:
            # Mount the host directory read-only and point the service at it.
            host_dir = str(Path(cfg["files"]).resolve())
            env["FILES_DIR"] = "/opt/html_files"
            fragment.setdefault("volumes", []).append(f"{host_dir}:/opt/html_files:ro")
        if "tls_cert" in cfg:
            env["TLS_CERT"] = cfg["tls_cert"]
        if "tls_key" in cfg:
            env["TLS_KEY"] = cfg["tls_key"]
        if "tls_cn" in cfg:
            env["TLS_CN"] = cfg["tls_cn"]

        return fragment

    def dockerfile_context(self) -> Path | None:
        """Directory holding the Dockerfile used when default_image == "build"."""
        return TEMPLATES_DIR
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "imap"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "imap"
|
||||||
|
|
||||||
|
|
||||||
class IMAPService(BaseService):
|
class IMAPService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "k8s"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "k8s"
|
||||||
|
|
||||||
|
|
||||||
class KubernetesAPIService(BaseService):
|
class KubernetesAPIService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "ldap"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "ldap"
|
||||||
|
|
||||||
|
|
||||||
class LDAPService(BaseService):
|
class LDAPService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "llmnr"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "llmnr"
|
||||||
|
|
||||||
|
|
||||||
class LLMNRService(BaseService):
|
class LLMNRService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "mongodb"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mongodb"
|
||||||
|
|
||||||
|
|
||||||
class MongoDBService(BaseService):
|
class MongoDBService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "mqtt"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mqtt"
|
||||||
|
|
||||||
|
|
||||||
class MQTTService(BaseService):
|
class MQTTService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "mssql"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mssql"
|
||||||
|
|
||||||
|
|
||||||
class MSSQLService(BaseService):
|
class MSSQLService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "mysql"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mysql"
|
||||||
|
|
||||||
|
|
||||||
class MySQLService(BaseService):
|
class MySQLService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "pop3"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "pop3"
|
||||||
|
|
||||||
|
|
||||||
class POP3Service(BaseService):
|
class POP3Service(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "postgres"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "postgres"
|
||||||
|
|
||||||
|
|
||||||
class PostgresService(BaseService):
|
class PostgresService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "rdp"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "rdp"
|
||||||
|
|
||||||
|
|
||||||
class RDPService(BaseService):
|
class RDPService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "redis"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "redis"
|
||||||
|
|
||||||
|
|
||||||
class RedisService(BaseService):
|
class RedisService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "sip"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "sip"
|
||||||
|
|
||||||
|
|
||||||
class SIPService(BaseService):
|
class SIPService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "smb"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "smb"
|
||||||
|
|
||||||
|
|
||||||
class SMBService(BaseService):
|
class SMBService(BaseService):
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "smtp"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "smtp"
|
||||||
|
|
||||||
|
|
||||||
class SMTPService(BaseService):
|
class SMTPService(BaseService):
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from decnet.services.base import BaseService
|
|||||||
|
|
||||||
# Reuses the same template as the smtp service — only difference is
|
# Reuses the same template as the smtp service — only difference is
|
||||||
# SMTP_OPEN_RELAY=1 in the environment, which enables the open relay persona.
|
# SMTP_OPEN_RELAY=1 in the environment, which enables the open relay persona.
|
||||||
_TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "smtp"
|
_TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "smtp"
|
||||||
|
|
||||||
|
|
||||||
class SMTPRelayService(BaseService):
|
class SMTPRelayService(BaseService):
|
||||||
|
|||||||
41
decnet/services/sniffer.py
Normal file
41
decnet/services/sniffer.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "sniffer"
|
||||||
|
|
||||||
|
|
||||||
|
class SnifferService(BaseService):
|
||||||
|
"""
|
||||||
|
Passive network sniffer deployed alongside deckies on the MACVLAN.
|
||||||
|
|
||||||
|
Captures TLS handshakes in promiscuous mode and extracts JA3/JA3S hashes
|
||||||
|
plus connection metadata. Requires NET_RAW + NET_ADMIN capabilities.
|
||||||
|
No inbound ports — purely passive.
|
||||||
|
"""
|
||||||
|
|
||||||
|
name = "sniffer"
|
||||||
|
ports: list[int] = []
|
||||||
|
default_image = "build"
|
||||||
|
fleet_singleton = True
|
||||||
|
|
||||||
|
def compose_fragment(
|
||||||
|
self,
|
||||||
|
decky_name: str,
|
||||||
|
log_target: str | None = None,
|
||||||
|
service_cfg: dict | None = None,
|
||||||
|
) -> dict:
|
||||||
|
fragment: dict = {
|
||||||
|
"build": {"context": str(TEMPLATES_DIR)},
|
||||||
|
"container_name": f"{decky_name}-sniffer",
|
||||||
|
"restart": "unless-stopped",
|
||||||
|
"cap_add": ["NET_RAW", "NET_ADMIN"],
|
||||||
|
"environment": {
|
||||||
|
"NODE_NAME": decky_name,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if log_target:
|
||||||
|
fragment["environment"]["LOG_TARGET"] = log_target
|
||||||
|
return fragment
|
||||||
|
|
||||||
|
def dockerfile_context(self) -> Path | None:
|
||||||
|
return TEMPLATES_DIR
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "snmp"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "snmp"
|
||||||
|
|
||||||
|
|
||||||
class SNMPService(BaseService):
|
class SNMPService(BaseService):
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "ssh"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "ssh"
|
||||||
|
|
||||||
|
|
||||||
class SSHService(BaseService):
|
class SSHService(BaseService):
|
||||||
@@ -32,16 +32,28 @@ class SSHService(BaseService):
|
|||||||
cfg = service_cfg or {}
|
cfg = service_cfg or {}
|
||||||
env: dict = {
|
env: dict = {
|
||||||
"SSH_ROOT_PASSWORD": cfg.get("password", "admin"),
|
"SSH_ROOT_PASSWORD": cfg.get("password", "admin"),
|
||||||
|
# NODE_NAME is the authoritative decky identifier for log
|
||||||
|
# attribution — matches the host path used for the artifacts
|
||||||
|
# bind mount below. The container hostname (optionally overridden
|
||||||
|
# via SSH_HOSTNAME) is cosmetic and may differ to keep the
|
||||||
|
# decoy looking heterogeneous.
|
||||||
|
"NODE_NAME": decky_name,
|
||||||
}
|
}
|
||||||
if "hostname" in cfg:
|
if "hostname" in cfg:
|
||||||
env["SSH_HOSTNAME"] = cfg["hostname"]
|
env["SSH_HOSTNAME"] = cfg["hostname"]
|
||||||
|
|
||||||
|
# File-catcher quarantine: bind-mount a per-decky host dir so attacker
|
||||||
|
# drops (scp/sftp/wget) are mirrored out-of-band for forensic analysis.
|
||||||
|
# The in-container path masquerades as systemd-coredump so `mount`/`df`
|
||||||
|
# from inside the container looks benign.
|
||||||
|
quarantine_host = f"/var/lib/decnet/artifacts/{decky_name}/ssh"
|
||||||
return {
|
return {
|
||||||
"build": {"context": str(TEMPLATES_DIR)},
|
"build": {"context": str(TEMPLATES_DIR)},
|
||||||
"container_name": f"{decky_name}-ssh",
|
"container_name": f"{decky_name}-ssh",
|
||||||
"restart": "unless-stopped",
|
"restart": "unless-stopped",
|
||||||
"cap_add": ["NET_BIND_SERVICE"],
|
"cap_add": ["NET_BIND_SERVICE"],
|
||||||
"environment": env,
|
"environment": env,
|
||||||
|
"volumes": [f"{quarantine_host}:/var/lib/systemd/coredump:rw"],
|
||||||
}
|
}
|
||||||
|
|
||||||
def dockerfile_context(self) -> Path:
|
def dockerfile_context(self) -> Path:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "telnet"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "telnet"
|
||||||
|
|
||||||
|
|
||||||
class TelnetService(BaseService):
|
class TelnetService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "tftp"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "tftp"
|
||||||
|
|
||||||
|
|
||||||
class TFTPService(BaseService):
|
class TFTPService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "vnc"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "vnc"
|
||||||
|
|
||||||
|
|
||||||
class VNCService(BaseService):
|
class VNCService(BaseService):
|
||||||
|
|||||||
11
decnet/sniffer/__init__.py
Normal file
11
decnet/sniffer/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
"""
|
||||||
|
Fleet-wide MACVLAN sniffer microservice.
|
||||||
|
|
||||||
|
Runs as a single host-side background task (not per-decky) that sniffs
|
||||||
|
all TLS traffic on the MACVLAN interface, extracts fingerprints, and
|
||||||
|
feeds events into the existing log pipeline.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from decnet.sniffer.worker import sniffer_worker
|
||||||
|
|
||||||
|
__all__ = ["sniffer_worker"]
|
||||||
1166
decnet/sniffer/fingerprint.py
Normal file
1166
decnet/sniffer/fingerprint.py
Normal file
File diff suppressed because it is too large
Load Diff
238
decnet/sniffer/p0f.py
Normal file
238
decnet/sniffer/p0f.py
Normal file
@@ -0,0 +1,238 @@
|
|||||||
|
"""
|
||||||
|
Passive OS fingerprinting (p0f-lite) for the DECNET sniffer.
|
||||||
|
|
||||||
|
Pure-Python lookup module. Given the values of an incoming TCP SYN packet
|
||||||
|
(TTL, window, MSS, window-scale, and TCP option ordering), returns a coarse
|
||||||
|
OS bucket (linux / windows / macos_ios / freebsd / openbsd / nmap / unknown)
|
||||||
|
plus derived hop distance and inferred initial TTL.
|
||||||
|
|
||||||
|
Rationale
|
||||||
|
---------
|
||||||
|
Full p0f v3 distinguishes several dozen OS/tool profiles by combining dozens
|
||||||
|
of low-level quirks (OLEN, WSIZE, EOL padding, PCLASS, quirks, payload class).
|
||||||
|
For DECNET we only need a coarse bucket — enough to tag an attacker as
|
||||||
|
"linux beacon" vs "windows interactive" vs "active scan". The curated
|
||||||
|
table below covers default stacks that dominate real-world attacker traffic.
|
||||||
|
|
||||||
|
References (public p0f v3 DB, nmap-os-db, and Mozilla OS Fingerprint table):
|
||||||
|
https://github.com/p0f/p0f/blob/master/p0f.fp
|
||||||
|
|
||||||
|
No external dependencies.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from decnet.telemetry import traced as _traced
|
||||||
|
|
||||||
|
# ─── TTL → initial TTL bucket ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Common "hop 0" TTLs. Packets decrement TTL once per hop, so we round up
|
||||||
|
# the observed TTL to the nearest known starting value.
|
||||||
|
_TTL_BUCKETS: tuple[int, ...] = (32, 64, 128, 255)
|
||||||
|
|
||||||
|
|
||||||
|
def initial_ttl(ttl: int) -> int:
|
||||||
|
"""
|
||||||
|
Round *ttl* up to the nearest known initial-TTL bucket.
|
||||||
|
|
||||||
|
A SYN with TTL=59 was almost certainly emitted by a Linux/BSD host
|
||||||
|
(initial 64) five hops away; TTL=120 by a Windows host (initial 128)
|
||||||
|
eight hops away.
|
||||||
|
"""
|
||||||
|
for bucket in _TTL_BUCKETS:
|
||||||
|
if ttl <= bucket:
|
||||||
|
return bucket
|
||||||
|
return 255
|
||||||
|
|
||||||
|
|
||||||
|
def hop_distance(ttl: int) -> int:
|
||||||
|
"""
|
||||||
|
Estimate hops between the attacker and the sniffer based on TTL.
|
||||||
|
|
||||||
|
Upper-bounded at 64 (anything further has most likely been mangled
|
||||||
|
by a misconfigured firewall or a TTL-spoofing NAT).
|
||||||
|
"""
|
||||||
|
dist = initial_ttl(ttl) - ttl
|
||||||
|
if dist < 0:
|
||||||
|
return 0
|
||||||
|
if dist > 64:
|
||||||
|
return 64
|
||||||
|
return dist
|
||||||
|
|
||||||
|
|
||||||
|
# ─── OS signature table (TTL bucket, window, MSS, wscale, option-order) ─────
|
||||||
|
|
||||||
|
# Each entry is a set of loose predicates. If all predicates match, the
|
||||||
|
# OS label is returned. First-match wins. `None` means "don't care".
|
||||||
|
#
|
||||||
|
# The option signatures use the short-code alphabet from
|
||||||
|
# decnet/prober/tcpfp.py :: _OPT_CODES (M=MSS, N=NOP, W=WScale,
|
||||||
|
# T=Timestamp, S=SAckOK, E=EOL).
|
||||||
|
|
||||||
|
_SIGNATURES: tuple[tuple[dict, str], ...] = (
|
||||||
|
# ── nmap -sS / -sT default probe ───────────────────────────────────────
|
||||||
|
# nmap crafts very distinctive SYNs: tiny window (1024/4096/etc.), full
|
||||||
|
# option set including WScale=10 and SAckOK. Match these first so they
|
||||||
|
# don't get misclassified as Linux.
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ttl_bucket": 64,
|
||||||
|
"window_in": {1024, 2048, 3072, 4096, 31337, 32768, 65535},
|
||||||
|
"mss": 1460,
|
||||||
|
"wscale": 10,
|
||||||
|
"options": "M,W,T,S,S",
|
||||||
|
},
|
||||||
|
"nmap",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ttl_bucket": 64,
|
||||||
|
"window_in": {1024, 2048, 3072, 4096, 31337, 32768, 65535},
|
||||||
|
"options_starts_with": "M,W,T,S",
|
||||||
|
},
|
||||||
|
"nmap",
|
||||||
|
),
|
||||||
|
# ── macOS / iOS default SYN (match before Linux — shares TTL 64) ──────
|
||||||
|
# TTL 64, window 65535, MSS 1460, WScale 6, specific option order
|
||||||
|
# M,N,W,N,N,T,S,E (Darwin signature with EOL padding).
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ttl_bucket": 64,
|
||||||
|
"window": 65535,
|
||||||
|
"wscale": 6,
|
||||||
|
"options": "M,N,W,N,N,T,S,E",
|
||||||
|
},
|
||||||
|
"macos_ios",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ttl_bucket": 64,
|
||||||
|
"window_in": {65535},
|
||||||
|
"wscale_in": {5, 6},
|
||||||
|
"has_timestamps": True,
|
||||||
|
"options_ends_with": "E",
|
||||||
|
},
|
||||||
|
"macos_ios",
|
||||||
|
),
|
||||||
|
# ── FreeBSD default SYN (TTL 64, no EOL) ───────────────────────────────
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ttl_bucket": 64,
|
||||||
|
"window": 65535,
|
||||||
|
"wscale": 6,
|
||||||
|
"has_sack": True,
|
||||||
|
"has_timestamps": True,
|
||||||
|
"options_no_eol": True,
|
||||||
|
},
|
||||||
|
"freebsd",
|
||||||
|
),
|
||||||
|
# ── Linux (kernel 3.x – 6.x) default SYN ───────────────────────────────
|
||||||
|
# TTL 64, window 29200 / 64240 / 65535, MSS 1460, WScale 7, full options.
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ttl_bucket": 64,
|
||||||
|
"window_min": 5000,
|
||||||
|
"wscale_in": {6, 7, 8, 9, 10, 11, 12, 13, 14},
|
||||||
|
"has_sack": True,
|
||||||
|
"has_timestamps": True,
|
||||||
|
},
|
||||||
|
"linux",
|
||||||
|
),
|
||||||
|
# ── OpenBSD default SYN ─────────────────────────────────────────────────
|
||||||
|
# TTL 64, window 16384, WScale 3-6, MSS 1460
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ttl_bucket": 64,
|
||||||
|
"window_in": {16384, 16960},
|
||||||
|
"wscale_in": {3, 4, 5, 6},
|
||||||
|
},
|
||||||
|
"openbsd",
|
||||||
|
),
|
||||||
|
# ── Windows 10/11/Server default SYN ────────────────────────────────────
|
||||||
|
# TTL 128, window 64240/65535, MSS 1460, WScale 8, SACK+TS
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ttl_bucket": 128,
|
||||||
|
"window_min": 8192,
|
||||||
|
"wscale_in": {2, 6, 7, 8},
|
||||||
|
"has_sack": True,
|
||||||
|
},
|
||||||
|
"windows",
|
||||||
|
),
|
||||||
|
# ── Windows 7/XP (legacy) ───────────────────────────────────────────────
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ttl_bucket": 128,
|
||||||
|
"window_in": {8192, 16384, 65535},
|
||||||
|
},
|
||||||
|
"windows",
|
||||||
|
),
|
||||||
|
# ── Embedded / Cisco / network gear ─────────────────────────────────────
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ttl_bucket": 255,
|
||||||
|
},
|
||||||
|
"embedded",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _match_signature(
|
||||||
|
sig: dict,
|
||||||
|
ttl: int,
|
||||||
|
window: int,
|
||||||
|
mss: int,
|
||||||
|
wscale: int | None,
|
||||||
|
options_sig: str,
|
||||||
|
) -> bool:
|
||||||
|
"""Evaluate every predicate in *sig* against the observed values."""
|
||||||
|
tb = initial_ttl(ttl)
|
||||||
|
if "ttl_bucket" in sig and sig["ttl_bucket"] != tb:
|
||||||
|
return False
|
||||||
|
if "window" in sig and sig["window"] != window:
|
||||||
|
return False
|
||||||
|
if "window_in" in sig and window not in sig["window_in"]:
|
||||||
|
return False
|
||||||
|
if "window_min" in sig and window < sig["window_min"]:
|
||||||
|
return False
|
||||||
|
if "mss" in sig and sig["mss"] != mss:
|
||||||
|
return False
|
||||||
|
if "wscale" in sig and sig["wscale"] != wscale:
|
||||||
|
return False
|
||||||
|
if "wscale_in" in sig and wscale not in sig["wscale_in"]:
|
||||||
|
return False
|
||||||
|
if "has_sack" in sig:
|
||||||
|
if sig["has_sack"] != ("S" in options_sig):
|
||||||
|
return False
|
||||||
|
if "has_timestamps" in sig:
|
||||||
|
if sig["has_timestamps"] != ("T" in options_sig):
|
||||||
|
return False
|
||||||
|
if "options" in sig and sig["options"] != options_sig:
|
||||||
|
return False
|
||||||
|
if "options_starts_with" in sig and not options_sig.startswith(sig["options_starts_with"]):
|
||||||
|
return False
|
||||||
|
if "options_ends_with" in sig and not options_sig.endswith(sig["options_ends_with"]):
|
||||||
|
return False
|
||||||
|
if "options_no_eol" in sig and sig["options_no_eol"] and "E" in options_sig:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("sniffer.p0f_guess_os")
|
||||||
|
def guess_os(
|
||||||
|
ttl: int,
|
||||||
|
window: int,
|
||||||
|
mss: int = 0,
|
||||||
|
wscale: int | None = None,
|
||||||
|
options_sig: str = "",
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Return a coarse OS bucket for the given SYN characteristics.
|
||||||
|
|
||||||
|
One of: "linux", "windows", "macos_ios", "freebsd", "openbsd",
|
||||||
|
"embedded", "nmap", "unknown".
|
||||||
|
"""
|
||||||
|
for sig, label in _SIGNATURES:
|
||||||
|
if _match_signature(sig, ttl, window, mss, wscale, options_sig):
|
||||||
|
return label
|
||||||
|
return "unknown"
|
||||||
71
decnet/sniffer/syslog.py
Normal file
71
decnet/sniffer/syslog.py
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
"""
|
||||||
|
RFC 5424 syslog formatting and log-file writing for the fleet sniffer.
|
||||||
|
|
||||||
|
Reuses the same wire format as templates/sniffer/decnet_logging.py so the
|
||||||
|
existing collector parser and ingester can consume events without changes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from decnet.collector.worker import parse_rfc5424
|
||||||
|
from decnet.telemetry import traced as _traced
|
||||||
|
|
||||||
|
# ─── Constants (must match templates/sniffer/decnet_logging.py) ──────────────
|
||||||
|
|
||||||
|
_FACILITY_LOCAL0 = 16
|
||||||
|
_SD_ID = "relay@55555"
|
||||||
|
_NILVALUE = "-"
|
||||||
|
|
||||||
|
SEVERITY_INFO = 6
|
||||||
|
SEVERITY_WARNING = 4
|
||||||
|
|
||||||
|
_MAX_HOSTNAME = 255
|
||||||
|
_MAX_APPNAME = 48
|
||||||
|
_MAX_MSGID = 32
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Formatter ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _sd_escape(value: str) -> str:
|
||||||
|
return value.replace("\\", "\\\\").replace('"', '\\"').replace("]", "\\]")
|
||||||
|
|
||||||
|
|
||||||
|
def _sd_element(fields: dict[str, Any]) -> str:
|
||||||
|
if not fields:
|
||||||
|
return _NILVALUE
|
||||||
|
params = " ".join(f'{k}="{_sd_escape(str(v))}"' for k, v in fields.items())
|
||||||
|
return f"[{_SD_ID} {params}]"
|
||||||
|
|
||||||
|
|
||||||
|
def syslog_line(
|
||||||
|
service: str,
|
||||||
|
hostname: str,
|
||||||
|
event_type: str,
|
||||||
|
severity: int = SEVERITY_INFO,
|
||||||
|
msg: str | None = None,
|
||||||
|
**fields: Any,
|
||||||
|
) -> str:
|
||||||
|
pri = f"<{_FACILITY_LOCAL0 * 8 + severity}>"
|
||||||
|
ts = datetime.now(timezone.utc).isoformat()
|
||||||
|
host = (hostname or _NILVALUE)[:_MAX_HOSTNAME]
|
||||||
|
appname = (service or _NILVALUE)[:_MAX_APPNAME]
|
||||||
|
msgid = (event_type or _NILVALUE)[:_MAX_MSGID]
|
||||||
|
sd = _sd_element(fields)
|
||||||
|
message = f" {msg}" if msg else ""
|
||||||
|
return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}"
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("sniffer.write_event")
|
||||||
|
def write_event(line: str, log_path: Path, json_path: Path) -> None:
|
||||||
|
"""Append a syslog line to the raw log and its parsed JSON to the json log."""
|
||||||
|
with open(log_path, "a", encoding="utf-8") as lf:
|
||||||
|
lf.write(line + "\n")
|
||||||
|
lf.flush()
|
||||||
|
parsed = parse_rfc5424(line)
|
||||||
|
if parsed:
|
||||||
|
with open(json_path, "a", encoding="utf-8") as jf:
|
||||||
|
jf.write(json.dumps(parsed) + "\n")
|
||||||
|
jf.flush()
|
||||||
176
decnet/sniffer/worker.py
Normal file
176
decnet/sniffer/worker.py
Normal file
@@ -0,0 +1,176 @@
|
|||||||
|
"""
|
||||||
|
Fleet-wide MACVLAN sniffer worker.
|
||||||
|
|
||||||
|
Runs as a single host-side async background task that sniffs all TLS
|
||||||
|
traffic on the MACVLAN host interface. Maps packets to deckies by IP
|
||||||
|
and feeds fingerprint events into the existing log pipeline.
|
||||||
|
|
||||||
|
Modeled on decnet.collector.worker — same lifecycle pattern.
|
||||||
|
Fault-isolated: any exception is logged and the worker exits cleanly.
|
||||||
|
The API never depends on this worker being alive.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import subprocess # nosec B404 — needed for interface checks
|
||||||
|
import threading
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.network import HOST_IPVLAN_IFACE, HOST_MACVLAN_IFACE
|
||||||
|
from decnet.sniffer.fingerprint import SnifferEngine
|
||||||
|
from decnet.sniffer.syslog import write_event
|
||||||
|
from decnet.telemetry import traced as _traced
|
||||||
|
|
||||||
|
logger = get_logger("sniffer")
|
||||||
|
|
||||||
|
_IP_MAP_REFRESH_INTERVAL: float = 60.0
|
||||||
|
|
||||||
|
|
||||||
|
def _load_ip_to_decky() -> dict[str, str]:
|
||||||
|
"""Build IP → decky-name mapping from decnet-state.json."""
|
||||||
|
from decnet.config import load_state
|
||||||
|
state = load_state()
|
||||||
|
if state is None:
|
||||||
|
return {}
|
||||||
|
config, _ = state
|
||||||
|
mapping: dict[str, str] = {}
|
||||||
|
for decky in config.deckies:
|
||||||
|
mapping[decky.ip] = decky.name
|
||||||
|
return mapping
|
||||||
|
|
||||||
|
|
||||||
|
def _interface_exists(iface: str) -> bool:
|
||||||
|
"""Check if a network interface exists on this host."""
|
||||||
|
try:
|
||||||
|
result = subprocess.run( # nosec B603 B607 — hardcoded args
|
||||||
|
["ip", "link", "show", iface],
|
||||||
|
capture_output=True, text=True, check=False,
|
||||||
|
)
|
||||||
|
return result.returncode == 0
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("sniffer.sniff_loop")
|
||||||
|
def _sniff_loop(
|
||||||
|
interface: str,
|
||||||
|
log_path: Path,
|
||||||
|
json_path: Path,
|
||||||
|
stop_event: threading.Event,
|
||||||
|
) -> None:
|
||||||
|
"""Blocking sniff loop. Runs in a dedicated thread via asyncio.to_thread."""
|
||||||
|
try:
|
||||||
|
from scapy.sendrecv import sniff
|
||||||
|
except ImportError:
|
||||||
|
logger.error("scapy not installed — sniffer cannot start")
|
||||||
|
return
|
||||||
|
|
||||||
|
ip_map = _load_ip_to_decky()
|
||||||
|
if not ip_map:
|
||||||
|
logger.warning("sniffer: no deckies in state — nothing to sniff")
|
||||||
|
return
|
||||||
|
|
||||||
|
def _write_fn(line: str) -> None:
|
||||||
|
write_event(line, log_path, json_path)
|
||||||
|
|
||||||
|
engine = SnifferEngine(ip_to_decky=ip_map, write_fn=_write_fn)
|
||||||
|
|
||||||
|
# Periodically refresh IP map in a background daemon thread
|
||||||
|
def _refresh_loop() -> None:
|
||||||
|
while not stop_event.is_set():
|
||||||
|
stop_event.wait(_IP_MAP_REFRESH_INTERVAL)
|
||||||
|
if stop_event.is_set():
|
||||||
|
break
|
||||||
|
try:
|
||||||
|
new_map = _load_ip_to_decky()
|
||||||
|
if new_map:
|
||||||
|
engine.update_ip_map(new_map)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug("sniffer: ip map refresh failed: %s", exc)
|
||||||
|
|
||||||
|
refresh_thread = threading.Thread(target=_refresh_loop, daemon=True)
|
||||||
|
refresh_thread.start()
|
||||||
|
|
||||||
|
logger.info("sniffer: sniffing on interface=%s deckies=%d", interface, len(ip_map))
|
||||||
|
|
||||||
|
try:
|
||||||
|
sniff(
|
||||||
|
iface=interface,
|
||||||
|
filter="tcp",
|
||||||
|
prn=engine.on_packet,
|
||||||
|
store=False,
|
||||||
|
stop_filter=lambda pkt: stop_event.is_set(),
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("sniffer: scapy sniff exited: %s", exc)
|
||||||
|
finally:
|
||||||
|
stop_event.set()
|
||||||
|
logger.info("sniffer: sniff loop ended")
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("sniffer.worker")
|
||||||
|
async def sniffer_worker(log_file: str) -> None:
|
||||||
|
"""
|
||||||
|
Async entry point — started as asyncio.create_task in the API lifespan.
|
||||||
|
|
||||||
|
Fully fault-isolated: catches all exceptions, logs them, and returns
|
||||||
|
cleanly. The API continues running regardless of sniffer state.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Interface selection: explicit env override wins, otherwise probe
|
||||||
|
# both the MACVLAN and IPvlan host-side names since the driver
|
||||||
|
# choice is per-deploy (--ipvlan flag).
|
||||||
|
env_iface = os.environ.get("DECNET_SNIFFER_IFACE")
|
||||||
|
if env_iface:
|
||||||
|
interface = env_iface
|
||||||
|
elif _interface_exists(HOST_MACVLAN_IFACE):
|
||||||
|
interface = HOST_MACVLAN_IFACE
|
||||||
|
elif _interface_exists(HOST_IPVLAN_IFACE):
|
||||||
|
interface = HOST_IPVLAN_IFACE
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
"sniffer: neither %s nor %s found — sniffer disabled "
|
||||||
|
"(fleet may not be deployed yet)",
|
||||||
|
HOST_MACVLAN_IFACE, HOST_IPVLAN_IFACE,
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
if not _interface_exists(interface):
|
||||||
|
logger.warning(
|
||||||
|
"sniffer: interface %s not found — sniffer disabled "
|
||||||
|
"(fleet may not be deployed yet)", interface,
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
log_path = Path(log_file)
|
||||||
|
json_path = log_path.with_suffix(".json")
|
||||||
|
log_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
stop_event = threading.Event()
|
||||||
|
|
||||||
|
# Dedicated thread pool so the long-running sniff loop doesn't
|
||||||
|
# occupy a slot in the default asyncio executor.
|
||||||
|
sniffer_pool = ThreadPoolExecutor(
|
||||||
|
max_workers=2, thread_name_prefix="decnet-sniffer",
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
loop = asyncio.get_running_loop()
|
||||||
|
await loop.run_in_executor(
|
||||||
|
sniffer_pool, _sniff_loop,
|
||||||
|
interface, log_path, json_path, stop_event,
|
||||||
|
)
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
logger.info("sniffer: shutdown requested")
|
||||||
|
stop_event.set()
|
||||||
|
sniffer_pool.shutdown(wait=False)
|
||||||
|
raise
|
||||||
|
finally:
|
||||||
|
sniffer_pool.shutdown(wait=False)
|
||||||
|
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
raise
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("sniffer: worker failed — API continues without sniffing: %s", exc)
|
||||||
7
decnet/swarm/__init__.py
Normal file
7
decnet/swarm/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
"""DECNET SWARM — multihost deployment subsystem.
|
||||||
|
|
||||||
|
Components:
|
||||||
|
* ``pki`` — X.509 CA + CSR signing used by all swarm mTLS channels
|
||||||
|
* ``client`` — master-side HTTP client that talks to remote workers
|
||||||
|
* ``log_forwarder``— worker-side syslog-over-TLS (RFC 5425) forwarder
|
||||||
|
"""
|
||||||
200
decnet/swarm/client.py
Normal file
200
decnet/swarm/client.py
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
"""Master-side HTTP client that talks to a worker's DECNET agent.
|
||||||
|
|
||||||
|
All traffic is mTLS: the master presents a cert issued by its own CA (which
|
||||||
|
workers trust) and the master validates the worker's cert against the same
|
||||||
|
CA. In practice the "client cert" the master shows is just another cert
|
||||||
|
signed by itself — the master is both the CA and the sole control-plane
|
||||||
|
client.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
async with AgentClient(host) as agent:
|
||||||
|
await agent.deploy(config)
|
||||||
|
status = await agent.status()
|
||||||
|
|
||||||
|
The ``host`` is a SwarmHost dict returned by the repository.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pathlib
|
||||||
|
import ssl
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from decnet.config import DecnetConfig
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.swarm import pki
|
||||||
|
|
||||||
|
log = get_logger("swarm.client")
|
||||||
|
|
||||||
|
# How long a single HTTP operation can take. Deploy is the long pole —
|
||||||
|
# docker compose up pulls images, builds contexts, etc. Tune via env in a
|
||||||
|
# later iteration if the default proves too short.
|
||||||
|
_TIMEOUT_DEPLOY = httpx.Timeout(connect=10.0, read=600.0, write=30.0, pool=5.0)
|
||||||
|
_TIMEOUT_CONTROL = httpx.Timeout(connect=5.0, read=15.0, write=5.0, pool=5.0)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class MasterIdentity:
|
||||||
|
"""Paths to the master's own mTLS client bundle.
|
||||||
|
|
||||||
|
The master uses ONE master-client cert to talk to every worker. It is
|
||||||
|
signed by the DECNET CA (same CA that signs worker certs). Stored
|
||||||
|
under ``~/.decnet/ca/master/`` by ``ensure_master_identity``.
|
||||||
|
"""
|
||||||
|
key_path: pathlib.Path
|
||||||
|
cert_path: pathlib.Path
|
||||||
|
ca_cert_path: pathlib.Path
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_master_identity(
|
||||||
|
ca_dir: pathlib.Path = pki.DEFAULT_CA_DIR,
|
||||||
|
) -> MasterIdentity:
|
||||||
|
"""Create (or load) the master's own client cert.
|
||||||
|
|
||||||
|
Called once by the swarm controller on startup and by the CLI before
|
||||||
|
any master→worker call. Idempotent.
|
||||||
|
"""
|
||||||
|
ca = pki.ensure_ca(ca_dir)
|
||||||
|
master_dir = ca_dir / "master"
|
||||||
|
bundle = pki.load_worker_bundle(master_dir)
|
||||||
|
if bundle is None:
|
||||||
|
issued = pki.issue_worker_cert(ca, "decnet-master", ["127.0.0.1", "decnet-master"])
|
||||||
|
pki.write_worker_bundle(issued, master_dir)
|
||||||
|
return MasterIdentity(
|
||||||
|
key_path=master_dir / "worker.key",
|
||||||
|
cert_path=master_dir / "worker.crt",
|
||||||
|
ca_cert_path=master_dir / "ca.crt",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AgentClient:
|
||||||
|
"""Thin async wrapper around the worker agent's HTTP API."""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
host: dict[str, Any] | None = None,
|
||||||
|
*,
|
||||||
|
address: Optional[str] = None,
|
||||||
|
agent_port: Optional[int] = None,
|
||||||
|
identity: Optional[MasterIdentity] = None,
|
||||||
|
verify_hostname: bool = False,
|
||||||
|
):
|
||||||
|
"""Either pass a SwarmHost dict, or explicit address/port.
|
||||||
|
|
||||||
|
``verify_hostname`` stays False by default because the worker's
|
||||||
|
cert SAN is populated from the operator-supplied address list, not
|
||||||
|
from modern TLS hostname-verification semantics. The mTLS client
|
||||||
|
cert + CA pinning are what authenticate the peer.
|
||||||
|
"""
|
||||||
|
if host is not None:
|
||||||
|
self._address = host["address"]
|
||||||
|
self._port = int(host.get("agent_port") or 8765)
|
||||||
|
self._host_uuid = host.get("uuid")
|
||||||
|
self._host_name = host.get("name")
|
||||||
|
else:
|
||||||
|
if address is None or agent_port is None:
|
||||||
|
raise ValueError(
|
||||||
|
"AgentClient requires either a host dict or address+agent_port"
|
||||||
|
)
|
||||||
|
self._address = address
|
||||||
|
self._port = int(agent_port)
|
||||||
|
self._host_uuid = None
|
||||||
|
self._host_name = None
|
||||||
|
|
||||||
|
self._identity = identity or ensure_master_identity()
|
||||||
|
self._verify_hostname = verify_hostname
|
||||||
|
self._client: Optional[httpx.AsyncClient] = None
|
||||||
|
|
||||||
|
# --------------------------------------------------------------- lifecycle
|
||||||
|
|
||||||
|
def _build_client(self, timeout: httpx.Timeout) -> httpx.AsyncClient:
|
||||||
|
# Build the SSL context manually — httpx.create_ssl_context layers on
|
||||||
|
# purpose/ALPN/default-CA logic that doesn't compose with private-CA
|
||||||
|
# mTLS in all combinations. A bare SSLContext is predictable.
|
||||||
|
ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
|
||||||
|
ctx.load_cert_chain(
|
||||||
|
str(self._identity.cert_path), str(self._identity.key_path)
|
||||||
|
)
|
||||||
|
ctx.load_verify_locations(cafile=str(self._identity.ca_cert_path))
|
||||||
|
ctx.verify_mode = ssl.CERT_REQUIRED
|
||||||
|
# Pin by CA + cert chain, not by DNS — workers enroll with arbitrary
|
||||||
|
# SANs (IPs, hostnames) and we don't want to force operators to keep
|
||||||
|
# those in sync with whatever URL the master happens to use.
|
||||||
|
ctx.check_hostname = self._verify_hostname
|
||||||
|
return httpx.AsyncClient(
|
||||||
|
base_url=f"https://{self._address}:{self._port}",
|
||||||
|
verify=ctx,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def __aenter__(self) -> "AgentClient":
|
||||||
|
self._client = self._build_client(_TIMEOUT_CONTROL)
|
||||||
|
return self
|
||||||
|
|
||||||
|
async def __aexit__(self, *exc: Any) -> None:
|
||||||
|
if self._client:
|
||||||
|
await self._client.aclose()
|
||||||
|
self._client = None
|
||||||
|
|
||||||
|
def _require_client(self) -> httpx.AsyncClient:
|
||||||
|
if self._client is None:
|
||||||
|
raise RuntimeError("AgentClient used outside `async with` block")
|
||||||
|
return self._client
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------- RPCs
|
||||||
|
|
||||||
|
async def health(self) -> dict[str, Any]:
|
||||||
|
resp = await self._require_client().get("/health")
|
||||||
|
resp.raise_for_status()
|
||||||
|
return resp.json()
|
||||||
|
|
||||||
|
async def status(self) -> dict[str, Any]:
|
||||||
|
resp = await self._require_client().get("/status")
|
||||||
|
resp.raise_for_status()
|
||||||
|
return resp.json()
|
||||||
|
|
||||||
|
async def deploy(
|
||||||
|
self,
|
||||||
|
config: DecnetConfig,
|
||||||
|
*,
|
||||||
|
dry_run: bool = False,
|
||||||
|
no_cache: bool = False,
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
body = {
|
||||||
|
"config": config.model_dump(mode="json"),
|
||||||
|
"dry_run": dry_run,
|
||||||
|
"no_cache": no_cache,
|
||||||
|
}
|
||||||
|
# Swap in a long-deploy timeout for this call only.
|
||||||
|
old = self._require_client().timeout
|
||||||
|
self._require_client().timeout = _TIMEOUT_DEPLOY
|
||||||
|
try:
|
||||||
|
resp = await self._require_client().post("/deploy", json=body)
|
||||||
|
finally:
|
||||||
|
self._require_client().timeout = old
|
||||||
|
resp.raise_for_status()
|
||||||
|
return resp.json()
|
||||||
|
|
||||||
|
async def teardown(self, decky_id: Optional[str] = None) -> dict[str, Any]:
|
||||||
|
resp = await self._require_client().post(
|
||||||
|
"/teardown", json={"decky_id": decky_id}
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
return resp.json()
|
||||||
|
|
||||||
|
async def self_destruct(self) -> dict[str, Any]:
|
||||||
|
"""Trigger the worker to stop services and wipe its install."""
|
||||||
|
resp = await self._require_client().post("/self-destruct")
|
||||||
|
resp.raise_for_status()
|
||||||
|
return resp.json()
|
||||||
|
|
||||||
|
# -------------------------------------------------------------- diagnostics
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
return (
|
||||||
|
f"AgentClient(name={self._host_name!r}, "
|
||||||
|
f"address={self._address!r}, port={self._port})"
|
||||||
|
)
|
||||||
293
decnet/swarm/log_forwarder.py
Normal file
293
decnet/swarm/log_forwarder.py
Normal file
@@ -0,0 +1,293 @@
|
|||||||
|
"""Worker-side syslog-over-TLS forwarder (RFC 5425).
|
||||||
|
|
||||||
|
Runs alongside the worker agent. Tails the worker's local RFC 5424 log
|
||||||
|
file (written by the existing docker-collector) and ships each line to
|
||||||
|
the master's listener on TCP 6514 using octet-counted framing over mTLS.
|
||||||
|
Persists the last-forwarded byte offset in a tiny local SQLite so a
|
||||||
|
master crash never causes loss or duplication.
|
||||||
|
|
||||||
|
Design constraints (from the plan, non-negotiable):
|
||||||
|
* transport MUST be TLS — plaintext syslog is never acceptable between
|
||||||
|
hosts; only loopback (decky → worker-local collector) may be plaintext;
|
||||||
|
* mTLS — the listener pins the worker cert against the DECNET CA, so only
|
||||||
|
enrolled workers can push logs;
|
||||||
|
* offset persistence MUST be transactional w.r.t. the send — we only
|
||||||
|
advance the offset after ``writer.drain()`` returns without error.
|
||||||
|
|
||||||
|
The forwarder is intentionally a standalone coroutine, not a worker
|
||||||
|
inside the agent process. That keeps ``decnet agent`` crashes from
|
||||||
|
losing the log tail, and vice versa.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
import sqlite3
|
||||||
|
import ssl
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.swarm import pki
|
||||||
|
|
||||||
|
log = get_logger("swarm.forwarder")
|
||||||
|
|
||||||
|
# RFC 5425 framing: "<octet-count> <syslog-msg>".
|
||||||
|
# The message itself is a standard RFC 5424 line (no trailing newline).
|
||||||
|
_FRAME_SEP = b" "
|
||||||
|
|
||||||
|
_INITIAL_BACKOFF = 1.0
|
||||||
|
_MAX_BACKOFF = 30.0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class ForwarderConfig:
    """Static configuration for one forwarder instance."""

    log_path: pathlib.Path  # worker's RFC 5424 .log file (the tailed source)
    master_host: str  # address of the master's syslog-over-TLS listener
    master_port: int = 6514  # RFC 5425 default port
    agent_dir: pathlib.Path = pki.DEFAULT_AGENT_DIR  # mTLS bundle location
    state_db: Optional[pathlib.Path] = None  # default: agent_dir / "forwarder.db"
    # Max unacked bytes to keep in the local buffer when master is down.
    # We bound the lag to avoid unbounded disk growth on catastrophic master
    # outage — older lines are surfaced as a warning and dropped by advancing
    # the offset.
    max_lag_bytes: int = 128 * 1024 * 1024  # 128 MiB
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------ offset storage
|
||||||
|
|
||||||
|
|
||||||
|
class _OffsetStore:
    """Persist the last-forwarded byte offset in a tiny local SQLite DB.

    Stdlib only — no ORM, no async. One row per key keeps the schema
    trivial and every mutation a single committed statement.
    """

    def __init__(self, db_path: pathlib.Path) -> None:
        """Open (creating if needed) the DB at *db_path* and ensure the table."""
        db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(db_path))
        conn.execute(
            "CREATE TABLE IF NOT EXISTS forwarder_offset ("
            " key TEXT PRIMARY KEY, offset INTEGER NOT NULL)"
        )
        conn.commit()
        self._conn = conn

    def get(self, key: str = "default") -> int:
        """Return the stored offset for *key*, or 0 when no row exists."""
        cursor = self._conn.execute(
            "SELECT offset FROM forwarder_offset WHERE key=?", (key,)
        )
        row = cursor.fetchone()
        if row is None:
            return 0
        return int(row[0])

    def set(self, offset: int, key: str = "default") -> None:
        """Upsert the offset for *key* and commit immediately."""
        self._conn.execute(
            "INSERT INTO forwarder_offset(key, offset) VALUES(?, ?) "
            "ON CONFLICT(key) DO UPDATE SET offset=excluded.offset",
            (key, offset),
        )
        self._conn.commit()

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self._conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------- TLS setup
|
||||||
|
|
||||||
|
|
||||||
|
def build_worker_ssl_context(agent_dir: pathlib.Path) -> ssl.SSLContext:
    """Client-side mTLS context for the forwarder.

    The worker presents its agent bundle (the same cert that backs its
    control-plane HTTPS listener) and trusts only the DECNET CA. Hostname
    checking is disabled because workers reach masters by operator-supplied
    address; trust is pinned by CA instead.

    Raises ``RuntimeError`` when no enrollment bundle exists in *agent_dir*.
    """
    if pki.load_worker_bundle(agent_dir) is None:
        raise RuntimeError(
            f"no worker bundle at {agent_dir} — enroll from the master first"
        )
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.load_cert_chain(
        certfile=str(agent_dir / "worker.crt"),
        keyfile=str(agent_dir / "worker.key"),
    )
    context.load_verify_locations(cafile=str(agent_dir / "ca.crt"))
    context.verify_mode = ssl.CERT_REQUIRED
    context.check_hostname = False
    return context
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------- frame encoding
|
||||||
|
|
||||||
|
|
||||||
|
def encode_frame(line: str) -> bytes:
    """Encode *line* with RFC 5425 octet-counted framing: ``b"<N> <msg>"``.

    ``N`` is the byte length of the UTF-8 payload following the space;
    trailing newlines are stripped before encoding.
    """
    payload = line.rstrip("\n").encode("utf-8", errors="replace")
    return str(len(payload)).encode("ascii") + b" " + payload
|
||||||
|
|
||||||
|
|
||||||
|
async def read_frame(reader: asyncio.StreamReader) -> Optional[bytes]:
    """Read one RFC 5425 octet-counted frame from *reader*.

    Returns ``None`` on clean EOF (stream exhausted between frames) and
    ``b""`` when EOF cuts a length prefix short; raises ``ValueError`` on a
    malformed prefix. The prefix is consumed one byte at a time so a
    malicious peer cannot force unbounded buffering before validation.
    """
    digits = b""
    while True:
        ch = await reader.read(1)
        if not ch:
            # EOF: distinguish "between frames" from "mid-prefix".
            return b"" if digits else None
        if ch == b" ":
            break
        if not ch.isdigit() or len(digits) >= 10:
            # RFC 5425 caps the length prefix at ~10 digits (< 4 GiB payload).
            raise ValueError(f"invalid octet-count prefix: {digits!r}")
        digits += ch
    return await reader.readexactly(int(digits))
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------- main loop
|
||||||
|
|
||||||
|
|
||||||
|
async def _send_batch(
    writer: asyncio.StreamWriter,
    offset: int,
    lines: list[tuple[int, str]],
    store: _OffsetStore,
) -> int:
    """Write every line as a frame, drain, then persist the last offset.

    ``lines`` holds ``(offset_after_line, text)`` pairs. The stored offset
    advances only after ``drain()`` returns without error, so a failed send
    is retried from the same byte on reconnect (at-least-once delivery).

    NOTE(review): the ``offset`` parameter is currently unused — callers pass
    it, but the new offset is taken from ``lines[-1][0]``.
    """
    for _, line in lines:
        writer.write(encode_frame(line))
    await writer.drain()
    last_offset = lines[-1][0]
    store.set(last_offset)
    return last_offset
|
||||||
|
|
||||||
|
|
||||||
|
async def run_forwarder(
    cfg: ForwarderConfig,
    *,
    poll_interval: float = 0.5,
    stop_event: Optional[asyncio.Event] = None,
) -> None:
    """Main forwarder loop. Run as a dedicated task.

    Stops when ``stop_event`` is set (used by tests and clean shutdown).
    Exceptions trigger exponential backoff but are never fatal — the
    forwarder is expected to outlive transient master/network failures.
    """
    state_db = cfg.state_db or (cfg.agent_dir / "forwarder.db")
    store = _OffsetStore(state_db)
    # Resume from the last durably-acknowledged byte (0 on first run).
    offset = store.get()
    backoff = _INITIAL_BACKOFF

    log.info(
        "forwarder start log=%s master=%s:%d offset=%d",
        cfg.log_path, cfg.master_host, cfg.master_port, offset,
    )

    try:
        while stop_event is None or not stop_event.is_set():
            try:
                # Rebuild the SSL context on every connect so a re-enrolled
                # bundle is picked up without restarting the forwarder.
                ctx = build_worker_ssl_context(cfg.agent_dir)
                reader, writer = await asyncio.open_connection(
                    cfg.master_host, cfg.master_port, ssl=ctx
                )
                log.info("forwarder connected master=%s:%d", cfg.master_host, cfg.master_port)
                # A successful connect resets the backoff ladder.
                backoff = _INITIAL_BACKOFF
                try:
                    offset = await _pump(cfg, store, writer, offset, poll_interval, stop_event)
                finally:
                    writer.close()
                    try:
                        await writer.wait_closed()
                    except Exception:  # nosec B110 — socket cleanup is best-effort
                        pass
                    # Keep reader alive until here to avoid "reader garbage
                    # collected" warnings on some Python builds.
                    del reader
            except (OSError, ssl.SSLError, ConnectionError) as exc:
                log.warning(
                    "forwarder disconnected: %s — retrying in %.1fs", exc, backoff
                )
                try:
                    # The +1 margin lets _sleep_unless_stopped complete
                    # normally; wait_for is a belt-and-braces upper bound.
                    await asyncio.wait_for(
                        _sleep_unless_stopped(backoff, stop_event), timeout=backoff + 1
                    )
                except asyncio.TimeoutError:
                    pass
                backoff = min(_MAX_BACKOFF, backoff * 2)
    finally:
        store.close()
        log.info("forwarder stopped offset=%d", offset)
|
||||||
|
|
||||||
|
|
||||||
|
async def _pump(
    cfg: ForwarderConfig,
    store: _OffsetStore,
    writer: asyncio.StreamWriter,
    offset: int,
    poll_interval: float,
    stop_event: Optional[asyncio.Event],
) -> int:
    """Read new lines since ``offset`` and ship them until disconnect.

    Returns the final offset so the caller can resume on reconnect.
    Send failures propagate to the caller (run_forwarder), which treats
    them as a disconnect.

    NOTE(review): offsets mix ``os.stat`` byte sizes with text-mode
    ``tell()`` cookies; on CPython with plain UTF-8 these coincide, but
    confirm before changing the open mode or encoding.
    """
    while stop_event is None or not stop_event.is_set():
        if not cfg.log_path.exists():
            # Collector hasn't created the file yet — idle and re-check.
            await _sleep_unless_stopped(poll_interval, stop_event)
            continue

        stat = cfg.log_path.stat()
        if stat.st_size < offset:
            # truncated/rotated — reset.
            log.warning("forwarder log rotated — resetting offset=0")
            offset = 0
            store.set(0)
        if stat.st_size - offset > cfg.max_lag_bytes:
            # Catastrophic lag — skip ahead to cap local disk pressure.
            skip_to = stat.st_size - cfg.max_lag_bytes
            log.warning(
                "forwarder lag %d > cap %d — dropping oldest %d bytes",
                stat.st_size - offset, cfg.max_lag_bytes, skip_to - offset,
            )
            offset = skip_to
            store.set(offset)

        if stat.st_size == offset:
            # Fully caught up — wait for new bytes.
            await _sleep_unless_stopped(poll_interval, stop_event)
            continue

        batch: list[tuple[int, str]] = []
        with open(cfg.log_path, "r", encoding="utf-8", errors="replace") as f:
            f.seek(offset)
            while True:
                line = f.readline()
                # Stop at EOF or on a partially-written trailing line; an
                # incomplete line is re-read in full on the next poll.
                if not line or not line.endswith("\n"):
                    break
                offset_after = f.tell()
                batch.append((offset_after, line.rstrip("\n")))
                # Cap the batch so a single drain never blocks too long.
                if len(batch) >= 500:
                    break
        if batch:
            offset = await _send_batch(writer, offset, batch, store)
    return offset
|
||||||
|
|
||||||
|
|
||||||
|
async def _sleep_unless_stopped(
    seconds: float, stop_event: Optional[asyncio.Event]
) -> None:
    """Sleep up to *seconds*, returning early once *stop_event* is set."""
    if stop_event is None:
        await asyncio.sleep(seconds)
    else:
        try:
            await asyncio.wait_for(stop_event.wait(), timeout=seconds)
        except asyncio.TimeoutError:
            # Timeout just means the stop signal never arrived — not an error.
            pass
|
||||||
|
|
||||||
|
|
||||||
|
# Re-exported for CLI convenience
|
||||||
|
DEFAULT_PORT = 6514
|
||||||
|
|
||||||
|
|
||||||
|
def default_master_host() -> Optional[str]:
    """Master address for the forwarder, from ``DECNET_SWARM_MASTER_HOST``."""
    return os.getenv("DECNET_SWARM_MASTER_HOST")
|
||||||
194
decnet/swarm/log_listener.py
Normal file
194
decnet/swarm/log_listener.py
Normal file
@@ -0,0 +1,194 @@
|
|||||||
|
"""Master-side syslog-over-TLS listener (RFC 5425).
|
||||||
|
|
||||||
|
Accepts mTLS-authenticated worker connections on TCP 6514, reads
|
||||||
|
octet-counted frames, parses each as an RFC 5424 line, and appends it to
|
||||||
|
the master's local ingest log files. The existing log_ingestion_worker
|
||||||
|
tails those files and inserts records into the master repo — worker
|
||||||
|
provenance is embedded in the parsed record's ``source_worker`` field.
|
||||||
|
|
||||||
|
Design:
|
||||||
|
* TLS is mandatory. No plaintext fallback. A peer without a CA-signed
|
||||||
|
cert is rejected at the TLS handshake; nothing gets past the kernel.
|
||||||
|
* The listener never trusts the syslog HOSTNAME field for provenance —
|
||||||
|
that's attacker-supplied from the decky. The authoritative source is
|
||||||
|
the peer cert's CN, which the CA controlled at enrollment.
|
||||||
|
* Dropped connections are fine — the worker's forwarder holds the
|
||||||
|
offset and resumes from the same byte on reconnect.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import pathlib
|
||||||
|
import ssl
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from cryptography import x509
|
||||||
|
from cryptography.hazmat.primitives import serialization
|
||||||
|
from cryptography.x509.oid import NameOID
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.swarm import pki
|
||||||
|
from decnet.swarm.log_forwarder import read_frame
|
||||||
|
|
||||||
|
log = get_logger("swarm.listener")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class ListenerConfig:
    """Static configuration for the master-side syslog-TLS listener."""

    log_path: pathlib.Path  # master's RFC 5424 .log (forensic sink)
    json_path: pathlib.Path  # master's .json (ingester tails this)
    bind_host: str = "0.0.0.0"  # nosec B104 — listener must bind publicly
    bind_port: int = 6514  # RFC 5425 default port
    ca_dir: pathlib.Path = pki.DEFAULT_CA_DIR  # root of the master's PKI tree
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------- TLS context
|
||||||
|
|
||||||
|
|
||||||
|
def build_listener_ssl_context(ca_dir: pathlib.Path) -> ssl.SSLContext:
    """Server-side mTLS context for the listener.

    The master presents its own certificate; every client must present a
    cert signed by the DECNET CA or the TLS handshake fails.

    Raises ``RuntimeError`` when any part of the master identity is missing.
    """
    master_dir = ca_dir / "master"
    ca_cert = master_dir / "ca.crt"
    cert = master_dir / "worker.crt"  # master re-uses the 'worker' bundle layout
    key = master_dir / "worker.key"
    if not all(p.exists() for p in (ca_cert, cert, key)):
        raise RuntimeError(
            f"master identity missing at {master_dir} — call ensure_master_identity first"
        )
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
    context.load_cert_chain(certfile=str(cert), keyfile=str(key))
    context.load_verify_locations(cafile=str(ca_cert))
    context.verify_mode = ssl.CERT_REQUIRED
    return context
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------- helpers
|
||||||
|
|
||||||
|
|
||||||
|
def peer_cn(ssl_object: Optional[ssl.SSLObject]) -> str:
    """Extract the CN from the TLS peer certificate (worker provenance).

    Returns ``"unknown"`` whenever the peer cert is absent or its DN cannot
    be parsed — provenance tagging must never crash the listener; tagged
    messages can be inspected later instead.
    """
    if ssl_object is None:
        return "unknown"
    der = ssl_object.getpeercert(binary_form=True)
    if der is None:
        return "unknown"
    try:
        subject = x509.load_der_x509_certificate(der).subject
        cns = subject.get_attributes_for_oid(NameOID.COMMON_NAME)
    except Exception:  # nosec B110 — provenance is best-effort
        return "unknown"
    return cns[0].value if cns else "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def fingerprint_from_ssl(ssl_object: Optional[ssl.SSLObject]) -> Optional[str]:
    """Fingerprint of the TLS peer certificate, or ``None`` if unavailable.

    Best-effort like ``peer_cn``: any parse failure yields ``None`` rather
    than an exception.
    """
    if ssl_object is None:
        return None
    der = ssl_object.getpeercert(binary_form=True)
    if der is None:
        return None
    try:
        pem = x509.load_der_x509_certificate(der).public_bytes(
            serialization.Encoding.PEM
        )
        return pki.fingerprint(pem)
    except Exception:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------- per-connection handler
|
||||||
|
|
||||||
|
|
||||||
|
async def _handle_connection(
    reader: asyncio.StreamReader,
    writer: asyncio.StreamWriter,
    cfg: ListenerConfig,
) -> None:
    """Serve one authenticated worker connection until EOF or error.

    Provenance comes from the TLS peer cert's CN — never from the syslog
    HOSTNAME field, which is attacker-supplied from the decky.
    """
    ssl_obj = writer.get_extra_info("ssl_object")
    cn = peer_cn(ssl_obj)
    peer = writer.get_extra_info("peername")
    log.info("listener accepted worker=%s peer=%s", cn, peer)

    # Lazy import to avoid a circular dep if the collector pulls in logger setup.
    from decnet.collector.worker import parse_rfc5424

    cfg.log_path.parent.mkdir(parents=True, exist_ok=True)
    cfg.json_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        # NOTE(review): plain blocking file writes inside a coroutine — they
        # briefly stall the event loop; presumably acceptable at current
        # volumes, confirm under load.
        with open(cfg.log_path, "a", encoding="utf-8") as lf, open(
            cfg.json_path, "a", encoding="utf-8"
        ) as jf:
            while True:
                try:
                    frame = await read_frame(reader)
                except asyncio.IncompleteReadError:
                    # Peer vanished mid-frame; the forwarder resends on reconnect.
                    break
                except ValueError as exc:
                    log.warning("listener bad frame worker=%s err=%s", cn, exc)
                    break
                if frame is None:
                    # Clean EOF between frames.
                    break
                if not frame:
                    # Empty frame (EOF cut a prefix short) — nothing to record.
                    continue
                line = frame.decode("utf-8", errors="replace")
                # Forensic sink first: keep the raw line even if parsing fails.
                lf.write(line + "\n")
                lf.flush()
                parsed = parse_rfc5424(line)
                if parsed is not None:
                    # Authoritative provenance: the CA-controlled cert CN.
                    parsed["source_worker"] = cn
                    jf.write(json.dumps(parsed) + "\n")
                    jf.flush()
                else:
                    log.debug("listener malformed RFC5424 worker=%s snippet=%r", cn, line[:80])
    except Exception as exc:
        log.warning("listener connection error worker=%s err=%s", cn, exc)
    finally:
        writer.close()
        try:
            await writer.wait_closed()
        except Exception:  # nosec B110 — socket cleanup is best-effort
            pass
        log.info("listener closed worker=%s", cn)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------- server
|
||||||
|
|
||||||
|
|
||||||
|
async def run_listener(
    cfg: ListenerConfig,
    *,
    stop_event: Optional[asyncio.Event] = None,
) -> None:
    """Bind the mTLS syslog listener and serve until ``stop_event`` is set.

    With no ``stop_event`` this call never returns (``serve_forever``).
    """
    ctx = build_listener_ssl_context(cfg.ca_dir)

    async def _client_cb(
        reader: asyncio.StreamReader, writer: asyncio.StreamWriter
    ) -> None:
        # Thin adapter: start_server's callback only receives (reader, writer).
        await _handle_connection(reader, writer, cfg)

    server = await asyncio.start_server(
        _client_cb, host=cfg.bind_host, port=cfg.bind_port, ssl=ctx
    )
    sockets = server.sockets or ()
    log.info(
        "listener bound host=%s port=%d sockets=%d",
        cfg.bind_host, cfg.bind_port, len(sockets),
    )
    async with server:
        if stop_event is None:
            await server.serve_forever()
        else:
            serve_task = asyncio.create_task(server.serve_forever())
            await stop_event.wait()
            server.close()
            # serve_forever never returns normally — cancel and swallow the
            # resulting CancelledError during shutdown.
            serve_task.cancel()
            try:
                await serve_task
            except (asyncio.CancelledError, Exception):  # nosec B110
                pass
|
||||||
323
decnet/swarm/pki.py
Normal file
323
decnet/swarm/pki.py
Normal file
@@ -0,0 +1,323 @@
|
|||||||
|
"""DECNET SWARM PKI — self-managed X.509 CA for master↔worker mTLS.
|
||||||
|
|
||||||
|
Used by:
|
||||||
|
* the SWARM controller (master) to issue per-worker server+client certs at
|
||||||
|
enrollment time,
|
||||||
|
* the agent (worker) to present its mTLS identity for both the control-plane
|
||||||
|
HTTPS endpoint and the syslog-over-TLS (RFC 5425) log forwarder,
|
||||||
|
* the master-side syslog-TLS listener to authenticate inbound workers.
|
||||||
|
|
||||||
|
Storage layout (master):
|
||||||
|
|
||||||
|
~/.decnet/ca/
|
||||||
|
ca.key (PEM, 0600 — the CA private key)
|
||||||
|
ca.crt (PEM — self-signed root)
|
||||||
|
workers/<worker-name>/
|
||||||
|
client.crt (issued, signed by CA)
|
||||||
|
|
||||||
|
Worker layout (delivered by /enroll response):
|
||||||
|
|
||||||
|
~/.decnet/agent/
|
||||||
|
ca.crt (master's CA — trust anchor)
|
||||||
|
worker.key (worker's own private key)
|
||||||
|
worker.crt (signed by master CA — used for both TLS
|
||||||
|
server auth *and* syslog client auth)
|
||||||
|
|
||||||
|
The CA is a hard dependency only in swarm mode; unihost installs never
|
||||||
|
touch this module.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import datetime as _dt
|
||||||
|
import hashlib
|
||||||
|
import ipaddress
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from cryptography import x509
|
||||||
|
from cryptography.hazmat.primitives import hashes, serialization
|
||||||
|
from cryptography.hazmat.primitives.asymmetric import rsa
|
||||||
|
from cryptography.x509.oid import NameOID
|
||||||
|
|
||||||
|
# Master-side CA storage root (key + self-signed root cert).
DEFAULT_CA_DIR = pathlib.Path(os.path.expanduser("~/.decnet/ca"))
# Worker-side enrollment bundle location (ca.crt / worker.key / worker.crt).
DEFAULT_AGENT_DIR = pathlib.Path(os.path.expanduser("~/.decnet/agent"))
# swarmctl TLS material issued by ensure_swarmctl_cert.
DEFAULT_SWARMCTL_DIR = pathlib.Path(os.path.expanduser("~/.decnet/swarmctl"))

CA_KEY_BITS = 4096  # root key size
WORKER_KEY_BITS = 2048  # leaf (worker) key size
CA_VALIDITY_DAYS = 3650  # 10 years — internal CA
WORKER_VALIDITY_DAYS = 825  # max permitted by modern TLS clients
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class CABundle:
    """The master's CA identity (key is secret, cert is published)."""

    key_pem: bytes  # PEM-encoded CA private key — never leaves the master
    cert_pem: bytes  # PEM-encoded self-signed root certificate
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class IssuedCert:
    """A signed worker certificate + its private key, handed to the worker
    exactly once during enrollment.
    """

    key_pem: bytes  # worker's own private key (PEM)
    cert_pem: bytes  # leaf certificate signed by the DECNET CA (PEM)
    ca_cert_pem: bytes  # the CA certificate (worker's trust anchor)
    fingerprint_sha256: str  # hex, lowercase
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------- CA ops
|
||||||
|
|
||||||
|
|
||||||
|
def _pem_private(key: rsa.RSAPrivateKey) -> bytes:
    """Serialize *key* as unencrypted PKCS#8 PEM bytes."""
    return key.private_bytes(
        format=serialization.PrivateFormat.PKCS8,
        encoding=serialization.Encoding.PEM,
        encryption_algorithm=serialization.NoEncryption(),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _pem_cert(cert: x509.Certificate) -> bytes:
    """Serialize *cert* as PEM bytes."""
    encoding = serialization.Encoding.PEM
    return cert.public_bytes(encoding)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_ca(common_name: str = "DECNET SWARM Root CA") -> CABundle:
    """Generate a fresh self-signed CA. Does not touch disk."""
    key = rsa.generate_private_key(public_exponent=65537, key_size=CA_KEY_BITS)
    subject = issuer = x509.Name(
        [
            x509.NameAttribute(NameOID.COMMON_NAME, common_name),
            x509.NameAttribute(NameOID.ORGANIZATION_NAME, "DECNET"),
        ]
    )
    now = _dt.datetime.now(_dt.timezone.utc)
    cert = (
        x509.CertificateBuilder()
        .subject_name(subject)
        .issuer_name(issuer)
        .public_key(key.public_key())
        .serial_number(x509.random_serial_number())
        # Back-dated 5 minutes to tolerate clock skew between hosts.
        .not_valid_before(now - _dt.timedelta(minutes=5))
        .not_valid_after(now + _dt.timedelta(days=CA_VALIDITY_DAYS))
        # path_length=0: this CA signs leaves only — no intermediates.
        .add_extension(x509.BasicConstraints(ca=True, path_length=0), critical=True)
        .add_extension(
            # CA usage only: cert/CRL signing (plus digital signature).
            x509.KeyUsage(
                digital_signature=True,
                content_commitment=False,
                key_encipherment=False,
                data_encipherment=False,
                key_agreement=False,
                key_cert_sign=True,
                crl_sign=True,
                encipher_only=False,
                decipher_only=False,
            ),
            critical=True,
        )
        .sign(private_key=key, algorithm=hashes.SHA256())
    )
    return CABundle(key_pem=_pem_private(key), cert_pem=_pem_cert(cert))
|
||||||
|
|
||||||
|
|
||||||
|
def save_ca(bundle: CABundle, ca_dir: pathlib.Path = DEFAULT_CA_DIR) -> None:
    """Persist the CA to *ca_dir* with restrictive permissions.

    0700 on the dir, 0600 on the key — defence against casual reads.
    """
    ca_dir.mkdir(parents=True, exist_ok=True)
    os.chmod(ca_dir, 0o700)
    key_path = ca_dir / "ca.key"
    key_path.write_bytes(bundle.key_pem)
    os.chmod(key_path, 0o600)
    (ca_dir / "ca.crt").write_bytes(bundle.cert_pem)
|
||||||
|
|
||||||
|
|
||||||
|
def load_ca(ca_dir: pathlib.Path = DEFAULT_CA_DIR) -> CABundle:
    """Load the persisted CA from *ca_dir* (raises if the files are absent)."""
    return CABundle(
        key_pem=(ca_dir / "ca.key").read_bytes(),
        cert_pem=(ca_dir / "ca.crt").read_bytes(),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_ca(ca_dir: pathlib.Path = DEFAULT_CA_DIR) -> CABundle:
    """Load the CA if present, otherwise generate and persist a new one."""
    have_key = (ca_dir / "ca.key").exists()
    have_cert = (ca_dir / "ca.crt").exists()
    if have_key and have_cert:
        return load_ca(ca_dir)
    fresh = generate_ca()
    save_ca(fresh, ca_dir)
    return fresh
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------- cert issuance
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_san(value: str) -> x509.GeneralName:
    """Parse a SAN entry as an IP address if possible, otherwise as DNS."""
    try:
        addr = ipaddress.ip_address(value)
    except ValueError:
        return x509.DNSName(value)
    return x509.IPAddress(addr)
|
||||||
|
|
||||||
|
|
||||||
|
def issue_worker_cert(
    ca: CABundle,
    worker_name: str,
    sans: list[str],
    validity_days: int = WORKER_VALIDITY_DAYS,
) -> IssuedCert:
    """Sign a freshly-generated worker keypair.

    The cert is usable as BOTH a TLS server (agent's HTTPS endpoint) and a
    TLS client (syslog-over-TLS upstream to the master) — extended key usage
    covers both. ``sans`` should include every address/name the master or
    workers will use to reach this worker — typically the worker's IP plus
    its hostname.
    """
    ca_key = serialization.load_pem_private_key(ca.key_pem, password=None)
    ca_cert = x509.load_pem_x509_certificate(ca.cert_pem)

    # A fresh keypair per issuance — the worker key never existed before this call.
    worker_key = rsa.generate_private_key(public_exponent=65537, key_size=WORKER_KEY_BITS)
    subject = x509.Name(
        [
            x509.NameAttribute(NameOID.COMMON_NAME, worker_name),
            x509.NameAttribute(NameOID.ORGANIZATION_NAME, "DECNET"),
            x509.NameAttribute(NameOID.ORGANIZATIONAL_UNIT_NAME, "swarm-worker"),
        ]
    )
    now = _dt.datetime.now(_dt.timezone.utc)
    san_entries: list[x509.GeneralName] = [_parse_san(s) for s in sans] if sans else []
    # Always include the worker-name as a DNS SAN so cert pinning by CN-as-DNS
    # works even when the operator forgets to pass an explicit SAN list.
    if not any(
        isinstance(e, x509.DNSName) and e.value == worker_name for e in san_entries
    ):
        san_entries.append(x509.DNSName(worker_name))

    builder = (
        x509.CertificateBuilder()
        .subject_name(subject)
        .issuer_name(ca_cert.subject)
        .public_key(worker_key.public_key())
        .serial_number(x509.random_serial_number())
        # Back-dated 5 minutes to tolerate clock skew between hosts.
        .not_valid_before(now - _dt.timedelta(minutes=5))
        .not_valid_after(now + _dt.timedelta(days=validity_days))
        # Leaf cert: explicitly not a CA.
        .add_extension(x509.BasicConstraints(ca=False, path_length=None), critical=True)
        .add_extension(
            x509.KeyUsage(
                digital_signature=True,
                content_commitment=False,
                key_encipherment=True,
                data_encipherment=False,
                key_agreement=False,
                key_cert_sign=False,
                crl_sign=False,
                encipher_only=False,
                decipher_only=False,
            ),
            critical=True,
        )
        .add_extension(
            x509.ExtendedKeyUsage(
                [
                    x509.ObjectIdentifier("1.3.6.1.5.5.7.3.1"),  # serverAuth
                    x509.ObjectIdentifier("1.3.6.1.5.5.7.3.2"),  # clientAuth
                ]
            ),
            critical=True,
        )
        .add_extension(x509.SubjectAlternativeName(san_entries), critical=False)
    )
    cert = builder.sign(private_key=ca_key, algorithm=hashes.SHA256())
    cert_pem = _pem_cert(cert)
    # SHA-256 over the DER encoding — the conventional certificate fingerprint.
    fp = hashlib.sha256(
        cert.public_bytes(serialization.Encoding.DER)
    ).hexdigest()
    return IssuedCert(
        key_pem=_pem_private(worker_key),
        cert_pem=cert_pem,
        ca_cert_pem=ca.cert_pem,
        fingerprint_sha256=fp,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def write_worker_bundle(
    issued: IssuedCert,
    agent_dir: pathlib.Path = DEFAULT_AGENT_DIR,
) -> None:
    """Persist an issued bundle into the worker's agent directory.

    Writes ``ca.crt``, ``worker.crt`` and ``worker.key`` under ``agent_dir``.
    The directory is restricted to 0o700 and the private key to 0o600.  The
    key file is created with restrictive permissions *before* any key bytes
    are written — the previous write-then-chmod ordering briefly exposed the
    key under a permissive umask.
    """
    agent_dir.mkdir(parents=True, exist_ok=True)
    os.chmod(agent_dir, 0o700)
    (agent_dir / "ca.crt").write_bytes(issued.ca_cert_pem)
    (agent_dir / "worker.crt").write_bytes(issued.cert_pem)
    key_path = agent_dir / "worker.key"
    # Open with mode 0o600 up front so the key is never world-readable,
    # even transiently.  O_TRUNC handles re-enrollment over an old key.
    fd = os.open(key_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        os.write(fd, issued.key_pem)
    finally:
        os.close(fd)
    # Re-assert the mode in case the file pre-existed with looser perms
    # (os.open's mode only applies on creation).
    os.chmod(key_path, 0o600)
|
||||||
|
|
||||||
|
|
||||||
|
def load_worker_bundle(
    agent_dir: pathlib.Path = DEFAULT_AGENT_DIR,
) -> Optional[IssuedCert]:
    """Return the worker's bundle if enrolled; ``None`` otherwise.

    "Enrolled" means all three of ``ca.crt``, ``worker.crt`` and
    ``worker.key`` exist under ``agent_dir``.
    """
    ca_path = agent_dir / "ca.crt"
    cert_path = agent_dir / "worker.crt"
    key_path = agent_dir / "worker.key"
    if not all(p.exists() for p in (ca_path, cert_path, key_path)):
        return None
    cert_pem = cert_path.read_bytes()
    parsed = x509.load_pem_x509_certificate(cert_pem)
    der = parsed.public_bytes(serialization.Encoding.DER)
    return IssuedCert(
        key_pem=key_path.read_bytes(),
        cert_pem=cert_pem,
        ca_cert_pem=ca_path.read_bytes(),
        fingerprint_sha256=hashlib.sha256(der).hexdigest(),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_swarmctl_cert(
    bind_host: str,
    ca_dir: pathlib.Path = DEFAULT_CA_DIR,
    swarmctl_dir: pathlib.Path = DEFAULT_SWARMCTL_DIR,
    extra_sans: Optional[list[str]] = None,
) -> tuple[pathlib.Path, pathlib.Path, pathlib.Path]:
    """Return (cert_path, key_path, ca_path), auto-issuing if missing.

    Uses the existing DECNET CA (ensuring it exists first) so workers
    whose bundle already includes ``ca.crt`` can verify the swarmctl
    endpoint without additional trust configuration. Self-signed is
    intentionally not the default — a cert signed by the same CA the
    workers already trust is the friction-free path.

    Callers that want BYOC should skip this and pass their own
    cert/key paths directly to uvicorn.
    """
    swarmctl_dir.mkdir(parents=True, exist_ok=True)
    os.chmod(swarmctl_dir, 0o700)
    cert_path = swarmctl_dir / "server.crt"
    key_path = swarmctl_dir / "server.key"
    ca_cert_path = ca_dir / "ca.crt"

    # Fast path: everything already on disk, nothing to issue.
    if all(p.exists() for p in (cert_path, key_path, ca_cert_path)):
        return cert_path, key_path, ca_cert_path

    ca = ensure_ca(ca_dir)
    # Deduplicate SANs; the bind host plus loopback names are always included.
    san_set = {bind_host, "127.0.0.1", "localhost"}
    san_set.update(extra_sans or [])
    issued = issue_worker_cert(ca, "swarmctl", list(san_set))
    cert_path.write_bytes(issued.cert_pem)
    key_path.write_bytes(issued.key_pem)
    os.chmod(key_path, 0o600)
    # ensure_ca already wrote ca.crt under ca_dir, but save_ca is only
    # called on generate — re-mirror it here to guarantee the path exists.
    if not ca_cert_path.exists():
        ca_cert_path.write_bytes(ca.cert_pem)
    return cert_path, key_path, ca_cert_path
|
||||||
|
|
||||||
|
|
||||||
|
def fingerprint(cert_pem: bytes) -> str:
    """SHA-256 hex fingerprint of a cert (DER-encoded)."""
    der = x509.load_pem_x509_certificate(cert_pem).public_bytes(
        serialization.Encoding.DER
    )
    return hashlib.sha256(der).hexdigest()
|
||||||
97
decnet/swarm/tar_tree.py
Normal file
97
decnet/swarm/tar_tree.py
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
"""Build a gzipped tarball of the master's working tree for pushing to workers.
|
||||||
|
|
||||||
|
Always excludes the obvious large / secret / churn paths: ``.venv/``,
|
||||||
|
``__pycache__/``, ``.git/``, ``wiki-checkout/``, ``*.db*``, ``*.log``. The
|
||||||
|
caller can supply additional exclude globs.
|
||||||
|
|
||||||
|
Deliberately does NOT invoke git — the tree is what the operator has on
|
||||||
|
disk (staged + unstaged + untracked). That's the whole point; the scp
|
||||||
|
workflow we're replacing also shipped the live tree.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import fnmatch
|
||||||
|
import io
|
||||||
|
import pathlib
|
||||||
|
import tarfile
|
||||||
|
from typing import Iterable, Optional
|
||||||
|
|
||||||
|
# Glob patterns always excluded from the pushed tarball.  ``_is_excluded``
# matches each pattern against the POSIX relative path AND every leading
# subpath, so a bare directory name like ".venv" also covers its contents.
DEFAULT_EXCLUDES = (
    # Virtualenvs, top-level and nested.
    ".venv", ".venv/*",
    "**/.venv/*",
    # Python bytecode caches.
    "__pycache__", "**/__pycache__", "**/__pycache__/*",
    # VCS metadata — the tree is shipped as-is, not via git.
    ".git", ".git/*",
    "wiki-checkout", "wiki-checkout/*",
    "*.pyc", "*.pyo",
    # SQLite databases plus their WAL/shared-memory journal files.
    "*.db", "*.db-wal", "*.db-shm",
    "*.log",
    # Tool caches and packaging metadata.
    ".pytest_cache", ".pytest_cache/*",
    ".mypy_cache", ".mypy_cache/*",
    ".tox", ".tox/*",
    "*.egg-info", "*.egg-info/*",
    # Master-side runtime state files.
    "decnet-state.json",
    "master.log", "master.json",
    "decnet.db*",
)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_excluded(rel: str, patterns: Iterable[str]) -> bool:
|
||||||
|
parts = pathlib.PurePosixPath(rel).parts
|
||||||
|
for pat in patterns:
|
||||||
|
if fnmatch.fnmatch(rel, pat):
|
||||||
|
return True
|
||||||
|
# Also match the pattern against every leading subpath — this is
|
||||||
|
# what catches nested `.venv/...` without forcing callers to spell
|
||||||
|
# out every `**/` glob.
|
||||||
|
for i in range(1, len(parts) + 1):
|
||||||
|
if fnmatch.fnmatch("/".join(parts[:i]), pat):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def tar_working_tree(
    root: pathlib.Path,
    extra_excludes: Optional[Iterable[str]] = None,
) -> bytes:
    """Return the gzipped tarball bytes of ``root``.

    Entries are added with paths relative to ``root`` (no leading ``/``,
    no ``..``). The updater rejects unsafe paths on the receiving side.
    """
    excludes = [*DEFAULT_EXCLUDES, *(extra_excludes or ())]
    sink = io.BytesIO()

    with tarfile.open(fileobj=sink, mode="w:gz") as archive:
        # Sorted walk keeps member order stable across runs.
        for entry in sorted(root.rglob("*")):
            arcname = entry.relative_to(root).as_posix()
            if _is_excluded(arcname, excludes):
                continue
            # Symlinks inside a repo tree are rare and often break
            # portability; skip them rather than ship dangling links.
            # Directories need no explicit members — files imply them.
            if entry.is_symlink() or entry.is_dir():
                continue
            archive.add(entry, arcname=arcname, recursive=False)

    return sink.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
def detect_git_sha(root: pathlib.Path) -> str:
    """Best-effort ``HEAD`` sha. Returns ``""`` if not a git repo."""
    head_file = root / ".git" / "HEAD"
    if not head_file.is_file():
        return ""
    try:
        head = head_file.read_text().strip()
    except OSError:
        return ""
    if not head.startswith("ref: "):
        # Detached HEAD — the file holds the sha directly.
        return head
    # Symbolic ref: resolve it through .git/<ref>.
    ref_file = root / ".git" / head[5:]
    if not ref_file.is_file():
        return ""
    try:
        return ref_file.read_text().strip()
    except OSError:
        return ""
|
||||||
124
decnet/swarm/updater_client.py
Normal file
124
decnet/swarm/updater_client.py
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
"""Master-side HTTP client for the worker's self-updater daemon.
|
||||||
|
|
||||||
|
Sibling of ``AgentClient``: same mTLS identity (same DECNET CA, same
|
||||||
|
master client cert) but targets the updater's port (default 8766) and
|
||||||
|
speaks the multipart upload protocol the updater's ``/update`` endpoint
|
||||||
|
expects.
|
||||||
|
|
||||||
|
Kept as its own module — not a subclass of ``AgentClient`` — because the
|
||||||
|
timeouts and failure semantics are genuinely different: pip install +
|
||||||
|
agent probe can take a minute on a slow VM, and ``/update-self`` drops
|
||||||
|
the connection on purpose (the updater re-execs itself mid-response).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import ssl
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.swarm.client import MasterIdentity, ensure_master_identity
|
||||||
|
|
||||||
|
# Module-level logger for updater-client traffic.
log = get_logger("swarm.updater_client")

# /update and /update-self need a generous read/write budget: pip install
# plus the agent probe can take a minute on a slow VM (see module docstring).
_TIMEOUT_UPDATE = httpx.Timeout(connect=10.0, read=180.0, write=120.0, pool=5.0)
# Cheap control-plane calls (/health, /releases, /rollback) stay tight.
_TIMEOUT_CONTROL = httpx.Timeout(connect=5.0, read=30.0, write=10.0, pool=5.0)
|
||||||
|
|
||||||
|
|
||||||
|
class UpdaterClient:
    """Async client targeting a worker's ``decnet updater`` daemon.

    Same mTLS identity as ``AgentClient`` (DECNET CA, master client cert)
    but pointed at the updater's port.  Must be used as an async context
    manager::

        async with UpdaterClient(host) as uc:
            await uc.health()
    """

    def __init__(
        self,
        host: dict[str, Any] | None = None,
        *,
        address: Optional[str] = None,
        updater_port: int = 8766,
        identity: Optional[MasterIdentity] = None,
    ):
        """Accept either a host record (dict with ``address``/``name``)
        or an explicit ``address``.

        Raises:
            ValueError: if neither ``host`` nor ``address`` is supplied.
        """
        if host is not None:
            self._address = host["address"]
            self._host_name = host.get("name")
        else:
            if address is None:
                raise ValueError("UpdaterClient requires host dict or address")
            self._address = address
            self._host_name = None
        self._port = updater_port
        self._identity = identity or ensure_master_identity()
        self._client: Optional[httpx.AsyncClient] = None

    def _build_client(self, timeout: httpx.Timeout) -> httpx.AsyncClient:
        """Build an httpx client carrying our mTLS identity.

        ``check_hostname`` is off: workers are addressed directly, so
        authenticity rests on the CA-verified certs rather than hostnames.
        """
        ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ctx.load_cert_chain(
            str(self._identity.cert_path), str(self._identity.key_path),
        )
        ctx.load_verify_locations(cafile=str(self._identity.ca_cert_path))
        ctx.verify_mode = ssl.CERT_REQUIRED
        ctx.check_hostname = False
        return httpx.AsyncClient(
            base_url=f"https://{self._address}:{self._port}",
            verify=ctx,
            timeout=timeout,
        )

    async def __aenter__(self) -> "UpdaterClient":
        self._client = self._build_client(_TIMEOUT_CONTROL)
        return self

    async def __aexit__(self, *exc: Any) -> None:
        if self._client:
            await self._client.aclose()
        self._client = None

    def _require(self) -> httpx.AsyncClient:
        """Return the live client; raise if used outside ``async with``."""
        if self._client is None:
            raise RuntimeError("UpdaterClient used outside `async with` block")
        return self._client

    # --------------------------------------------------------------- RPCs

    async def health(self) -> dict[str, Any]:
        """GET /health; return the parsed JSON body."""
        r = await self._require().get("/health")
        r.raise_for_status()
        return r.json()

    async def releases(self) -> dict[str, Any]:
        """GET /releases; return the parsed JSON body."""
        r = await self._require().get("/releases")
        r.raise_for_status()
        return r.json()

    async def _post_tarball(
        self, path: str, tarball: bytes, data: dict[str, str],
    ) -> httpx.Response:
        """POST a multipart tarball upload with the long update timeout.

        The timeout is passed per-request rather than mutated on the shared
        client (as the previous version did): mutating ``client.timeout``
        around an ``await`` is not safe when other tasks share the client.
        """
        return await self._require().post(
            path,
            files={"tarball": ("tree.tgz", tarball, "application/gzip")},
            data=data,
            timeout=_TIMEOUT_UPDATE,
        )

    async def update(self, tarball: bytes, sha: str = "") -> httpx.Response:
        """POST /update. Returns the Response so the caller can distinguish
        200 / 409 / 500 — each means something different.
        """
        return await self._post_tarball("/update", tarball, {"sha": sha})

    async def update_self(self, tarball: bytes, sha: str = "") -> httpx.Response:
        """POST /update-self. The updater re-execs itself, so the connection
        usually drops mid-response; that's not an error. Callers should then
        poll /health until the new SHA appears.
        """
        return await self._post_tarball(
            "/update-self", tarball, {"sha": sha, "confirm_self": "true"},
        )

    async def rollback(self) -> httpx.Response:
        """POST /rollback; return the raw Response (no status check)."""
        return await self._require().post("/rollback")
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user