Compare commits
156 Commits
c1d8102253
...
testing
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e8d97281f7 | ||
| 8a2876fe86 | |||
| 3e8e4c9e1c | |||
| 64bc6fcb1d | |||
| af9d59d3ee | |||
| 4197441c01 | |||
| 1b70d6db87 | |||
| 038596776a | |||
| 692ac35ee4 | |||
| f064690452 | |||
| dd82cd3f39 | |||
| ff3e376726 | |||
| 47f2ca8d5f | |||
| da3e675f86 | |||
| 2febd921bc | |||
| 12b5c25cd7 | |||
| 5b70a34c94 | |||
| 4abfac1a98 | |||
| 9eca33938d | |||
| 195580c74d | |||
| 262a84ca53 | |||
| d1b7e94325 | |||
| 33d954a61c | |||
| bf01804736 | |||
| 62f7c88b90 | |||
| e411063075 | |||
| 148e51011c | |||
| 3ebd206bca | |||
| f576564f02 | |||
| 00d5799a79 | |||
| 14250cacad | |||
| 9d68bb45c7 | |||
| 07ec4bc269 | |||
| a63301c7a3 | |||
| df18cb44cc | |||
| 91549e6936 | |||
| e8e11b2896 | |||
| 585541016f | |||
| 5dad1bb315 | |||
| 6708f26e6b | |||
| 2bef3edb72 | |||
| d2cf1e8b3a | |||
| 6d7877c679 | |||
| ee9ade4cd5 | |||
| dad29249de | |||
| f91ba9a16e | |||
| 43b92c7bd6 | |||
| a0a241f65d | |||
| 42b5e4cd06 | |||
| 6245786289 | |||
| 5df995fda1 | |||
| 6d7567b6bb | |||
| dbaccde143 | |||
| b883f24ba2 | |||
| 79db999030 | |||
| cb1a1d1270 | |||
| 899ea559d9 | |||
| e67b6d7f73 | |||
| bc5f43c3f7 | |||
| ff4c993617 | |||
| e32fdf9cbf | |||
| 95ae175e1b | |||
| b4df9ea0a1 | |||
| 02f07c7962 | |||
| c6f7de30d2 | |||
| 37b22b76a5 | |||
| 43f140a87a | |||
| 3223bec615 | |||
| 2b1b962849 | |||
| 65fc9ac2b9 | |||
| 1e8b73c361 | |||
| 9b1299458d | |||
| 7894b9e073 | |||
| a266d6b17e | |||
| f5a5fec607 | |||
| 40d3e86e55 | |||
| ebeaf08a49 | |||
| 7765b36c50 | |||
| 8914c27220 | |||
| 4db9c7464c | |||
| 411a797120 | |||
| 3da5a2c4ee | |||
| bfc7af000a | |||
| 1e8ca4cc05 | |||
| a6430cac4c | |||
| 39d2077a3a | |||
| e2d6f857b5 | |||
| 811136e600 | |||
| 63b0a58527 | |||
| cd0057c129 | |||
| 0c77cdab32 | |||
| 8257bcc031 | |||
| d3b90679c5 | |||
| 6657d3e097 | |||
| 293da364a6 | |||
| d5e6ca1949 | |||
| a97696fa23 | |||
| 7864c72948 | |||
| 47a0480994 | |||
| 2bf886e18e | |||
| 8bdc5b98c9 | |||
| aa39be909a | |||
| 41fd496128 | |||
| 39dafaf384 | |||
| b0e00a6cc4 | |||
| 2843aafa1a | |||
| 766eeb3d83 | |||
| f462835373 | |||
| e356829234 | |||
| a5d6860124 | |||
| 8dd4c78b33 | |||
| 69510fb880 | |||
| 09d9f8595e | |||
| bfb3edbd4a | |||
| a773dddd5c | |||
| edc5c59f93 | |||
| 1f758a3669 | |||
| 6c22f9ba59 | |||
| 20fa1f9a63 | |||
| fb69a06ab3 | |||
| 1446f6da94 | |||
| e967aaabfb | |||
| 255c2e5eb7 | |||
| 2dd86fb3bb | |||
| 3106d03135 | |||
| 3cc5ba36e8 | |||
| 6301504c0e | |||
| de4b64d857 | |||
| b5d7bf818f | |||
| 257f780d0f | |||
| a10aee282f | |||
| 11b9e85874 | |||
| 45039bd621 | |||
| 4ea1c2ff4f | |||
| bb8d782e42 | |||
| 342916ca63 | |||
| d3f4bbb62b | |||
| 32340bea0d | |||
| f1e14280c0 | |||
| 931f33fb06 | |||
| 467511e997 | |||
| 3945e72e11 | |||
| bd406090a7 | |||
| e22d057e68 | |||
| cb12e7c475 | |||
| c29ca977fd | |||
| bf4afac70f | |||
| 4b15b7eb35 | |||
| 140d2fbaad | |||
| 064c8760b6 | |||
| 6572c5cbaf | |||
| ba448bae13 | |||
| 1a18377b0a | |||
| 319c1dbb61 | |||
|
|
8ad3350d51 | ||
|
|
ac4e5e1570 |
@@ -28,7 +28,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
- run: pip install bandit
|
- run: pip install bandit
|
||||||
- run: bandit -r decnet/ -ll -x decnet/services/registry.py
|
- run: bandit -r decnet/ -ll -x decnet/services/registry.py -x decnet/templates/
|
||||||
|
|
||||||
pip-audit:
|
pip-audit:
|
||||||
name: Dependency audit (pip-audit)
|
name: Dependency audit (pip-audit)
|
||||||
@@ -40,7 +40,7 @@ jobs:
|
|||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
- run: pip install pip-audit
|
- run: pip install pip-audit
|
||||||
- run: pip install -e .[dev]
|
- run: pip install -e .[dev]
|
||||||
- run: pip-audit --skip-editable
|
- run: pip-audit --skip-editable --ignore-vuln CVE-2025-65896
|
||||||
|
|
||||||
test-standard:
|
test-standard:
|
||||||
name: Test (Standard)
|
name: Test (Standard)
|
||||||
@@ -48,7 +48,7 @@ jobs:
|
|||||||
needs: [lint, bandit, pip-audit]
|
needs: [lint, bandit, pip-audit]
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.11", "3.12"]
|
python-version: ["3.11"]
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- uses: actions/setup-python@v5
|
- uses: actions/setup-python@v5
|
||||||
@@ -64,6 +64,19 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.11"]
|
python-version: ["3.11"]
|
||||||
|
services:
|
||||||
|
mysql:
|
||||||
|
image: mysql:8.0
|
||||||
|
env:
|
||||||
|
MYSQL_ROOT_PASSWORD: root
|
||||||
|
MYSQL_DATABASE: decnet_test
|
||||||
|
ports:
|
||||||
|
- 3307:3306
|
||||||
|
options: >-
|
||||||
|
--health-cmd="mysqladmin ping -h 127.0.0.1"
|
||||||
|
--health-interval=10s
|
||||||
|
--health-timeout=5s
|
||||||
|
--health-retries=5
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- uses: actions/setup-python@v5
|
- uses: actions/setup-python@v5
|
||||||
@@ -71,6 +84,12 @@ jobs:
|
|||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
- run: pip install -e .[dev]
|
- run: pip install -e .[dev]
|
||||||
- run: pytest -m live
|
- run: pytest -m live
|
||||||
|
env:
|
||||||
|
DECNET_MYSQL_HOST: 127.0.0.1
|
||||||
|
DECNET_MYSQL_PORT: 3307
|
||||||
|
DECNET_MYSQL_USER: root
|
||||||
|
DECNET_MYSQL_PASSWORD: root
|
||||||
|
DECNET_MYSQL_DATABASE: decnet_test
|
||||||
|
|
||||||
test-fuzz:
|
test-fuzz:
|
||||||
name: Test (Fuzz)
|
name: Test (Fuzz)
|
||||||
@@ -86,6 +105,8 @@ jobs:
|
|||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
- run: pip install -e .[dev]
|
- run: pip install -e .[dev]
|
||||||
- run: pytest -m fuzz
|
- run: pytest -m fuzz
|
||||||
|
env:
|
||||||
|
SCHEMATHESIS_CONFIG: schemathesis.ci.toml
|
||||||
|
|
||||||
merge-to-testing:
|
merge-to-testing:
|
||||||
name: Merge dev → testing
|
name: Merge dev → testing
|
||||||
|
|||||||
8
.gitignore
vendored
8
.gitignore
vendored
@@ -1,6 +1,7 @@
|
|||||||
.venv/
|
.venv/
|
||||||
logs/
|
logs/
|
||||||
.claude/
|
.claude/*
|
||||||
|
CLAUDE.md
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.pyc
|
*.pyc
|
||||||
*.pyo
|
*.pyo
|
||||||
@@ -17,8 +18,13 @@ linterfails.log
|
|||||||
webmail
|
webmail
|
||||||
windows1
|
windows1
|
||||||
*.db
|
*.db
|
||||||
|
*.db-shm
|
||||||
|
*.db-wal
|
||||||
|
decnet.*.log
|
||||||
decnet.json
|
decnet.json
|
||||||
.env*
|
.env*
|
||||||
.env.local
|
.env.local
|
||||||
.coverage
|
.coverage
|
||||||
.hypothesis/
|
.hypothesis/
|
||||||
|
profiles/*
|
||||||
|
tests/test_decnet.db*
|
||||||
|
|||||||
58
CLAUDE.md
58
CLAUDE.md
@@ -1,58 +0,0 @@
|
|||||||
# CLAUDE.md
|
|
||||||
|
|
||||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
||||||
|
|
||||||
## Commands
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Install (dev)
|
|
||||||
pip install -e .
|
|
||||||
|
|
||||||
# List registered service plugins
|
|
||||||
decnet services
|
|
||||||
|
|
||||||
# Dry-run (generates compose, no containers)
|
|
||||||
decnet deploy --mode unihost --deckies 3 --randomize-services --dry-run
|
|
||||||
|
|
||||||
# Full deploy (requires root for MACVLAN)
|
|
||||||
sudo decnet deploy --mode unihost --deckies 5 --interface eth0 --randomize-services
|
|
||||||
sudo decnet deploy --mode unihost --deckies 3 --services ssh,smb --log-target 192.168.1.5:5140
|
|
||||||
|
|
||||||
# Status / teardown
|
|
||||||
decnet status
|
|
||||||
sudo decnet teardown --all
|
|
||||||
sudo decnet teardown --id decky-01
|
|
||||||
```
|
|
||||||
|
|
||||||
## Project Overview
|
|
||||||
|
|
||||||
DECNET is a honeypot/deception network framework. It deploys fake machines (called **deckies**) with realistic services (RDP, SMB, SSH, FTP, etc.) to lure and profile attackers. All attacker interactions are aggregated to an isolated logging network (ELK stack / SIEM).
|
|
||||||
|
|
||||||
## Deployment Models
|
|
||||||
|
|
||||||
**UNIHOST** — one real host spins up _n_ deckies via a container orchestrator. Simpler, single-machine deployment.
|
|
||||||
|
|
||||||
**SWARM (MULTIHOST)** — _n_ real hosts each running deckies. Orchestrated via Ansible/sshpass or similar tooling.
|
|
||||||
|
|
||||||
## Core Technology Choices
|
|
||||||
|
|
||||||
- **Containers**: Docker Compose is the starting point but other orchestration frameworks should be evaluated if they serve the project better. `debian:bookworm-slim` is the default base image; mixing in Ubuntu, CentOS, or other distros is encouraged to make the decoy network look heterogeneous.
|
|
||||||
- **Networking**: Deckies need to appear as real machines on the LAN (own MACs/IPs). MACVLAN and IPVLAN are candidates; the right driver depends on the host environment. WSL has known limitations — bare metal or a VM is preferred for testing.
|
|
||||||
- **Log pipeline**: Logstash → ELK stack → SIEM (isolated network, not reachable from decoy network)
|
|
||||||
|
|
||||||
## Architecture Constraints
|
|
||||||
|
|
||||||
- The decoy network must be reachable from the outside (attacker-facing).
|
|
||||||
- The logging/aggregation network must be isolated from the decoy network.
|
|
||||||
- A publicly accessible real server acts as the bridge between the two networks.
|
|
||||||
- Deckies should differ in exposed services and OS fingerprints to appear as a heterogeneous network.
|
|
||||||
- **IMPORTANT**: The system now strictly enforces dependency injection for storage. Do not import `SQLiteRepository` directly in new features; instead, use `get_repository()` from the factory or the FastAPI `get_repo` dependency.
|
|
||||||
|
|
||||||
## Development and testing
|
|
||||||
|
|
||||||
- For every new feature, pytests must me made.
|
|
||||||
- Pytest is the main testing framework in use.
|
|
||||||
- NEVER pass broken code to the user.
|
|
||||||
- Broken means: not running, not passing 100% tests, etc.
|
|
||||||
- After tests pass with 100%, always git commit your changes.
|
|
||||||
- NEVER add "Co-Authored-By" or any Claude attribution lines to git commit messages.
|
|
||||||
104
GEMINI.md
104
GEMINI.md
@@ -1,104 +0,0 @@
|
|||||||
# DECNET (Deception Network) Project Context
|
|
||||||
|
|
||||||
DECNET is a high-fidelity honeypot framework designed to deploy heterogeneous fleets of fake machines (called **deckies**) that appear as real hosts on a local network.
|
|
||||||
|
|
||||||
## Project Overview
|
|
||||||
|
|
||||||
- **Core Purpose:** To lure, profile, and log attacker interactions within a controlled, deceptive environment.
|
|
||||||
- **Key Technology:** Linux-native container networking (MACVLAN/IPvlan) combined with Docker to give each decoy its own MAC address, IP, and realistic TCP/IP stack behavior.
|
|
||||||
- **Main Components:**
|
|
||||||
- **Deckies:** Group of containers sharing a network namespace (one base container + multiple service containers).
|
|
||||||
- **Archetypes:** Pre-defined machine profiles (e.g., `windows-workstation`, `linux-server`) that bundle services and OS fingerprints.
|
|
||||||
- **Services:** Modular honeypot plugins (SSH, SMB, RDP, etc.) built as `BaseService` subclasses.
|
|
||||||
- **OS Fingerprinting:** Sysctl-based TCP/IP stack tuning to spoof OS detection (nmap).
|
|
||||||
- **Logging Pipeline:** RFC 5424 syslog forwarding to an isolated SIEM/ELK stack.
|
|
||||||
|
|
||||||
## Technical Stack
|
|
||||||
|
|
||||||
- **Language:** Python 3.11+
|
|
||||||
- **CLI Framework:** [Typer](https://typer.tiangolo.com/)
|
|
||||||
- **Data Validation:** [Pydantic v2](https://docs.pydantic.dev/)
|
|
||||||
- **Orchestration:** Docker Engine 24+ (via Docker SDK for Python)
|
|
||||||
- **Networking:** MACVLAN (default) or IPvlan L2 (for WiFi/restricted environments).
|
|
||||||
- **Testing:** Pytest (100% pass requirement).
|
|
||||||
- **Formatting/Linting:** Ruff, Bandit (SAST), pip-audit.
|
|
||||||
|
|
||||||
## Architecture
|
|
||||||
|
|
||||||
```text
|
|
||||||
Host NIC (eth0)
|
|
||||||
└── MACVLAN Bridge
|
|
||||||
├── Decky-01 (192.168.1.10) -> [Base] + [SSH] + [HTTP]
|
|
||||||
├── Decky-02 (192.168.1.11) -> [Base] + [SMB] + [RDP]
|
|
||||||
└── ...
|
|
||||||
```
|
|
||||||
|
|
||||||
- **Base Container:** Owns the IP/MAC, sets `sysctls` for OS spoofing, and runs `sleep infinity`.
|
|
||||||
- **Service Containers:** Use `network_mode: service:<base>` to share the identity and networking of the base container.
|
|
||||||
- **Isolation:** Decoy traffic is strictly separated from the logging network.
|
|
||||||
|
|
||||||
## Key Commands
|
|
||||||
|
|
||||||
### Development & Maintenance
|
|
||||||
- **Install (Dev):**
|
|
||||||
- `rm .venv -rf`
|
|
||||||
- `python3 -m venv .venv`
|
|
||||||
- `source .venv/bin/activate`
|
|
||||||
- `pip install -e .`
|
|
||||||
- **Run Tests:** `pytest` (Run before any commit)
|
|
||||||
- **Linting:** `ruff check .`
|
|
||||||
- **Security Scan:** `bandit -r decnet/`
|
|
||||||
- **Web Git:** git.resacachile.cl (Gitea)
|
|
||||||
|
|
||||||
### CLI Usage
|
|
||||||
- **List Services:** `decnet services`
|
|
||||||
- **List Archetypes:** `decnet archetypes`
|
|
||||||
- **Dry Run (Compose Gen):** `decnet deploy --deckies 3 --randomize-services --dry-run`
|
|
||||||
- **Deploy (Full):** `sudo .venv/bin/decnet deploy --interface eth0 --deckies 5 --randomize-services`
|
|
||||||
- **Status:** `decnet status`
|
|
||||||
- **Teardown:** `sudo .venv/bin/decnet teardown --all`
|
|
||||||
|
|
||||||
## Development Conventions
|
|
||||||
|
|
||||||
- **Code Style:**
|
|
||||||
- Strict adherence to Ruff/PEP8.
|
|
||||||
- **Always use typed variables**. If any non-types variables are found, they must be corrected.
|
|
||||||
- The correct way is `x: int = 1`, never `x : int = 1`.
|
|
||||||
- If assignment is present, always use a space between the type and the equal sign `x: int = 1`.
|
|
||||||
- **Never** use lowercase L (l), uppercase o (O) or uppercase i (i) in single-character names.
|
|
||||||
- **Internal vars are to be declared with an underscore** (_internal_variable_name).
|
|
||||||
- **Internal to internal vars are to be declared with double underscore** (__internal_variable_name).
|
|
||||||
- Always use snake_case for code.
|
|
||||||
- Always use PascalCase for classes and generics.
|
|
||||||
- **Testing:** New features MUST include a `pytest` case. 100% test pass rate is mandatory before merging.
|
|
||||||
- **Plugin System:**
|
|
||||||
- New services go in `decnet/services/<name>.py`.
|
|
||||||
- Subclass `decnet.services.base.BaseService`.
|
|
||||||
- The registry uses auto-discovery; no manual registration required.
|
|
||||||
- **Configuration:**
|
|
||||||
- Use Pydantic models in `decnet/config.py` for any new settings.
|
|
||||||
- INI file parsing is handled in `decnet/ini_loader.py`.
|
|
||||||
- **State Management:**
|
|
||||||
- Runtime state is persisted in `decnet-state.json`.
|
|
||||||
- Do not modify this file manually.
|
|
||||||
- **General Development Guidelines**:
|
|
||||||
- **Never** commit broken code, or before running `pytest`s or `bandit` at the project level.
|
|
||||||
- **No matter how small** the changes, they must be committed.
|
|
||||||
- **If new features are addedd** new tests must be added, too.
|
|
||||||
- **Never present broken code to the user**. Test, validate, then present.
|
|
||||||
- **Extensive testing** for every function must be created.
|
|
||||||
- **Always develop in the `dev` branch, never in `main`.**
|
|
||||||
- **Test in the `testing` branch.**
|
|
||||||
- **IMPORTANT**: The system now strictly enforces dependency injection for storage. Do not import `SQLiteRepository` directly in new features; instead, use `get_repository()` from the factory or the FastAPI `get_repo` dependency.
|
|
||||||
|
|
||||||
## Directory Structure
|
|
||||||
|
|
||||||
- `decnet/`: Main source code.
|
|
||||||
- `services/`: Honeypot service implementations.
|
|
||||||
- `logging/`: Syslog formatting and forwarding logic.
|
|
||||||
- `correlation/`: (In Progress) Logic for grouping attacker events.
|
|
||||||
- `templates/`: Dockerfiles and entrypoint scripts for services.
|
|
||||||
- `tests/`: Pytest suite.
|
|
||||||
- `pyproject.toml`: Dependency and entry point definitions.
|
|
||||||
- `CLAUDE.md`: Claude-specific environment guidance.
|
|
||||||
- `DEVELOPMENT.md`: Roadmap and TODOs.
|
|
||||||
110
README.md
110
README.md
@@ -508,6 +508,10 @@ DECNET_WEB_HOST=0.0.0.0
|
|||||||
DECNET_WEB_PORT=8080
|
DECNET_WEB_PORT=8080
|
||||||
DECNET_ADMIN_USER=admin
|
DECNET_ADMIN_USER=admin
|
||||||
DECNET_ADMIN_PASSWORD=admin
|
DECNET_ADMIN_PASSWORD=admin
|
||||||
|
|
||||||
|
# Database pool tuning (applies to both SQLite and MySQL)
|
||||||
|
DECNET_DB_POOL_SIZE=20 # base pool connections (default: 20)
|
||||||
|
DECNET_DB_MAX_OVERFLOW=40 # extra connections under burst (default: 40)
|
||||||
```
|
```
|
||||||
|
|
||||||
Copy `.env.example` to `.env.local` and modify it to suit your environment.
|
Copy `.env.example` to `.env.local` and modify it to suit your environment.
|
||||||
@@ -676,6 +680,112 @@ The test suite covers:
|
|||||||
|
|
||||||
Every new feature requires passing tests before merging.
|
Every new feature requires passing tests before merging.
|
||||||
|
|
||||||
|
### Stress Testing
|
||||||
|
|
||||||
|
A [Locust](https://locust.io)-based stress test suite lives in `tests/stress/`. It hammers every API endpoint with realistic traffic patterns to find throughput ceilings and latency degradation.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run via pytest (starts its own server)
|
||||||
|
pytest -m stress tests/stress/ -v -x -n0 -s
|
||||||
|
|
||||||
|
# Crank it up
|
||||||
|
STRESS_USERS=2000 STRESS_SPAWN_RATE=200 STRESS_DURATION=120 pytest -m stress tests/stress/ -v -x -n0 -s
|
||||||
|
|
||||||
|
# Standalone Locust web UI against a running server
|
||||||
|
locust -f tests/stress/locustfile.py --host http://localhost:8000
|
||||||
|
```
|
||||||
|
|
||||||
|
| Env var | Default | Description |
|
||||||
|
|---|---|---|
|
||||||
|
| `STRESS_USERS` | `500` | Total simulated users |
|
||||||
|
| `STRESS_SPAWN_RATE` | `50` | Users spawned per second |
|
||||||
|
| `STRESS_DURATION` | `60` | Test duration in seconds |
|
||||||
|
| `STRESS_WORKERS` | CPU count (max 4) | Uvicorn workers for the test server |
|
||||||
|
| `STRESS_MIN_RPS` | `500` | Minimum RPS to pass baseline test |
|
||||||
|
| `STRESS_MAX_P99_MS` | `200` | Maximum p99 latency (ms) to pass |
|
||||||
|
| `STRESS_SPIKE_USERS` | `1000` | Users for thundering herd test |
|
||||||
|
| `STRESS_SUSTAINED_USERS` | `200` | Users for sustained load test |
|
||||||
|
|
||||||
|
#### Measured baseline
|
||||||
|
|
||||||
|
Reference numbers from recent Locust runs against a MySQL backend
|
||||||
|
(asyncmy driver). All runs hold zero failures throughout.
|
||||||
|
|
||||||
|
**Single worker** (unless noted):
|
||||||
|
|
||||||
|
| Metric | 500u, tracing on | 1500u, tracing on | 1500u, tracing **off** | 1500u, tracing off, **pinned to 1 core** | 1500u, tracing off, **12 workers** |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| Requests served | 396,672 | 232,648 | 277,214 | 3,532 | 308,024 |
|
||||||
|
| Failures | 0 | 0 | 0 | 0 | 0 |
|
||||||
|
| Throughput (current RPS) | ~960 | ~880 | ~990 | ~46 | ~1,585 |
|
||||||
|
| Average latency | 465 ms | 1,774 ms | 1,489 ms | 21.7 s | 930 ms |
|
||||||
|
| Median (p50) | 100 ms | 690 ms | 340 ms | 270 ms | 700 ms |
|
||||||
|
| p95 | 1.9 s | 6.5 s | 5.7 s | 115 s | 2.7 s |
|
||||||
|
| p99 | 2.9 s | 9.5 s | 8.4 s | 122 s | 4.2 s |
|
||||||
|
| Max observed | 8.3 s | 24.4 s | 20.9 s | 124.5 s | 16.5 s |
|
||||||
|
|
||||||
|
Ramp is 15 users/s for the 500u column, 40 users/s otherwise.
|
||||||
|
|
||||||
|
Takeaways:
|
||||||
|
|
||||||
|
- **Tracing off**: at 1500 users, flipping `DECNET_TRACING=false`
|
||||||
|
halves p50 (690 → 340 ms) and pushes RPS from ~880 past the
|
||||||
|
500-user figure on a single worker.
|
||||||
|
- **12 workers**: RPS scales ~1.6× over a single worker (~990 →
|
||||||
|
~1585). Sublinear because the workload is DB-bound — MySQL and the
|
||||||
|
connection pool become the new ceiling, not Python. p99 drops from
|
||||||
|
8.4 s to 4.2 s.
|
||||||
|
- **Connection math**: `DECNET_DB_POOL_SIZE=20` × `DECNET_DB_MAX_OVERFLOW=40`
|
||||||
|
× 12 workers = 720 connections at peak. MySQL's default
|
||||||
|
`max_connections=151` needs bumping (we used 2000) before running
|
||||||
|
multi-worker load.
|
||||||
|
- **Single-core pinning**: ~46 RPS with p95 near two minutes. Interesting
|
||||||
|
as a "physics floor" datapoint — not a production config.
|
||||||
|
|
||||||
|
Top endpoints by volume: `/api/v1/attackers`, `/api/v1/deckies`,
|
||||||
|
`/api/v1/bounty`, `/api/v1/logs/histogram`, `/api/v1/config`,
|
||||||
|
`/api/v1/health`, `/api/v1/auth/login`, `/api/v1/logs`.
|
||||||
|
|
||||||
|
Notes on tuning:
|
||||||
|
|
||||||
|
- **Python 3.14 is currently a no-go for the API server.** Under heavy
|
||||||
|
concurrent async load the reworked 3.14 GC segfaults inside
|
||||||
|
`mark_all_reachable` (observed in `_PyGC_Collect` during pending-GC
|
||||||
|
on 3.14.3). Stick to Python 3.11–3.13 until upstream stabilises.
|
||||||
|
- Router-level TTL caches on hot count/stats endpoints (`/stats`,
|
||||||
|
`/logs` count, `/attackers` count, `/bounty`, `/logs/histogram`,
|
||||||
|
`/deckies`, `/config`) collapse concurrent duplicate work onto a
|
||||||
|
single DB hit per window — essential to reach this RPS on one worker.
|
||||||
|
- Turning off request tracing (`DECNET_TRACING=false`) is the next
|
||||||
|
free headroom: tracing was still on during the run above.
|
||||||
|
- On SQLite, `DECNET_DB_POOL_PRE_PING=false` skips the per-checkout
|
||||||
|
`SELECT 1`. On MySQL, keep it `true` — network disconnects are real.
|
||||||
|
|
||||||
|
#### System tuning: open file limit
|
||||||
|
|
||||||
|
Under heavy load (500+ concurrent users), the server will exhaust the default Linux open file limit (`ulimit -n`), causing `OSError: [Errno 24] Too many open files`. Most distros default to **1024**, which is far too low for stress testing or production use.
|
||||||
|
|
||||||
|
**Before running stress tests:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check current limit
|
||||||
|
ulimit -n
|
||||||
|
|
||||||
|
# Bump for this shell session
|
||||||
|
ulimit -n 65536
|
||||||
|
```
|
||||||
|
|
||||||
|
**Permanent fix** — add to `/etc/security/limits.conf`:
|
||||||
|
|
||||||
|
```
|
||||||
|
* soft nofile 65536
|
||||||
|
* hard nofile 65536
|
||||||
|
```
|
||||||
|
|
||||||
|
Or for systemd-managed services, add `LimitNOFILE=65536` to the unit file.
|
||||||
|
|
||||||
|
> This applies to production deployments too — any server handling hundreds of concurrent connections needs a raised file descriptor limit.
|
||||||
|
|
||||||
# AI Disclosure
|
# AI Disclosure
|
||||||
|
|
||||||
This project has been made with lots, and I mean lots of help from AIs. While most of the design was made by me, most of the coding was done by AI models.
|
This project has been made with lots, and I mean lots of help from AIs. While most of the design was made by me, most of the coding was done by AI models.
|
||||||
|
|||||||
64
decnet.ini.example
Normal file
64
decnet.ini.example
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
; /etc/decnet/decnet.ini — DECNET host configuration
|
||||||
|
;
|
||||||
|
; Copy to /etc/decnet/decnet.ini and edit. Values here seed os.environ at
|
||||||
|
; CLI startup via setdefault() — real env vars still win, so you can
|
||||||
|
; override any value on the shell without editing this file.
|
||||||
|
;
|
||||||
|
; A missing file is fine; every daemon has sensible defaults. The main
|
||||||
|
; reason to use this file is to skip typing the same flags on every
|
||||||
|
; `decnet` invocation and to pin a host's role via `mode`.
|
||||||
|
|
||||||
|
[decnet]
|
||||||
|
; mode = agent | master
|
||||||
|
; agent — worker host (runs `decnet agent`, `decnet forwarder`, `decnet updater`).
|
||||||
|
; Master-only commands (api, swarmctl, swarm, deploy, teardown, ...)
|
||||||
|
; are hidden from `decnet --help` and refuse to run.
|
||||||
|
; master — central server (runs `decnet api`, `decnet web`, `decnet swarmctl`,
|
||||||
|
; `decnet listener`). All commands visible.
|
||||||
|
mode = agent
|
||||||
|
|
||||||
|
; disallow-master = true (default when mode=agent)
|
||||||
|
; Set to false for hybrid dev hosts that legitimately run both roles.
|
||||||
|
disallow-master = true
|
||||||
|
|
||||||
|
; log-directory — root for DECNET's per-component logs. Systemd units set
|
||||||
|
; DECNET_SYSTEM_LOGS=<log-directory>/decnet.<component>.log so agent, forwarder,
|
||||||
|
; and engine each get their own file. The forwarder tails decnet.log.
|
||||||
|
log-directory = /var/log/decnet
|
||||||
|
|
||||||
|
|
||||||
|
; ─── Agent-only settings (read when mode=agent) ───────────────────────────
|
||||||
|
[agent]
|
||||||
|
; Where the master's syslog-TLS listener lives. DECNET_SWARM_MASTER_HOST.
|
||||||
|
master-host = 192.168.1.50
|
||||||
|
; Master listener port (RFC 5425 default 6514). DECNET_SWARM_SYSLOG_PORT.
|
||||||
|
swarm-syslog-port = 6514
|
||||||
|
; Bind address/port for this worker's agent API (mTLS).
|
||||||
|
agent-port = 8765
|
||||||
|
; Cert bundle dir — must contain ca.crt, worker.crt, worker.key from enroll.
|
||||||
|
; DECNET_AGENT_DIR — honored by the forwarder child as well.
|
||||||
|
agent-dir = /home/anti/.decnet/agent
|
||||||
|
; Updater cert bundle (required for `decnet updater`).
|
||||||
|
updater-dir = /home/anti/.decnet/updater
|
||||||
|
|
||||||
|
|
||||||
|
; ─── Master-only settings (read when mode=master) ─────────────────────────
|
||||||
|
[master]
|
||||||
|
; Main API (REST for the React dashboard). DECNET_API_HOST / _PORT.
|
||||||
|
api-host = 0.0.0.0
|
||||||
|
api-port = 8000
|
||||||
|
; React dev-server dashboard (`decnet web`). DECNET_WEB_HOST / _PORT.
|
||||||
|
web-host = 0.0.0.0
|
||||||
|
web-port = 8080
|
||||||
|
; Swarm controller (master-internal). DECNET_SWARMCTL_HOST isn't exposed
|
||||||
|
; under that name today — this block is the forward-compatible spelling.
|
||||||
|
; swarmctl-host = 127.0.0.1
|
||||||
|
; swarmctl-port = 8770
|
||||||
|
; Syslog-over-TLS listener bind address and port. DECNET_LISTENER_HOST and
|
||||||
|
; DECNET_SWARM_SYSLOG_PORT. The listener is auto-spawned by `decnet swarmctl`.
|
||||||
|
listener-host = 0.0.0.0
|
||||||
|
swarm-syslog-port = 6514
|
||||||
|
; Master CA dir (for enroll / swarm cert issuance).
|
||||||
|
; ca-dir = /home/anti/.decnet/ca
|
||||||
|
; JWT secret for the web API. MUST be set; 32+ bytes. Keep out of git.
|
||||||
|
; jwt-secret = REPLACE_ME_WITH_A_32_BYTE_SECRET
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
"""DECNET — honeypot deception-network framework.
|
||||||
|
|
||||||
|
This __init__ runs once, on the first `import decnet.*`. It seeds
|
||||||
|
os.environ from /etc/decnet/decnet.ini (if present) so that later
|
||||||
|
module-level reads in decnet.env pick up the INI values as if they had
|
||||||
|
been exported by the shell. Real env vars always win via setdefault().
|
||||||
|
|
||||||
|
Kept minimal on purpose — any heavier work belongs in a submodule.
|
||||||
|
"""
|
||||||
|
from decnet.config_ini import load_ini_config as _load_ini_config
|
||||||
|
|
||||||
|
_load_ini_config()
|
||||||
|
|||||||
7
decnet/agent/__init__.py
Normal file
7
decnet/agent/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
"""DECNET worker agent — runs on every SWARM worker host.
|
||||||
|
|
||||||
|
Exposes an mTLS-protected FastAPI service the master's SWARM controller
|
||||||
|
calls to deploy, mutate, and tear down deckies locally. The agent reuses
|
||||||
|
the existing `decnet.engine.deployer` code path unchanged, so a worker runs
|
||||||
|
deckies the same way `decnet deploy --mode unihost` does today.
|
||||||
|
"""
|
||||||
144
decnet/agent/app.py
Normal file
144
decnet/agent/app.py
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
"""Worker-side FastAPI app.
|
||||||
|
|
||||||
|
Protected by mTLS at the ASGI/uvicorn transport layer: uvicorn is started
|
||||||
|
with ``--ssl-ca-certs`` + ``--ssl-cert-reqs 2`` (CERT_REQUIRED), so any
|
||||||
|
client that cannot prove a cert signed by the DECNET CA is rejected before
|
||||||
|
reaching a handler. Once past the TLS handshake, all peers are trusted
|
||||||
|
equally (the only entity holding a CA-signed cert is the master
|
||||||
|
controller).
|
||||||
|
|
||||||
|
Endpoints mirror the existing unihost CLI verbs:
|
||||||
|
|
||||||
|
* ``POST /deploy`` — body: serialized ``DecnetConfig``
|
||||||
|
* ``POST /teardown`` — body: optional ``{"decky_id": "..."}``
|
||||||
|
* ``POST /mutate`` — body: ``{"decky_id": "...", "services": [...]}``
|
||||||
|
* ``GET /status`` — deployment snapshot
|
||||||
|
* ``GET /health`` — liveness probe, does NOT require mTLS? No — mTLS
|
||||||
|
still required; master pings it with its cert.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from fastapi import FastAPI, HTTPException
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from decnet.agent import executor as _exec
|
||||||
|
from decnet.agent import heartbeat as _heartbeat
|
||||||
|
from decnet.config import DecnetConfig
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
|
||||||
|
log = get_logger("agent.app")
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
|
||||||
|
async def _lifespan(app: FastAPI):
|
||||||
|
# Best-effort: if identity/bundle plumbing isn't configured (e.g. dev
|
||||||
|
# runs or non-enrolled hosts), heartbeat.start() is a silent no-op.
|
||||||
|
_heartbeat.start()
|
||||||
|
try:
|
||||||
|
yield
|
||||||
|
finally:
|
||||||
|
await _heartbeat.stop()
|
||||||
|
|
||||||
|
|
||||||
|
app = FastAPI(
|
||||||
|
title="DECNET SWARM Agent",
|
||||||
|
version="0.1.0",
|
||||||
|
docs_url=None, # no interactive docs on worker — narrow attack surface
|
||||||
|
redoc_url=None,
|
||||||
|
openapi_url=None,
|
||||||
|
lifespan=_lifespan,
|
||||||
|
responses={
|
||||||
|
400: {"description": "Malformed request body"},
|
||||||
|
500: {"description": "Executor error"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ schemas
|
||||||
|
|
||||||
|
class DeployRequest(BaseModel):
|
||||||
|
config: DecnetConfig = Field(..., description="Full DecnetConfig to materialise on this worker")
|
||||||
|
dry_run: bool = False
|
||||||
|
no_cache: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
class TeardownRequest(BaseModel):
|
||||||
|
decky_id: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class MutateRequest(BaseModel):
|
||||||
|
decky_id: str
|
||||||
|
services: list[str]
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ routes
|
||||||
|
|
||||||
|
@app.get("/health")
|
||||||
|
async def health() -> dict[str, str]:
|
||||||
|
return {"status": "ok"}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/status")
|
||||||
|
async def status() -> dict:
|
||||||
|
return await _exec.status()
|
||||||
|
|
||||||
|
|
||||||
|
@app.post(
|
||||||
|
"/deploy",
|
||||||
|
responses={500: {"description": "Deployer raised an exception materialising the config"}},
|
||||||
|
)
|
||||||
|
async def deploy(req: DeployRequest) -> dict:
|
||||||
|
try:
|
||||||
|
await _exec.deploy(req.config, dry_run=req.dry_run, no_cache=req.no_cache)
|
||||||
|
except Exception as exc:
|
||||||
|
log.exception("agent.deploy failed")
|
||||||
|
raise HTTPException(status_code=500, detail=str(exc)) from exc
|
||||||
|
return {"status": "deployed", "deckies": len(req.config.deckies)}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post(
    "/teardown",
    responses={500: {"description": "Teardown raised an exception"}},
)
async def teardown(req: TeardownRequest) -> dict:
    """Tear down one decky (``decky_id`` set) or the whole deployment
    (``decky_id`` null). Failures become a 500 with the exception text."""
    try:
        await _exec.teardown(req.decky_id)
    except Exception as exc:
        log.exception("agent.teardown failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return {"status": "torn_down", "decky_id": req.decky_id}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post(
    "/self-destruct",
    responses={500: {"description": "Reaper could not be scheduled"}},
)
async def self_destruct() -> dict:
    """Stop all DECNET services on this worker and delete the install
    footprint. Called by the master during decommission. Logs under
    /var/log/decnet* are preserved. Fire-and-forget — the response is
    returned before the reaper starts deleting files.

    NOTE(review): an earlier draft claimed this returns 202, but no
    ``status_code`` is set on the route, so FastAPI answers 200 — confirm
    which the master expects.
    """
    try:
        await _exec.self_destruct()
    except Exception as exc:
        log.exception("agent.self_destruct failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return {"status": "self_destruct_scheduled"}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post(
    "/mutate",
    responses={501: {"description": "Worker-side mutate not yet implemented"}},
)
async def mutate(req: MutateRequest) -> dict:
    """Placeholder for per-decky service rotation — always answers 501."""
    # TODO: implement worker-side mutate. Currently the master performs
    # mutation by re-sending a full /deploy with the updated DecnetConfig;
    # this avoids duplicating mutation logic on the worker for v1. When
    # ready, replace the 501 with a real redeploy-of-a-single-decky path.
    raise HTTPException(
        status_code=501,
        detail="Per-decky mutate is performed via /deploy with updated services",
    )
|
||||||
223
decnet/agent/executor.py
Normal file
223
decnet/agent/executor.py
Normal file
@@ -0,0 +1,223 @@
|
|||||||
|
"""Thin adapter between the agent's HTTP endpoints and the existing
|
||||||
|
``decnet.engine.deployer`` code path.
|
||||||
|
|
||||||
|
Kept deliberately small: the agent does not re-implement deployment logic,
|
||||||
|
it only translates a master RPC into the same function calls the unihost
|
||||||
|
CLI already uses. Everything runs in a worker thread (the deployer is
|
||||||
|
blocking) so the FastAPI event loop stays responsive.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from ipaddress import IPv4Network
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from decnet.engine import deployer as _deployer
|
||||||
|
from decnet.config import DecnetConfig, load_state, clear_state
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.network import (
|
||||||
|
allocate_ips,
|
||||||
|
detect_interface,
|
||||||
|
detect_subnet,
|
||||||
|
get_host_ip,
|
||||||
|
)
|
||||||
|
|
||||||
|
log = get_logger("agent.executor")
|
||||||
|
|
||||||
|
|
||||||
|
def _relocalize(config: DecnetConfig) -> DecnetConfig:
    """Rewrite a master-built config to the worker's local network reality.

    The master fills ``interface``/``subnet``/``gateway`` from its own box
    before dispatching, which breaks the deployer on any worker whose NIC
    name differs (master on ``wlp6s0``, worker on ``enp0s3``). Network
    facts are therefore always re-detected locally; when the worker sits
    on a different subnet than the master, decky IPs are re-allocated from
    the local subnet so they are actually reachable.
    """
    iface = detect_interface()
    subnet_cidr, gateway = detect_subnet(iface)
    host_ip = get_host_ip(iface)

    # Start from the locally detected network facts.
    updates: dict[str, Any] = {
        "interface": iface,
        "subnet": subnet_cidr,
        "gateway": gateway,
    }

    local_net = IPv4Network(subnet_cidr, strict=False)
    master_net = IPv4Network(config.subnet, strict=False) if config.subnet else None

    # None != local_net is True, so a missing master subnet also triggers
    # re-allocation — same outcome as an explicit mismatch.
    if master_net != local_net:
        log.info(
            "agent.deploy subnet mismatch master=%s local=%s — re-allocating decky IPs",
            config.subnet, subnet_cidr,
        )
        fresh_ips = allocate_ips(
            subnet=subnet_cidr,
            gateway=gateway,
            host_ip=host_ip,
            count=len(config.deckies),
        )
        updates["deckies"] = [
            decky.model_copy(update={"ip": ip})
            for decky, ip in zip(config.deckies, fresh_ips)
        ]

    return config.model_copy(update=updates)
|
||||||
|
|
||||||
|
|
||||||
|
async def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False) -> None:
    """Run the blocking deployer off-loop. The deployer itself calls
    save_state() internally once the compose file is materialised.

    Args:
        config: Full deployment description from the master. In swarm mode
            it is first rewritten to this worker's NIC/subnet via
            ``_relocalize``.
        dry_run: Generate the compose file without starting containers.
        no_cache: Force image rebuilds, ignoring Docker's layer cache.
    """
    log.info(
        "agent.deploy mode=%s deckies=%d interface=%s (incoming)",
        config.mode, len(config.deckies), config.interface,
    )
    if config.mode == "swarm":
        config = _relocalize(config)
        log.info(
            "agent.deploy relocalized interface=%s subnet=%s gateway=%s",
            config.interface, config.subnet, config.gateway,
        )
    # Trailing False is presumably the deployer's `parallel` flag (the CLI
    # passes parallel= in the same position) — TODO confirm against
    # decnet.engine.deployer's signature.
    await asyncio.to_thread(_deployer.deploy, config, dry_run, no_cache, False)
|
||||||
|
|
||||||
|
|
||||||
|
async def teardown(decky_id: str | None = None) -> None:
    """Tear down one decky (or everything when *decky_id* is None) off-loop.

    A full teardown also wipes the persisted deployment state so a later
    status query reports this worker as undeployed.
    """
    log.info("agent.teardown decky_id=%s", decky_id)
    full_teardown = decky_id is None
    await asyncio.to_thread(_deployer.teardown, decky_id)
    if full_teardown:
        await asyncio.to_thread(clear_state)
|
||||||
|
|
||||||
|
|
||||||
|
def _decky_runtime_states(config: DecnetConfig) -> dict[str, dict[str, Any]]:
    """Map decky_name → {"running": bool, "services": {svc: container_state}}.

    Queried so the master can tell, after a partial-failure deploy, which
    deckies actually came up instead of tainting the whole shard as failed.
    Best-effort: a docker error returns an empty map, not an exception.
    """
    try:
        import docker  # local import — agent-only path

        client = docker.from_env()
        try:
            live = {c.name: c.status for c in client.containers.list(all=True, ignore_removed=True)}
        finally:
            # Fix: from_env() opens a connection pool to the docker socket;
            # close it so repeated /status polls don't leak file descriptors.
            client.close()
    except Exception:  # pragma: no cover — defensive
        log.exception("_decky_runtime_states: docker query failed")
        return {}

    out: dict[str, dict[str, Any]] = {}
    for d in config.deckies:
        # Container names follow "<decky>-<service>" with underscores dashed;
        # anything not present in docker's view is reported as "absent".
        svc_states = {
            svc: live.get(f"{d.name}-{svc.replace('_', '-')}", "absent")
            for svc in d.services
        }
        out[d.name] = {
            "running": bool(svc_states) and all(s == "running" for s in svc_states.values()),
            "services": svc_states,
        }
    return out
|
||||||
|
|
||||||
|
|
||||||
|
_REAPER_SCRIPT = r"""#!/bin/bash
|
||||||
|
# DECNET agent self-destruct reaper.
|
||||||
|
# Runs detached from the agent process so it survives the agent's death.
|
||||||
|
# Waits briefly for the HTTP response to drain, then stops services,
|
||||||
|
# wipes install paths, and preserves logs.
|
||||||
|
set +e
|
||||||
|
|
||||||
|
sleep 3
|
||||||
|
|
||||||
|
# Stop decky containers started by the local deployer (best-effort).
|
||||||
|
if command -v docker >/dev/null 2>&1; then
|
||||||
|
docker ps -q --filter "label=com.docker.compose.project=decnet" | xargs -r docker stop
|
||||||
|
docker ps -aq --filter "label=com.docker.compose.project=decnet" | xargs -r docker rm -f
|
||||||
|
docker network rm decnet_lan 2>/dev/null
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Stop+disable every systemd unit the installer may have dropped.
|
||||||
|
for unit in decnet-agent decnet-engine decnet-collector decnet-forwarder decnet-prober decnet-sniffer decnet-updater; do
|
||||||
|
systemctl stop "$unit" 2>/dev/null
|
||||||
|
systemctl disable "$unit" 2>/dev/null
|
||||||
|
done
|
||||||
|
|
||||||
|
# Nuke install paths. Logs under /var/log/decnet* are intentionally
|
||||||
|
# preserved — the operator typically wants them for forensic review.
|
||||||
|
rm -rf /opt/decnet* /var/lib/decnet/* /usr/local/bin/decnet* /etc/decnet
|
||||||
|
rm -f /etc/systemd/system/decnet-*.service /etc/systemd/system/decnet-*.timer
|
||||||
|
|
||||||
|
systemctl daemon-reload 2>/dev/null
|
||||||
|
rm -f "$0"
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
async def self_destruct() -> None:
    """Tear down deckies, then spawn a detached reaper that wipes the
    install footprint. Returns immediately so the HTTP response can drain
    before the reaper starts deleting files out from under the agent."""
    import os
    import shutil
    import subprocess  # nosec B404
    import tempfile

    # Best-effort teardown first — the reaper also runs docker stop, but
    # going through the deployer gives the host-macvlan/ipvlan helper a
    # chance to clean up routes cleanly.
    try:
        await asyncio.to_thread(_deployer.teardown, None)
        await asyncio.to_thread(clear_state)
    except Exception:
        log.exception("self_destruct: pre-reap teardown failed — reaper will force-stop containers")

    # Reaper lives under /tmp so it survives rm -rf /opt/decnet*.
    fd, path = tempfile.mkstemp(prefix="decnet-reaper-", suffix=".sh", dir="/tmp")  # nosec B108 — reaper must outlive /opt/decnet removal
    try:
        os.write(fd, _REAPER_SCRIPT.encode())
    finally:
        os.close(fd)
    os.chmod(path, 0o700)  # nosec B103 — root-owned reaper, needs exec

    # The reaper MUST run outside decnet-agent.service's cgroup — otherwise
    # `systemctl stop decnet-agent` SIGTERMs the whole cgroup (reaper included)
    # before rm -rf completes. `start_new_session=True` gets us a fresh POSIX
    # session but does NOT escape the systemd cgroup. So we prefer
    # `systemd-run --scope` (launches the command in a transient scope
    # detached from the caller's service), falling back to a bare Popen if
    # systemd-run is unavailable (non-systemd host / container).
    # NOTE(review): the comment above mentions --scope, but argv below does
    # not pass it — systemd-run without --scope creates a transient *service*
    # unit instead; confirm which is intended.
    systemd_run = shutil.which("systemd-run")
    if systemd_run:
        argv = [
            systemd_run,
            "--collect",
            "--unit", f"decnet-reaper-{os.getpid()}",
            "--description", "DECNET agent self-destruct reaper",
            "/bin/bash", path,
        ]
        spawn_kwargs = {"start_new_session": True}
    else:
        argv = ["/bin/bash", path]
        spawn_kwargs = {"start_new_session": True}

    # Fully detached child: no inherited stdio, no inherited FDs.
    subprocess.Popen(  # nosec B603
        argv,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        close_fds=True,
        **spawn_kwargs,
    )
    log.warning(
        "self_destruct: reaper spawned path=%s via=%s — agent will die in ~3s",
        path, "systemd-run" if systemd_run else "popen",
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def status() -> dict[str, Any]:
    """Snapshot this worker's deployment: persisted config plus the live
    per-decky container states (best-effort)."""
    state = await asyncio.to_thread(load_state)
    if state is None:
        # Nothing persisted — worker has never deployed (or was torn down).
        return {"deployed": False, "deckies": []}

    config, compose_path = state
    runtime = await asyncio.to_thread(_decky_runtime_states, config)

    snapshot: dict[str, Any] = {"deployed": True}
    snapshot["mode"] = config.mode
    snapshot["compose_path"] = str(compose_path)
    snapshot["deckies"] = [d.model_dump() for d in config.deckies]
    snapshot["runtime"] = runtime
    return snapshot
|
||||||
134
decnet/agent/heartbeat.py
Normal file
134
decnet/agent/heartbeat.py
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
"""Agent → master liveness heartbeat loop.
|
||||||
|
|
||||||
|
Every ``INTERVAL_S`` seconds the worker posts ``executor.status()`` to
|
||||||
|
``POST <master>/swarm/heartbeat`` over mTLS. The master pins the
|
||||||
|
presented client cert's SHA-256 against the ``SwarmHost`` row for the
|
||||||
|
claimed ``host_uuid``; a match refreshes ``last_heartbeat`` + each
|
||||||
|
``DeckyShard``'s snapshot + runtime state.
|
||||||
|
|
||||||
|
Identity comes from ``/etc/decnet/decnet.ini`` (seeded by the enroll
|
||||||
|
bundle) — specifically ``DECNET_HOST_UUID`` and ``DECNET_MASTER_HOST``.
|
||||||
|
The worker's existing ``~/.decnet/agent/`` bundle (or
|
||||||
|
``/etc/decnet/agent/``) provides the mTLS client cert.
|
||||||
|
|
||||||
|
Started/stopped via the agent FastAPI app's lifespan. If identity
|
||||||
|
plumbing is missing (pre-enrollment dev runs) the loop logs at DEBUG and
|
||||||
|
declines to start — callers don't have to guard it.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import pathlib
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from decnet.agent import executor as _exec
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.swarm import pki
|
||||||
|
from decnet.swarm.log_forwarder import build_worker_ssl_context
|
||||||
|
|
||||||
|
log = get_logger("agent.heartbeat")
|
||||||
|
|
||||||
|
INTERVAL_S = 30.0
|
||||||
|
_TIMEOUT = httpx.Timeout(connect=5.0, read=10.0, write=5.0, pool=5.0)
|
||||||
|
|
||||||
|
_task: Optional[asyncio.Task] = None
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_agent_dir() -> pathlib.Path:
|
||||||
|
"""Match the agent-dir resolution order used by the agent server:
|
||||||
|
DECNET_AGENT_DIR env, else /etc/decnet/agent (production install),
|
||||||
|
else ~/.decnet/agent (dev)."""
|
||||||
|
import os
|
||||||
|
env = os.environ.get("DECNET_AGENT_DIR")
|
||||||
|
if env:
|
||||||
|
return pathlib.Path(env)
|
||||||
|
system = pathlib.Path("/etc/decnet/agent")
|
||||||
|
if system.exists():
|
||||||
|
return system
|
||||||
|
return pki.DEFAULT_AGENT_DIR
|
||||||
|
|
||||||
|
|
||||||
|
async def _tick(client: httpx.AsyncClient, url: str, host_uuid: str, agent_version: str) -> None:
    """Post one status snapshot to the master; a 204 is success."""
    payload = {
        "host_uuid": host_uuid,
        "agent_version": agent_version,
        "status": await _exec.status(),
    }
    resp = await client.post(url, json=payload)
    if resp.status_code == 204:
        return
    # 403 / 404 are terminal-ish — we still keep looping because an
    # operator may re-enrol the host mid-session, but we log loudly so
    # prod ops can spot cert-pinning drift.
    log.warning(
        "heartbeat rejected status=%d body=%s",
        resp.status_code, resp.text[:200],
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _loop(url: str, host_uuid: str, agent_version: str, ssl_ctx) -> None:
    """Post one heartbeat every INTERVAL_S seconds until cancelled.

    CancelledError is re-raised so stop() can wind the task down cleanly;
    any other tick failure is logged and the loop keeps going — a flaky
    master must not kill the worker's heartbeat permanently.
    """
    log.info("heartbeat loop starting url=%s host_uuid=%s interval=%ss",
             url, host_uuid, INTERVAL_S)
    # One client for the life of the loop — reuses the mTLS connection.
    async with httpx.AsyncClient(verify=ssl_ctx, timeout=_TIMEOUT) as client:
        while True:
            try:
                await _tick(client, url, host_uuid, agent_version)
            except asyncio.CancelledError:
                raise
            except Exception:
                log.exception("heartbeat tick failed — will retry in %ss", INTERVAL_S)
            await asyncio.sleep(INTERVAL_S)
|
||||||
|
|
||||||
|
|
||||||
|
def start() -> Optional[asyncio.Task]:
    """Kick off the background heartbeat task. No-op if identity is
    unconfigured (dev mode) — the caller doesn't need to check.

    Returns the (possibly already-running) task, or None when identity or
    the worker SSL bundle is unavailable.
    """
    global _task
    # Local import — presumably deferred so env values are read at start
    # time rather than module import; TODO confirm decnet.env semantics.
    from decnet.env import (
        DECNET_HOST_UUID,
        DECNET_MASTER_HOST,
        DECNET_SWARMCTL_PORT,
    )

    # Idempotent: a live task is returned as-is, never duplicated.
    if _task is not None and not _task.done():
        return _task
    if not DECNET_HOST_UUID or not DECNET_MASTER_HOST:
        log.debug("heartbeat not starting — DECNET_HOST_UUID or DECNET_MASTER_HOST unset")
        return None

    agent_dir = _resolve_agent_dir()
    try:
        ssl_ctx = build_worker_ssl_context(agent_dir)
    except Exception:
        log.exception("heartbeat not starting — worker SSL context unavailable at %s", agent_dir)
        return None

    # Version string is best-effort metadata for the master's ops view.
    try:
        from decnet import __version__ as _v
        agent_version = _v
    except Exception:
        agent_version = "unknown"

    url = f"https://{DECNET_MASTER_HOST}:{DECNET_SWARMCTL_PORT}/swarm/heartbeat"
    _task = asyncio.create_task(
        _loop(url, DECNET_HOST_UUID, agent_version, ssl_ctx),
        name="agent-heartbeat",
    )
    return _task
|
||||||
|
|
||||||
|
|
||||||
|
async def stop() -> None:
    """Cancel the heartbeat task (if any) and wait for it to wind down.

    Everything the dying task raises — including its CancelledError — is
    swallowed on purpose: stop() runs during shutdown, where propagating
    would only mask the real shutdown path.
    """
    global _task
    task = _task
    if task is None:
        return
    task.cancel()
    try:
        await task
    except (asyncio.CancelledError, Exception):
        pass
    _task = None
|
||||||
70
decnet/agent/server.py
Normal file
70
decnet/agent/server.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
"""Worker-agent uvicorn launcher.
|
||||||
|
|
||||||
|
Starts ``decnet.agent.app:app`` over HTTPS with mTLS enforcement. The
|
||||||
|
worker must already have a bundle in ``~/.decnet/agent/`` (delivered by
|
||||||
|
``decnet swarm enroll`` from the master); if it does not, we refuse to
|
||||||
|
start — unauthenticated agents are not a supported mode.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
import signal
|
||||||
|
import subprocess # nosec B404
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.swarm import pki
|
||||||
|
|
||||||
|
log = get_logger("agent.server")
|
||||||
|
|
||||||
|
|
||||||
|
def run(host: str, port: int, agent_dir: pathlib.Path = pki.DEFAULT_AGENT_DIR) -> int:
    """Launch uvicorn serving the agent app over mTLS and block until it
    exits.

    Returns the uvicorn process's exit code, or 2 when no cert bundle is
    present — the agent refuses to start unauthenticated.
    """
    bundle = pki.load_worker_bundle(agent_dir)
    if bundle is None:
        print(
            f"[agent] No cert bundle at {agent_dir}. "
            f"Run `decnet swarm enroll` from the master first.",
            file=sys.stderr,
        )
        return 2

    # Bundle layout delivered by `decnet swarm enroll`.
    keyfile = agent_dir / "worker.key"
    certfile = agent_dir / "worker.crt"
    cafile = agent_dir / "ca.crt"

    cmd = [
        sys.executable,
        "-m",
        "uvicorn",
        "decnet.agent.app:app",
        "--host",
        host,
        "--port",
        str(port),
        "--ssl-keyfile",
        str(keyfile),
        "--ssl-certfile",
        str(certfile),
        "--ssl-ca-certs",
        str(cafile),
        # 2 == ssl.CERT_REQUIRED — clients MUST present a CA-signed cert.
        "--ssl-cert-reqs",
        "2",
    ]
    log.info("agent starting host=%s port=%d bundle=%s", host, port, agent_dir)
    # Own process group for clean Ctrl+C / SIGTERM propagation to uvicorn
    # workers (same pattern as `decnet api`).
    proc = subprocess.Popen(cmd, start_new_session=True)  # nosec B603
    try:
        return proc.wait()
    except KeyboardInterrupt:
        # Ctrl+C: SIGTERM the whole group, give it 10s to exit gracefully,
        # then SIGKILL the group and reap.
        try:
            os.killpg(proc.pid, signal.SIGTERM)
            try:
                return proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                os.killpg(proc.pid, signal.SIGKILL)
                return proc.wait()
        except ProcessLookupError:
            # Group already gone — treat as a clean exit.
            return 0
|
||||||
972
decnet/cli.py
972
decnet/cli.py
@@ -1,972 +0,0 @@
|
|||||||
"""
|
|
||||||
DECNET CLI — entry point for all commands.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
decnet deploy --mode unihost --deckies 5 --randomize-services
|
|
||||||
decnet status
|
|
||||||
decnet teardown [--all | --id decky-01]
|
|
||||||
decnet services
|
|
||||||
"""
|
|
||||||
|
|
||||||
import signal
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import typer
|
|
||||||
from rich.console import Console
|
|
||||||
from rich.table import Table
|
|
||||||
|
|
||||||
from decnet.logging import get_logger
|
|
||||||
from decnet.env import (
|
|
||||||
DECNET_API_HOST,
|
|
||||||
DECNET_API_PORT,
|
|
||||||
DECNET_INGEST_LOG_FILE,
|
|
||||||
DECNET_WEB_HOST,
|
|
||||||
DECNET_WEB_PORT,
|
|
||||||
)
|
|
||||||
from decnet.archetypes import Archetype, all_archetypes, get_archetype
|
|
||||||
from decnet.config import (
|
|
||||||
DecnetConfig,
|
|
||||||
)
|
|
||||||
from decnet.distros import all_distros, get_distro
|
|
||||||
from decnet.fleet import all_service_names, build_deckies, build_deckies_from_ini
|
|
||||||
from decnet.ini_loader import load_ini
|
|
||||||
from decnet.network import detect_interface, detect_subnet, allocate_ips, get_host_ip
|
|
||||||
from decnet.services.registry import all_services
|
|
||||||
|
|
||||||
log = get_logger("cli")
|
|
||||||
|
|
||||||
|
|
||||||
def _daemonize() -> None:
    """Fork the current process into a background daemon (Unix double-fork)."""
    import os
    import sys

    # First fork: the parent returns control to the shell immediately.
    if os.fork() > 0:
        raise SystemExit(0)
    # New session: detach from the controlling terminal.
    os.setsid()
    # Second fork: the session leader exits so the survivor can never
    # re-acquire a controlling terminal.
    if os.fork() > 0:
        raise SystemExit(0)
    # Re-point the std streams at /dev/null for the daemonised survivor.
    sys.stdout = open(os.devnull, "w")  # noqa: SIM115
    sys.stderr = open(os.devnull, "w")  # noqa: SIM115
    sys.stdin = open(os.devnull, "r")  # noqa: SIM115
|
|
||||||
|
|
||||||
|
|
||||||
# Top-level Typer application — every @app.command() below hangs off this.
app = typer.Typer(
    name="decnet",
    help="Deploy a deception network of honeypot deckies on your LAN.",
    no_args_is_help=True,
)
# Shared Rich console for all CLI output.
console = Console()
|
|
||||||
|
|
||||||
|
|
||||||
def _kill_all_services() -> None:
    """Find and kill all running DECNET microservice processes."""
    import os

    stopped = 0
    # Walk the service registry and SIGTERM whichever entries are live.
    for svc_name, match_fn, _launch_args in _service_registry(str(DECNET_INGEST_LOG_FILE)):
        pid = _is_running(match_fn)
        if pid is None:
            continue
        console.print(f"[yellow]Stopping {svc_name} (PID {pid})...[/]")
        os.kill(pid, signal.SIGTERM)
        stopped += 1

    if stopped:
        console.print(f"[green]{stopped} background process(es) stopped.[/]")
    else:
        console.print("[dim]No DECNET services were running.[/]")
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def api(
    port: int = typer.Option(DECNET_API_PORT, "--port", help="Port for the backend API"),
    host: str = typer.Option(DECNET_API_HOST, "--host", help="Host IP for the backend API"),
    log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Path to the DECNET log file to monitor"),
    daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
) -> None:
    """Run the DECNET API and Web Dashboard in standalone mode."""
    import subprocess  # nosec B404
    import sys
    import os

    # Daemonise first so the uvicorn child inherits the detached state.
    if daemon:
        log.info("API daemonizing host=%s port=%d", host, port)
        _daemonize()

    log.info("API command invoked host=%s port=%d", host, port)
    console.print(f"[green]Starting DECNET API on {host}:{port}...[/]")
    # Pass the log-file path to the child via the environment — presumably
    # decnet.web.api reads DECNET_INGEST_LOG_FILE on startup; confirm.
    _env: dict[str, str] = os.environ.copy()
    _env["DECNET_INGEST_LOG_FILE"] = str(log_file)
    try:
        subprocess.run(  # nosec B603 B404
            [sys.executable, "-m", "uvicorn", "decnet.web.api:app", "--host", host, "--port", str(port)],
            env=_env
        )
    except KeyboardInterrupt:
        pass
    except (FileNotFoundError, subprocess.SubprocessError):
        console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
|
|
||||||
def deploy(
|
|
||||||
mode: str = typer.Option("unihost", "--mode", "-m", help="Deployment mode: unihost | swarm"),
|
|
||||||
deckies: Optional[int] = typer.Option(None, "--deckies", "-n", help="Number of deckies to deploy (required without --config)", min=1),
|
|
||||||
interface: Optional[str] = typer.Option(None, "--interface", "-i", help="Host NIC (auto-detected if omitted)"),
|
|
||||||
subnet: Optional[str] = typer.Option(None, "--subnet", help="LAN subnet CIDR (auto-detected if omitted)"),
|
|
||||||
ip_start: Optional[str] = typer.Option(None, "--ip-start", help="First decky IP (auto if omitted)"),
|
|
||||||
services: Optional[str] = typer.Option(None, "--services", help="Comma-separated services, e.g. ssh,smb,rdp"),
|
|
||||||
randomize_services: bool = typer.Option(False, "--randomize-services", help="Assign random services to each decky"),
|
|
||||||
distro: Optional[str] = typer.Option(None, "--distro", help="Comma-separated distro slugs, e.g. debian,ubuntu22,rocky9"),
|
|
||||||
randomize_distros: bool = typer.Option(False, "--randomize-distros", help="Assign a random distro to each decky"),
|
|
||||||
log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Host path for the collector to write RFC 5424 logs (e.g. /var/log/decnet/decnet.log)"),
|
|
||||||
archetype_name: Optional[str] = typer.Option(None, "--archetype", "-a", help="Machine archetype slug (e.g. linux-server, windows-workstation)"),
|
|
||||||
mutate_interval: Optional[int] = typer.Option(30, "--mutate-interval", help="Automatically rotate services every N minutes"),
|
|
||||||
dry_run: bool = typer.Option(False, "--dry-run", help="Generate compose file without starting containers"),
|
|
||||||
no_cache: bool = typer.Option(False, "--no-cache", help="Force rebuild all images, ignoring Docker layer cache"),
|
|
||||||
parallel: bool = typer.Option(False, "--parallel", help="Build all images concurrently (enables BuildKit, separates build from up)"),
|
|
||||||
ipvlan: bool = typer.Option(False, "--ipvlan", help="Use IPvlan L2 instead of MACVLAN (required on WiFi interfaces)"),
|
|
||||||
config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to INI config file"),
|
|
||||||
api: bool = typer.Option(False, "--api", help="Start the FastAPI backend to ingest and serve logs"),
|
|
||||||
api_port: int = typer.Option(8000, "--api-port", help="Port for the backend API"),
|
|
||||||
daemon: bool = typer.Option(False, "--daemon", help="Detach to background as a daemon process"),
|
|
||||||
) -> None:
|
|
||||||
"""Deploy deckies to the LAN."""
|
|
||||||
import os
|
|
||||||
|
|
||||||
if daemon:
|
|
||||||
log.info("deploy daemonizing mode=%s deckies=%s", mode, deckies)
|
|
||||||
_daemonize()
|
|
||||||
|
|
||||||
log.info("deploy command invoked mode=%s deckies=%s dry_run=%s", mode, deckies, dry_run)
|
|
||||||
if mode not in ("unihost", "swarm"):
|
|
||||||
console.print("[red]--mode must be 'unihost' or 'swarm'[/]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------ #
|
|
||||||
# Config-file path #
|
|
||||||
# ------------------------------------------------------------------ #
|
|
||||||
if config_file:
|
|
||||||
try:
|
|
||||||
ini = load_ini(config_file)
|
|
||||||
except FileNotFoundError as e:
|
|
||||||
console.print(f"[red]{e}[/]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
iface = interface or ini.interface or detect_interface()
|
|
||||||
subnet_cidr = subnet or ini.subnet
|
|
||||||
effective_gateway = ini.gateway
|
|
||||||
if subnet_cidr is None:
|
|
||||||
subnet_cidr, effective_gateway = detect_subnet(iface)
|
|
||||||
elif effective_gateway is None:
|
|
||||||
_, effective_gateway = detect_subnet(iface)
|
|
||||||
|
|
||||||
host_ip = get_host_ip(iface)
|
|
||||||
console.print(f"[dim]Config:[/] {config_file} [dim]Interface:[/] {iface} "
|
|
||||||
f"[dim]Subnet:[/] {subnet_cidr} [dim]Gateway:[/] {effective_gateway} "
|
|
||||||
f"[dim]Host IP:[/] {host_ip}")
|
|
||||||
|
|
||||||
if ini.custom_services:
|
|
||||||
from decnet.custom_service import CustomService
|
|
||||||
from decnet.services.registry import register_custom_service
|
|
||||||
for cs in ini.custom_services:
|
|
||||||
register_custom_service(
|
|
||||||
CustomService(
|
|
||||||
name=cs.name,
|
|
||||||
image=cs.image,
|
|
||||||
exec_cmd=cs.exec_cmd,
|
|
||||||
ports=cs.ports,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
effective_log_file = log_file
|
|
||||||
try:
|
|
||||||
decky_configs = build_deckies_from_ini(
|
|
||||||
ini, subnet_cidr, effective_gateway, host_ip, randomize_services, cli_mutate_interval=mutate_interval
|
|
||||||
)
|
|
||||||
except ValueError as e:
|
|
||||||
console.print(f"[red]{e}[/]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
# ------------------------------------------------------------------ #
|
|
||||||
# Classic CLI path #
|
|
||||||
# ------------------------------------------------------------------ #
|
|
||||||
else:
|
|
||||||
if deckies is None:
|
|
||||||
console.print("[red]--deckies is required when --config is not used.[/]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
services_list = [s.strip() for s in services.split(",")] if services else None
|
|
||||||
if services_list:
|
|
||||||
known = set(all_service_names())
|
|
||||||
unknown = [s for s in services_list if s not in known]
|
|
||||||
if unknown:
|
|
||||||
console.print(f"[red]Unknown service(s): {unknown}. Available: {all_service_names()}[/]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
arch: Archetype | None = None
|
|
||||||
if archetype_name:
|
|
||||||
try:
|
|
||||||
arch = get_archetype(archetype_name)
|
|
||||||
except ValueError as e:
|
|
||||||
console.print(f"[red]{e}[/]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
if not services_list and not randomize_services and not arch:
|
|
||||||
console.print("[red]Specify --services, --archetype, or --randomize-services.[/]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
iface = interface or detect_interface()
|
|
||||||
if subnet is None:
|
|
||||||
subnet_cidr, effective_gateway = detect_subnet(iface)
|
|
||||||
else:
|
|
||||||
subnet_cidr = subnet
|
|
||||||
_, effective_gateway = detect_subnet(iface)
|
|
||||||
|
|
||||||
host_ip = get_host_ip(iface)
|
|
||||||
console.print(f"[dim]Interface:[/] {iface} [dim]Subnet:[/] {subnet_cidr} "
|
|
||||||
f"[dim]Gateway:[/] {effective_gateway} [dim]Host IP:[/] {host_ip}")
|
|
||||||
|
|
||||||
distros_list = [d.strip() for d in distro.split(",")] if distro else None
|
|
||||||
if distros_list:
|
|
||||||
try:
|
|
||||||
for slug in distros_list:
|
|
||||||
get_distro(slug)
|
|
||||||
except ValueError as e:
|
|
||||||
console.print(f"[red]{e}[/]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
ips = allocate_ips(subnet_cidr, effective_gateway, host_ip, deckies, ip_start)
|
|
||||||
decky_configs = build_deckies(
|
|
||||||
deckies, ips, services_list, randomize_services,
|
|
||||||
distros_explicit=distros_list, randomize_distros=randomize_distros,
|
|
||||||
archetype=arch, mutate_interval=mutate_interval,
|
|
||||||
)
|
|
||||||
effective_log_file = log_file
|
|
||||||
|
|
||||||
if api and not effective_log_file:
|
|
||||||
effective_log_file = os.path.join(os.getcwd(), "decnet.log")
|
|
||||||
console.print(f"[cyan]API mode enabled: defaulting log-file to {effective_log_file}[/]")
|
|
||||||
|
|
||||||
config = DecnetConfig(
|
|
||||||
mode=mode,
|
|
||||||
interface=iface,
|
|
||||||
subnet=subnet_cidr,
|
|
||||||
gateway=effective_gateway,
|
|
||||||
deckies=decky_configs,
|
|
||||||
log_file=effective_log_file,
|
|
||||||
ipvlan=ipvlan,
|
|
||||||
mutate_interval=mutate_interval,
|
|
||||||
)
|
|
||||||
|
|
||||||
log.debug("deploy: config built deckies=%d interface=%s subnet=%s", len(config.deckies), config.interface, config.subnet)
|
|
||||||
from decnet.engine import deploy as _deploy
|
|
||||||
_deploy(config, dry_run=dry_run, no_cache=no_cache, parallel=parallel)
|
|
||||||
if dry_run:
|
|
||||||
log.info("deploy: dry-run complete, no containers started")
|
|
||||||
else:
|
|
||||||
log.info("deploy: deployment complete deckies=%d", len(config.deckies))
|
|
||||||
|
|
||||||
if mutate_interval is not None and not dry_run:
|
|
||||||
import subprocess # nosec B404
|
|
||||||
import sys
|
|
||||||
console.print(f"[green]Starting DECNET Mutator watcher in the background (interval: {mutate_interval}m)...[/]")
|
|
||||||
try:
|
|
||||||
subprocess.Popen( # nosec B603
|
|
||||||
[sys.executable, "-m", "decnet.cli", "mutate", "--watch"],
|
|
||||||
stdout=subprocess.DEVNULL,
|
|
||||||
stderr=subprocess.STDOUT,
|
|
||||||
start_new_session=True,
|
|
||||||
)
|
|
||||||
except (FileNotFoundError, subprocess.SubprocessError):
|
|
||||||
console.print("[red]Failed to start mutator watcher.[/]")
|
|
||||||
|
|
||||||
if effective_log_file and not dry_run and not api:
|
|
||||||
import subprocess # nosec B404
|
|
||||||
import sys
|
|
||||||
from pathlib import Path as _Path
|
|
||||||
_collector_err = _Path(effective_log_file).with_suffix(".collector.log")
|
|
||||||
console.print(f"[bold cyan]Starting log collector[/] → {effective_log_file}")
|
|
||||||
subprocess.Popen( # nosec B603
|
|
||||||
[sys.executable, "-m", "decnet.cli", "collect", "--log-file", str(effective_log_file)],
|
|
||||||
stdin=subprocess.DEVNULL,
|
|
||||||
stdout=open(_collector_err, "a"),
|
|
||||||
stderr=subprocess.STDOUT,
|
|
||||||
start_new_session=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
if api and not dry_run:
|
|
||||||
import subprocess # nosec B404
|
|
||||||
import sys
|
|
||||||
console.print(f"[green]Starting DECNET API on port {api_port}...[/]")
|
|
||||||
_env: dict[str, str] = os.environ.copy()
|
|
||||||
_env["DECNET_INGEST_LOG_FILE"] = str(effective_log_file or "")
|
|
||||||
try:
|
|
||||||
subprocess.Popen( # nosec B603
|
|
||||||
[sys.executable, "-m", "uvicorn", "decnet.web.api:app", "--host", DECNET_API_HOST, "--port", str(api_port)],
|
|
||||||
env=_env,
|
|
||||||
stdout=subprocess.DEVNULL,
|
|
||||||
stderr=subprocess.STDOUT
|
|
||||||
)
|
|
||||||
console.print(f"[dim]API running at http://{DECNET_API_HOST}:{api_port}[/]")
|
|
||||||
except (FileNotFoundError, subprocess.SubprocessError):
|
|
||||||
console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
|
|
||||||
|
|
||||||
if effective_log_file and not dry_run:
|
|
||||||
import subprocess # nosec B404
|
|
||||||
import sys
|
|
||||||
console.print("[bold cyan]Starting DECNET-PROBER[/] (auto-discovers attackers from log stream)")
|
|
||||||
try:
|
|
||||||
_prober_args = [
|
|
||||||
sys.executable, "-m", "decnet.cli", "probe",
|
|
||||||
"--daemon",
|
|
||||||
"--log-file", str(effective_log_file),
|
|
||||||
]
|
|
||||||
subprocess.Popen( # nosec B603
|
|
||||||
_prober_args,
|
|
||||||
stdin=subprocess.DEVNULL,
|
|
||||||
stdout=subprocess.DEVNULL,
|
|
||||||
stderr=subprocess.STDOUT,
|
|
||||||
start_new_session=True,
|
|
||||||
)
|
|
||||||
except (FileNotFoundError, subprocess.SubprocessError):
|
|
||||||
console.print("[red]Failed to start DECNET-PROBER.[/]")
|
|
||||||
|
|
||||||
if effective_log_file and not dry_run:
|
|
||||||
import subprocess # nosec B404
|
|
||||||
import sys
|
|
||||||
console.print("[bold cyan]Starting DECNET-PROFILER[/] (builds attacker profiles from log stream)")
|
|
||||||
try:
|
|
||||||
subprocess.Popen( # nosec B603
|
|
||||||
[sys.executable, "-m", "decnet.cli", "profiler", "--daemon"],
|
|
||||||
stdin=subprocess.DEVNULL,
|
|
||||||
stdout=subprocess.DEVNULL,
|
|
||||||
stderr=subprocess.STDOUT,
|
|
||||||
start_new_session=True,
|
|
||||||
)
|
|
||||||
except (FileNotFoundError, subprocess.SubprocessError):
|
|
||||||
console.print("[red]Failed to start DECNET-PROFILER.[/]")
|
|
||||||
|
|
||||||
if effective_log_file and not dry_run:
|
|
||||||
import subprocess # nosec B404
|
|
||||||
import sys
|
|
||||||
console.print("[bold cyan]Starting DECNET-SNIFFER[/] (passive network capture)")
|
|
||||||
try:
|
|
||||||
subprocess.Popen( # nosec B603
|
|
||||||
[sys.executable, "-m", "decnet.cli", "sniffer",
|
|
||||||
"--daemon",
|
|
||||||
"--log-file", str(effective_log_file)],
|
|
||||||
stdin=subprocess.DEVNULL,
|
|
||||||
stdout=subprocess.DEVNULL,
|
|
||||||
stderr=subprocess.STDOUT,
|
|
||||||
start_new_session=True,
|
|
||||||
)
|
|
||||||
except (FileNotFoundError, subprocess.SubprocessError):
|
|
||||||
console.print("[red]Failed to start DECNET-SNIFFER.[/]")
|
|
||||||
|
|
||||||
|
|
||||||
def _is_running(match_fn) -> int | None:
    """Scan the process table for a DECNET process.

    Returns the PID of the first process whose command-line argv satisfies
    ``match_fn(cmdline)``, or ``None`` when no such process is alive.
    Processes that vanish or deny access mid-scan are skipped.
    """
    import psutil

    for candidate in psutil.process_iter(["pid", "cmdline"]):
        try:
            argv = candidate.info["cmdline"]
            if not argv or not match_fn(argv):
                continue
            return candidate.info["pid"]
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # Process exited or is not ours to inspect — ignore and move on.
            continue
    return None
|
|
||||||
|
|
||||||
|
|
||||||
# Each entry: (display_name, detection_fn, launch_args)
# The detection_fn receives a process argv list; launch_args is the Popen
# argv used to (re)start the service when it is not running.
def _service_registry(log_file: str) -> list[tuple[str, callable, list[str]]]:
    """Return the microservice registry for health-check and relaunch."""
    import sys

    py = sys.executable
    cli_prefix = [py, "-m", "decnet.cli"]

    def _matches(*needles: str):
        # Predicate factory: True when every needle is present as an argv token.
        return lambda cmd: all(n in cmd for n in needles)

    return [
        (
            "Collector",
            _matches("decnet.cli", "collect"),
            cli_prefix + ["collect", "--daemon", "--log-file", log_file],
        ),
        (
            "Mutator",
            _matches("decnet.cli", "mutate", "--watch"),
            cli_prefix + ["mutate", "--daemon", "--watch"],
        ),
        (
            "Prober",
            _matches("decnet.cli", "probe"),
            cli_prefix + ["probe", "--daemon", "--log-file", log_file],
        ),
        (
            "Profiler",
            _matches("decnet.cli", "profiler"),
            cli_prefix + ["profiler", "--daemon"],
        ),
        (
            "Sniffer",
            _matches("decnet.cli", "sniffer"),
            cli_prefix + ["sniffer", "--daemon", "--log-file", log_file],
        ),
        (
            "API",
            _matches("uvicorn", "decnet.web.api:app"),
            [py, "-m", "uvicorn", "decnet.web.api:app",
             "--host", DECNET_API_HOST, "--port", str(DECNET_API_PORT)],
        ),
    ]
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def redeploy(
    log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to the DECNET log file"),
) -> None:
    """Check running DECNET services and relaunch any that are down."""
    import subprocess  # nosec B404

    log.info("redeploy: checking services")

    table = Table(title="DECNET Services", show_lines=True)
    table.add_column("Service", style="bold cyan")
    table.add_column("Status")
    table.add_column("PID", style="dim")
    table.add_column("Action")

    relaunched = 0
    for name, match_fn, launch_args in _service_registry(str(log_file)):
        pid = _is_running(match_fn)
        if pid is None:
            # Service is down — attempt a detached relaunch.
            try:
                subprocess.Popen(  # nosec B603
                    launch_args,
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError) as exc:
                table.add_row(name, "[red]DOWN[/]", "—", f"[red]failed: {exc}[/]")
            else:
                table.add_row(name, "[red]DOWN[/]", "—", "[green]relaunched[/]")
                relaunched += 1
        else:
            table.add_row(name, "[green]UP[/]", str(pid), "—")

    console.print(table)
    if relaunched:
        console.print(f"[green]{relaunched} service(s) relaunched.[/]")
    else:
        console.print("[green]All services running.[/]")
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def probe(
    log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path for RFC 5424 syslog + .json output (reads attackers from .json, writes results to both)"),
    interval: int = typer.Option(300, "--interval", "-i", help="Seconds between probe cycles (default: 300)"),
    timeout: float = typer.Option(5.0, "--timeout", help="Per-probe TCP timeout in seconds"),
    daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background (used by deploy, no console output)"),
) -> None:
    """Fingerprint attackers (JARM + HASSH + TCP/IP stack) discovered in the log stream."""
    import asyncio
    from decnet.prober import prober_worker

    if daemon:
        # Background path: detach first, then run the worker silently.
        log.info("probe daemonizing log_file=%s interval=%d", log_file, interval)
        _daemonize()
        asyncio.run(prober_worker(log_file, interval=interval, timeout=timeout))
        return

    # Foreground path: announce on the console and allow Ctrl+C to stop.
    log.info("probe command invoked log_file=%s interval=%d", log_file, interval)
    console.print(f"[bold cyan]DECNET-PROBER[/] watching {log_file} for attackers (interval: {interval}s)")
    console.print("[dim]Press Ctrl+C to stop[/]")
    try:
        asyncio.run(prober_worker(log_file, interval=interval, timeout=timeout))
    except KeyboardInterrupt:
        console.print("\n[yellow]DECNET-PROBER stopped.[/]")
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def collect(
    log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write RFC 5424 syslog lines and .json records"),
    daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
) -> None:
    """Stream Docker logs from all running decky service containers to a log file."""
    import asyncio
    from decnet.collector import log_collector_worker

    if daemon:
        # Detach before starting the worker so the collector survives the shell.
        log.info("collect daemonizing log_file=%s", log_file)
        _daemonize()

    log.info("collect command invoked log_file=%s", log_file)
    console.print(f"[bold cyan]Collector starting[/] → {log_file}")
    worker = log_collector_worker(log_file)
    asyncio.run(worker)
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def mutate(
    watch: bool = typer.Option(False, "--watch", "-w", help="Run continuously and mutate deckies according to their interval"),
    decky_name: Optional[str] = typer.Option(None, "--decky", help="Force mutate a specific decky immediately"),
    force_all: bool = typer.Option(False, "--all", help="Force mutate all deckies immediately"),
    daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
) -> None:
    """Manually trigger or continuously watch for decky mutation."""
    import asyncio
    from decnet.mutator import mutate_decky, mutate_all, run_watch_loop
    from decnet.web.dependencies import repo

    if daemon:
        log.info("mutate daemonizing watch=%s", watch)
        _daemonize()

    async def _dispatch() -> None:
        # Repository must be initialised before any mutation touches state.
        await repo.initialize()
        if watch:
            # Continuous mode: loop forever, honouring per-decky intervals.
            await run_watch_loop(repo)
        elif decky_name:
            # Targeted one-shot mutation of a single decky.
            await mutate_decky(decky_name, repo)
        else:
            # One pass over every decky; --all forces mutation regardless
            # of each decky's schedule.
            await mutate_all(force=force_all, repo=repo)

    asyncio.run(_dispatch())
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def status() -> None:
    """Show running deckies and their status."""
    log.info("status command invoked")
    from decnet.engine import status as _status
    _status()

    # Follow the container status with a health table for the sidecar services.
    svc_table = Table(title="DECNET Services", show_lines=True)
    svc_table.add_column("Service", style="bold cyan")
    svc_table.add_column("Status")
    svc_table.add_column("PID", style="dim")

    for name, match_fn, _launch_args in _service_registry(str(DECNET_INGEST_LOG_FILE)):
        pid = _is_running(match_fn)
        row = (name, "[red]DOWN[/]", "—") if pid is None else (name, "[green]UP[/]", str(pid))
        svc_table.add_row(*row)

    console.print(svc_table)
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def teardown(
    all_: bool = typer.Option(False, "--all", help="Tear down all deckies and remove network"),
    id_: Optional[str] = typer.Option(None, "--id", help="Tear down a specific decky by name"),
) -> None:
    """Stop and remove deckies."""
    # Require an explicit scope: either everything or one named decky.
    if not (all_ or id_):
        console.print("[red]Specify --all or --id <name>.[/]")
        raise typer.Exit(1)

    log.info("teardown command invoked all=%s id=%s", all_, id_)
    from decnet.engine import teardown as _teardown
    _teardown(decky_id=id_)
    log.info("teardown complete all=%s id=%s", all_, id_)

    # A full teardown also stops the sidecar microservices.
    if all_:
        _kill_all_services()
|
|
||||||
|
|
||||||
|
|
||||||
@app.command(name="services")
def list_services() -> None:
    """List all registered honeypot service plugins."""
    registry = all_services()

    table = Table(title="Available Services", show_lines=True)
    table.add_column("Name", style="bold cyan")
    table.add_column("Ports")
    table.add_column("Image")

    for svc_name in sorted(registry):
        svc = registry[svc_name]
        port_list = ", ".join(str(p) for p in svc.ports)
        table.add_row(svc_name, port_list, svc.default_image)

    console.print(table)
|
|
||||||
|
|
||||||
|
|
||||||
@app.command(name="distros")
def list_distros() -> None:
    """List all available OS distro profiles for deckies."""
    table = Table(title="Available Distro Profiles", show_lines=True)
    table.add_column("Slug", style="bold cyan")
    table.add_column("Display Name")
    table.add_column("Docker Image", style="dim")

    profiles = all_distros()
    for slug in sorted(profiles):
        prof = profiles[slug]
        table.add_row(slug, prof.display_name, prof.image)

    console.print(table)
|
|
||||||
|
|
||||||
|
|
||||||
@app.command(name="correlate")
def correlate(
    log_file: Optional[str] = typer.Option(None, "--log-file", "-f", help="Path to DECNET syslog file to analyse"),
    min_deckies: int = typer.Option(2, "--min-deckies", "-m", help="Minimum number of distinct deckies an IP must touch to be reported"),
    output: str = typer.Option("table", "--output", "-o", help="Output format: table | json | syslog"),
    emit_syslog: bool = typer.Option(False, "--emit-syslog", help="Also print traversal events as RFC 5424 lines (for SIEM piping)"),
    daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
) -> None:
    """Analyse logs for cross-decky traversals and print the attacker movement graph."""
    import sys
    import json as _json
    from pathlib import Path
    from decnet.correlation.engine import CorrelationEngine

    if daemon:
        log.info("correlate daemonizing log_file=%s", log_file)
        _daemonize()

    engine = CorrelationEngine()

    # Ingest phase: explicit file first, piped stdin second, otherwise bail.
    if log_file:
        src = Path(log_file)
        if not src.exists():
            console.print(f"[red]Log file not found: {log_file}[/]")
            raise typer.Exit(1)
        engine.ingest_file(src)
    elif not sys.stdin.isatty():
        for raw in sys.stdin:
            engine.ingest(raw)
    else:
        console.print("[red]Provide --log-file or pipe log data via stdin.[/]")
        raise typer.Exit(1)

    traversals = engine.traversals(min_deckies)

    # Report phase: json / syslog dumps, or the human-readable table view.
    if output == "json":
        console.print_json(_json.dumps(engine.report_json(min_deckies), indent=2))
    elif output == "syslog":
        for out_line in engine.traversal_syslog_lines(min_deckies):
            typer.echo(out_line)
    elif traversals:
        console.print(engine.report_table(min_deckies))
        console.print(
            f"[dim]Parsed {engine.lines_parsed} lines · "
            f"indexed {engine.events_indexed} events · "
            f"{len(engine.all_attackers())} unique IPs · "
            f"[bold]{len(traversals)}[/] traversal(s)[/]"
        )
    else:
        console.print(
            f"[yellow]No traversals detected "
            f"(min_deckies={min_deckies}, events_indexed={engine.events_indexed}).[/]"
        )

    # Optional extra: mirror traversal events as syslog lines for SIEM piping.
    if emit_syslog:
        for out_line in engine.traversal_syslog_lines(min_deckies):
            typer.echo(out_line)
|
|
||||||
|
|
||||||
|
|
||||||
@app.command(name="archetypes")
def list_archetypes() -> None:
    """List all machine archetype profiles."""
    table = Table(title="Machine Archetypes", show_lines=True)
    table.add_column("Slug", style="bold cyan")
    table.add_column("Display Name")
    table.add_column("Default Services", style="green")
    table.add_column("Description", style="dim")

    archetypes = all_archetypes()
    for slug in sorted(archetypes):
        arch = archetypes[slug]
        svc_list = ", ".join(arch.services)
        table.add_row(slug, arch.display_name, svc_list, arch.description)

    console.print(table)
|
|
||||||
|
|
||||||
|
|
||||||
@app.command(name="web")
def serve_web(
    web_port: int = typer.Option(DECNET_WEB_PORT, "--web-port", help="Port to serve the DECNET Web Dashboard"),
    host: str = typer.Option(DECNET_WEB_HOST, "--host", help="Host IP to serve the Web Dashboard"),
    api_port: int = typer.Option(DECNET_API_PORT, "--api-port", help="Port the DECNET API is listening on"),
    daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
) -> None:
    """Serve the DECNET Web Dashboard frontend.

    Proxies /api/* requests to the API server so the frontend can use
    relative URLs (/api/v1/...) with no CORS configuration required.
    """
    import http.client
    import http.server
    import socketserver
    from pathlib import Path

    # Static bundle location — presumably produced by `npm run build` inside
    # the decnet_web package (see the error message below).
    dist_dir = Path(__file__).parent.parent / "decnet_web" / "dist"

    if not dist_dir.exists():
        console.print(f"[red]Frontend build not found at {dist_dir}. Make sure you run 'npm run build' inside 'decnet_web'.[/]")
        raise typer.Exit(1)

    if daemon:
        log.info("web daemonizing host=%s port=%d api_port=%d", host, web_port, api_port)
        _daemonize()

    # Captured by the handler class below as a closure over the CLI option.
    _api_port = api_port

    class SPAHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
        # Serves static files from dist_dir; any path under /api/ is
        # transparently proxied to the API on 127.0.0.1:_api_port.

        def do_GET(self):
            if self.path.startswith("/api/"):
                self._proxy("GET")
                return
            # SPA fallback: paths that don't map to an existing file
            # (client-side routes) are rewritten to index.html.
            path = self.translate_path(self.path)
            if not Path(path).exists() or Path(path).is_dir():
                self.path = "/index.html"
            return super().do_GET()

        def do_POST(self):
            # Only the API accepts POST; static assets are GET-only.
            if self.path.startswith("/api/"):
                self._proxy("POST")
                return
            self.send_error(405)

        def do_PUT(self):
            if self.path.startswith("/api/"):
                self._proxy("PUT")
                return
            self.send_error(405)

        def do_DELETE(self):
            if self.path.startswith("/api/"):
                self._proxy("DELETE")
                return
            self.send_error(405)

        def _proxy(self, method: str) -> None:
            # Forward the request body (if any) and all headers except
            # hop-by-hop ones to the backend API, then stream the response.
            content_length = int(self.headers.get("Content-Length", 0))
            body = self.rfile.read(content_length) if content_length else None

            forward = {k: v for k, v in self.headers.items()
                       if k.lower() not in ("host", "connection")}

            try:
                conn = http.client.HTTPConnection("127.0.0.1", _api_port, timeout=120)
                conn.request(method, self.path, body=body, headers=forward)
                resp = conn.getresponse()

                # Relay status and headers, again dropping hop-by-hop fields
                # (chunked transfer-encoding would not survive re-framing).
                self.send_response(resp.status)
                for key, val in resp.getheaders():
                    if key.lower() not in ("connection", "transfer-encoding"):
                        self.send_header(key, val)
                self.end_headers()

                # Disable socket timeout for SSE streams — they are
                # long-lived by design and the 120s timeout would kill them.
                content_type = resp.getheader("Content-Type", "")
                if "text/event-stream" in content_type:
                    conn.sock.settimeout(None)

                # read1() returns bytes immediately available in the buffer
                # without blocking for more. Plain read(4096) waits until
                # 4096 bytes accumulate — fatal for SSE where each event
                # is only ~100-500 bytes.
                _read = getattr(resp, "read1", resp.read)
                while True:
                    chunk = _read(4096)
                    if not chunk:
                        break
                    self.wfile.write(chunk)
                    self.wfile.flush()
            except Exception as exc:
                log.warning("web proxy error %s %s: %s", method, self.path, exc)
                self.send_error(502, f"API proxy error: {exc}")
            finally:
                # conn may be unbound if HTTPConnection() itself raised;
                # the inner except also swallows that NameError.
                try:
                    conn.close()
                except Exception:  # nosec B110 — best-effort conn cleanup
                    pass

        def log_message(self, fmt: str, *args: object) -> None:
            # Route the default stderr access log through the app logger.
            log.debug("web %s", fmt % args)

    import os
    # Serve files relative to the built frontend bundle.
    os.chdir(dist_dir)

    socketserver.TCPServer.allow_reuse_address = True
    with socketserver.ThreadingTCPServer((host, web_port), SPAHTTPRequestHandler) as httpd:
        console.print(f"[green]Serving DECNET Web Dashboard on http://{host}:{web_port}[/]")
        console.print(f"[dim]Proxying /api/* → http://127.0.0.1:{_api_port}[/]")
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            console.print("\n[dim]Shutting down dashboard server.[/]")
|
|
||||||
|
|
||||||
@app.command(name="profiler")
def profiler_cmd(
    interval: int = typer.Option(30, "--interval", "-i", help="Seconds between profile rebuild cycles"),
    daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
) -> None:
    """Run the attacker profiler as a standalone microservice."""
    import asyncio
    from decnet.profiler import attacker_profile_worker
    from decnet.web.dependencies import repo

    if daemon:
        log.info("profiler daemonizing interval=%d", interval)
        _daemonize()

    log.info("profiler starting interval=%d", interval)
    console.print(f"[bold cyan]Profiler starting[/] (interval: {interval}s)")

    async def _main() -> None:
        # Repository must be ready before the worker's rebuild loop starts.
        await repo.initialize()
        await attacker_profile_worker(repo, interval=interval)

    try:
        asyncio.run(_main())
    except KeyboardInterrupt:
        console.print("\n[yellow]Profiler stopped.[/]")
|
|
||||||
|
|
||||||
|
|
||||||
@app.command(name="sniffer")
def sniffer_cmd(
    log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write captured syslog + JSON records"),
    daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
) -> None:
    """Run the network sniffer as a standalone microservice."""
    import asyncio
    from decnet.sniffer import sniffer_worker

    if daemon:
        log.info("sniffer daemonizing log_file=%s", log_file)
        _daemonize()

    log.info("sniffer starting log_file=%s", log_file)
    console.print(f"[bold cyan]Sniffer starting[/] → {log_file}")

    try:
        asyncio.run(sniffer_worker(log_file))
    except KeyboardInterrupt:
        console.print("\n[yellow]Sniffer stopped.[/]")
|
|
||||||
|
|
||||||
|
|
||||||
# Tables wiped by the `db-reset` command, consumed by _db_reset_mysql_async.
_DB_RESET_TABLES: tuple[str, ...] = (
    # Order matters for DROP TABLE: attacker_behavior FK-references attackers.
    "attacker_behavior",
    "attackers",
    "logs",
    "bounty",
    "state",
    "users",
)
|
|
||||||
|
|
||||||
|
|
||||||
async def _db_reset_mysql_async(dsn: str, mode: str, confirm: bool) -> None:
    """Inspect + (optionally) wipe a MySQL database. Pulled out of the CLI
    wrapper so tests can drive it without spawning a Typer runner.

    Args:
        dsn: SQLAlchemy async DSN of the target MySQL database.
        mode: "truncate" wipes rows keeping schema; anything else DROPs tables.
        confirm: when False, only the row-count summary is printed (dry-run).
    """
    from urllib.parse import urlparse
    from sqlalchemy import text
    from sqlalchemy.ext.asyncio import create_async_engine

    db_name = urlparse(dsn).path.lstrip("/") or "(default)"
    engine = create_async_engine(dsn)
    try:
        # Collect current row counts per table. Missing tables yield -1.
        rows: dict[str, int] = {}
        async with engine.connect() as conn:
            for tbl in _DB_RESET_TABLES:
                try:
                    # Table names come from the fixed module constant, not user input.
                    result = await conn.execute(text(f"SELECT COUNT(*) FROM `{tbl}`"))  # nosec B608
                    rows[tbl] = result.scalar() or 0
                except Exception:  # noqa: BLE001 — ProgrammingError for missing table varies by driver
                    rows[tbl] = -1

        # Show what would be affected before (or instead of) doing anything.
        summary = Table(title=f"DECNET MySQL reset — database `{db_name}` (mode={mode})")
        summary.add_column("Table", style="cyan")
        summary.add_column("Rows", justify="right")
        for tbl, count in rows.items():
            summary.add_row(tbl, "[dim]missing[/]" if count < 0 else f"{count:,}")
        console.print(summary)

        if not confirm:
            console.print(
                "[yellow]Dry-run only. Re-run with [bold]--i-know-what-im-doing[/] "
                "to actually execute.[/]"
            )
            return

        # Destructive phase. FK checks off so TRUNCATE/DROP works in any order.
        async with engine.begin() as conn:
            await conn.execute(text("SET FOREIGN_KEY_CHECKS = 0"))
            for tbl in _DB_RESET_TABLES:
                if rows.get(tbl, -1) < 0:
                    continue  # skip absent tables silently
                if mode == "truncate":
                    await conn.execute(text(f"TRUNCATE TABLE `{tbl}`"))
                    console.print(f"[green]✓ TRUNCATE {tbl}[/]")
                else:  # drop-tables
                    await conn.execute(text(f"DROP TABLE `{tbl}`"))
                    console.print(f"[green]✓ DROP TABLE {tbl}[/]")
            await conn.execute(text("SET FOREIGN_KEY_CHECKS = 1"))

        console.print(f"[bold green]Done. Database `{db_name}` reset ({mode}).[/]")
    finally:
        # Always release the engine's connection pool, even on error.
        await engine.dispose()
|
|
||||||
|
|
||||||
|
|
||||||
@app.command(name="db-reset")
def db_reset(
    i_know: bool = typer.Option(
        False,
        "--i-know-what-im-doing",
        help="Required to actually execute. Without it, the command runs in dry-run mode.",
    ),
    mode: str = typer.Option(
        "truncate",
        "--mode",
        help="truncate (wipe rows, keep schema) | drop-tables (DROP TABLE for each DECNET table)",
    ),
    url: Optional[str] = typer.Option(
        None,
        "--url",
        help="Override DECNET_DB_URL for this invocation (e.g. when cleanup needs admin creds).",
    ),
) -> None:
    """Wipe the MySQL database used by the DECNET dashboard.

    Destructive. Runs dry by default — pass --i-know-what-im-doing to commit.
    Only supported against MySQL; refuses to operate on SQLite.
    """
    import asyncio
    import os

    # Reject unknown modes up front.
    if mode not in {"truncate", "drop-tables"}:
        console.print(f"[red]Invalid --mode '{mode}'. Expected: truncate | drop-tables.[/]")
        raise typer.Exit(2)

    # Refuse anything but MySQL — SQLite users can just delete the file.
    db_type = os.environ.get("DECNET_DB_TYPE", "sqlite").lower()
    if db_type != "mysql":
        console.print(
            f"[red]db-reset is MySQL-only (DECNET_DB_TYPE='{db_type}'). "
            f"For SQLite, just delete the decnet.db file.[/]"
        )
        raise typer.Exit(2)

    # DSN resolution: --url flag, then DECNET_DB_URL, then component env vars.
    dsn = url or os.environ.get("DECNET_DB_URL")
    if not dsn:
        # Fall back to component env vars (DECNET_DB_HOST/PORT/NAME/USER/PASSWORD).
        from decnet.web.db.mysql.database import build_mysql_url
        try:
            dsn = build_mysql_url()
        except ValueError as e:
            console.print(f"[red]{e}[/]")
            raise typer.Exit(2) from e

    log.info("db-reset invoked mode=%s confirm=%s", mode, i_know)
    try:
        asyncio.run(_db_reset_mysql_async(dsn, mode=mode, confirm=i_know))
    except Exception as e:  # noqa: BLE001
        console.print(f"[red]db-reset failed: {e}[/]")
        raise typer.Exit(1) from e
|
|
||||||
|
|
||||||
|
|
||||||
# Direct-execution entry point for the Typer app.
if __name__ == "__main__":  # pragma: no cover
    app()
|
|
||||||
80
decnet/cli/__init__.py
Normal file
80
decnet/cli/__init__.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
"""
|
||||||
|
DECNET CLI — entry point for all commands.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
decnet deploy --mode unihost --deckies 5 --randomize-services
|
||||||
|
decnet status
|
||||||
|
decnet teardown [--all | --id decky-01]
|
||||||
|
decnet services
|
||||||
|
|
||||||
|
Layout: each command module exports ``register(app)`` which attaches its
|
||||||
|
commands to the passed Typer app. ``__init__.py`` builds the root app,
|
||||||
|
calls every module's ``register`` in order, then runs the master-only
|
||||||
|
gate. The gate must fire LAST so it sees the fully-populated dispatch
|
||||||
|
table before filtering.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from . import (
|
||||||
|
agent,
|
||||||
|
api,
|
||||||
|
db,
|
||||||
|
deploy,
|
||||||
|
forwarder,
|
||||||
|
inventory,
|
||||||
|
lifecycle,
|
||||||
|
listener,
|
||||||
|
profiler,
|
||||||
|
sniffer,
|
||||||
|
swarm,
|
||||||
|
swarmctl,
|
||||||
|
updater,
|
||||||
|
web,
|
||||||
|
workers,
|
||||||
|
)
|
||||||
|
from .gating import _gate_commands_by_mode
|
||||||
|
from .utils import console as console, log as log
|
||||||
|
|
||||||
|
app = typer.Typer(
|
||||||
|
name="decnet",
|
||||||
|
help="Deploy a deception network of honeypot deckies on your LAN.",
|
||||||
|
no_args_is_help=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Order matches the old flat layout so `decnet --help` reads the same.
|
||||||
|
for _mod in (
|
||||||
|
api, swarmctl, agent, updater, listener, forwarder,
|
||||||
|
swarm,
|
||||||
|
deploy, lifecycle, workers, inventory,
|
||||||
|
web, profiler, sniffer, db,
|
||||||
|
):
|
||||||
|
_mod.register(app)
|
||||||
|
|
||||||
|
_gate_commands_by_mode(app)
|
||||||
|
|
||||||
|
# Backwards-compat re-exports. Tests and third-party tooling import these
|
||||||
|
# directly from ``decnet.cli``; the refactor must keep them resolvable.
|
||||||
|
from .db import _db_reset_mysql_async # noqa: E402,F401
|
||||||
|
from .gating import ( # noqa: E402,F401
|
||||||
|
MASTER_ONLY_COMMANDS,
|
||||||
|
MASTER_ONLY_GROUPS,
|
||||||
|
_agent_mode_active,
|
||||||
|
_require_master_mode,
|
||||||
|
)
|
||||||
|
from .utils import ( # noqa: E402,F401
|
||||||
|
_daemonize,
|
||||||
|
_http_request,
|
||||||
|
_is_running,
|
||||||
|
_kill_all_services,
|
||||||
|
_pid_dir,
|
||||||
|
_service_registry,
|
||||||
|
_spawn_detached,
|
||||||
|
_swarmctl_base_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__": # pragma: no cover
|
||||||
|
app()
|
||||||
64
decnet/cli/agent.py
Normal file
64
decnet/cli/agent.py
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import pathlib as _pathlib
|
||||||
|
import sys as _sys
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the ``agent`` command to *app*."""

    @app.command()
    def agent(
        port: int = typer.Option(8765, "--port", help="Port for the worker agent"),
        host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the worker agent"),  # nosec B104
        agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker cert bundle dir (default: ~/.decnet/agent, expanded under the running user's HOME — set this when running as sudo/root)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        no_forwarder: bool = typer.Option(False, "--no-forwarder", help="Do not auto-spawn the log forwarder alongside the agent"),
    ) -> None:
        """Run the DECNET SWARM worker agent (requires a cert bundle in ~/.decnet/agent/).

        By default, `decnet agent` auto-spawns `decnet forwarder` as a fully-
        detached sibling process so worker logs start flowing to the master
        without a second manual invocation. The forwarder survives agent
        restarts and crashes — if it dies on its own, restart it manually
        with `decnet forwarder --daemon …`. Pass --no-forwarder to skip.
        """
        # Deferred imports: keep `decnet --help` fast and avoid importing
        # agent-side dependencies on master-only hosts.
        from decnet.agent import server as _agent_server
        from decnet.env import DECNET_SWARM_MASTER_HOST, DECNET_INGEST_LOG_FILE
        from decnet.swarm import pki as _pki

        resolved_dir = _pathlib.Path(agent_dir) if agent_dir else _pki.DEFAULT_AGENT_DIR

        # Daemonize BEFORE spawning the forwarder so the forwarder is created
        # by the detached process, not by the interactive shell session.
        if daemon:
            log.info("agent daemonizing host=%s port=%d", host, port)
            _utils._daemonize()

        if not no_forwarder and DECNET_SWARM_MASTER_HOST:
            # Re-invoke this CLI (`python -m decnet forwarder …`) fully
            # detached; the PID file lets later tooling locate/stop it.
            fw_argv = [
                _sys.executable, "-m", "decnet", "forwarder",
                "--master-host", DECNET_SWARM_MASTER_HOST,
                "--master-port", str(int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514"))),
                "--agent-dir", str(resolved_dir),
                "--log-file", str(DECNET_INGEST_LOG_FILE),
                "--daemon",
            ]
            try:
                pid = _utils._spawn_detached(fw_argv, _utils._pid_dir() / "forwarder.pid")
                log.info("agent auto-spawned forwarder pid=%d master=%s", pid, DECNET_SWARM_MASTER_HOST)
                console.print(f"[dim]Auto-spawned forwarder (pid {pid}) → {DECNET_SWARM_MASTER_HOST}.[/]")
            except Exception as e:  # noqa: BLE001 — auto-spawn is best-effort; agent still starts
                log.warning("agent could not auto-spawn forwarder: %s", e)
                console.print(f"[yellow]forwarder auto-spawn skipped: {e}[/]")
        elif not no_forwarder:
            # No master configured: nothing to forward to; log for operators.
            log.info("agent skipping forwarder auto-spawn (DECNET_SWARM_MASTER_HOST unset)")

        log.info("agent command invoked host=%s port=%d dir=%s", host, port, resolved_dir)
        console.print(f"[green]Starting DECNET worker agent on {host}:{port} (mTLS)...[/]")
        # Blocks until the agent server exits; propagate a non-zero rc as the
        # process exit code via typer.
        rc = _agent_server.run(host, port, agent_dir=resolved_dir)
        if rc != 0:
            raise typer.Exit(rc)
|
||||||
53
decnet/cli/api.py
Normal file
53
decnet/cli/api.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import signal
|
||||||
|
import subprocess # nosec B404
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from decnet.env import DECNET_API_HOST, DECNET_API_PORT, DECNET_INGEST_LOG_FILE
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .gating import _require_master_mode
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the ``api`` command to *app*."""

    @app.command()
    def api(
        port: int = typer.Option(DECNET_API_PORT, "--port", help="Port for the backend API"),
        host: str = typer.Option(DECNET_API_HOST, "--host", help="Host IP for the backend API"),
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Path to the DECNET log file to monitor"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        workers: int = typer.Option(1, "--workers", "-w", min=1, help="Number of uvicorn worker processes"),
    ) -> None:
        """Run the DECNET API and Web Dashboard in standalone mode."""
        _require_master_mode("api")
        if daemon:
            log.info("API daemonizing host=%s port=%d workers=%d", host, port, workers)
            _utils._daemonize()

        log.info("API command invoked host=%s port=%d workers=%d", host, port, workers)
        console.print(f"[green]Starting DECNET API on {host}:{port} (workers={workers})...[/]")
        # Hand the ingest log path to uvicorn via the environment so the
        # FastAPI app can pick it up at import time.
        _env: dict[str, str] = os.environ.copy()
        _env["DECNET_INGEST_LOG_FILE"] = str(log_file)
        _cmd = [sys.executable, "-m", "uvicorn", "decnet.web.api:app",
                "--host", host, "--port", str(port), "--workers", str(workers)]
        try:
            # start_new_session puts uvicorn (and its worker children) in a
            # fresh process group so we can signal the whole group at once.
            proc = subprocess.Popen(_cmd, env=_env, start_new_session=True)  # nosec B603 B404
            try:
                proc.wait()
            except KeyboardInterrupt:
                # Graceful shutdown on Ctrl-C: SIGTERM the group, wait up to
                # 10s, then escalate to SIGKILL.
                try:
                    os.killpg(proc.pid, signal.SIGTERM)
                    try:
                        proc.wait(timeout=10)
                    except subprocess.TimeoutExpired:
                        os.killpg(proc.pid, signal.SIGKILL)
                        proc.wait()
                except ProcessLookupError:
                    # Group already gone — uvicorn exited between Ctrl-C
                    # and the killpg call.
                    pass
        except (FileNotFoundError, subprocess.SubprocessError):
            console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
|
||||||
130
decnet/cli/db.py
Normal file
130
decnet/cli/db.py
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
# Tables wiped by `decnet db-reset`, in dependency-safe order.
_DB_RESET_TABLES: tuple[str, ...] = (
    # Order matters for DROP TABLE: child FKs first.
    # - attacker_behavior FK-references attackers.
    # - decky_shards FK-references swarm_hosts.
    "attacker_behavior",
    "attackers",
    "logs",
    "bounty",
    "state",
    "users",
    "decky_shards",
    "swarm_hosts",
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _db_reset_mysql_async(dsn: str, mode: str, confirm: bool) -> None:
    """Inspect + (optionally) wipe a MySQL database. Pulled out of the CLI
    wrapper so tests can drive it without spawning a Typer runner.

    :param dsn: async SQLAlchemy MySQL URL.
    :param mode: "truncate" wipes rows; any other value DROPs each table.
    :param confirm: when False, only print the per-table row summary (dry run).
    """
    from urllib.parse import urlparse
    from sqlalchemy import text
    from sqlalchemy.ext.asyncio import create_async_engine

    db_name = urlparse(dsn).path.lstrip("/") or "(default)"
    engine = create_async_engine(dsn)
    try:
        # Phase 1: count rows per table. -1 marks a missing table so the
        # wipe phase below can skip it.
        rows: dict[str, int] = {}
        async with engine.connect() as conn:
            for tbl in _DB_RESET_TABLES:
                try:
                    # Table names come from the fixed _DB_RESET_TABLES tuple,
                    # never from user input, so the f-string SQL is safe here.
                    result = await conn.execute(text(f"SELECT COUNT(*) FROM `{tbl}`"))  # nosec B608
                    rows[tbl] = result.scalar() or 0
                except Exception:  # noqa: BLE001 — ProgrammingError for missing table varies by driver
                    rows[tbl] = -1

        summary = Table(title=f"DECNET MySQL reset — database `{db_name}` (mode={mode})")
        summary.add_column("Table", style="cyan")
        summary.add_column("Rows", justify="right")
        for tbl, count in rows.items():
            summary.add_row(tbl, "[dim]missing[/]" if count < 0 else f"{count:,}")
        console.print(summary)

        if not confirm:
            console.print(
                "[yellow]Dry-run only. Re-run with [bold]--i-know-what-im-doing[/] "
                "to actually execute.[/]"
            )
            return

        # Phase 2: wipe inside a single transaction with FK checks disabled
        # so TRUNCATE/DROP order cannot trip foreign-key constraints.
        async with engine.begin() as conn:
            await conn.execute(text("SET FOREIGN_KEY_CHECKS = 0"))
            for tbl in _DB_RESET_TABLES:
                if rows.get(tbl, -1) < 0:
                    continue  # table absent — nothing to wipe
                if mode == "truncate":
                    await conn.execute(text(f"TRUNCATE TABLE `{tbl}`"))
                    console.print(f"[green]✓ TRUNCATE {tbl}[/]")
                else:
                    await conn.execute(text(f"DROP TABLE `{tbl}`"))
                    console.print(f"[green]✓ DROP TABLE {tbl}[/]")
            await conn.execute(text("SET FOREIGN_KEY_CHECKS = 1"))

        console.print(f"[bold green]Done. Database `{db_name}` reset ({mode}).[/]")
    finally:
        # Always release the pool, even on dry runs and failures.
        await engine.dispose()
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the ``db-reset`` command to *app*."""

    @app.command(name="db-reset")
    def db_reset(
        i_know: bool = typer.Option(
            False,
            "--i-know-what-im-doing",
            help="Required to actually execute. Without it, the command runs in dry-run mode.",
        ),
        mode: str = typer.Option(
            "truncate",
            "--mode",
            help="truncate (wipe rows, keep schema) | drop-tables (DROP TABLE for each DECNET table)",
        ),
        url: Optional[str] = typer.Option(
            None,
            "--url",
            help="Override DECNET_DB_URL for this invocation (e.g. when cleanup needs admin creds).",
        ),
    ) -> None:
        """Wipe the MySQL database used by the DECNET dashboard.

        Destructive. Runs dry by default — pass --i-know-what-im-doing to commit.
        Only supported against MySQL; refuses to operate on SQLite.
        """
        import asyncio
        import os

        # Exit code convention: 2 for usage/config errors, 1 for runtime
        # failures during the actual reset.
        if mode not in ("truncate", "drop-tables"):
            console.print(f"[red]Invalid --mode '{mode}'. Expected: truncate | drop-tables.[/]")
            raise typer.Exit(2)

        db_type = os.environ.get("DECNET_DB_TYPE", "sqlite").lower()
        if db_type != "mysql":
            console.print(
                f"[red]db-reset is MySQL-only (DECNET_DB_TYPE='{db_type}'). "
                # Fixed: this segment has no placeholders, so the stray
                # f-prefix (ruff F541) was dropped.
                "For SQLite, just delete the decnet.db file.[/]"
            )
            raise typer.Exit(2)

        # DSN resolution order: --url flag → DECNET_DB_URL env var →
        # component env vars via build_mysql_url().
        dsn = url or os.environ.get("DECNET_DB_URL")
        if not dsn:
            from decnet.web.db.mysql.database import build_mysql_url
            try:
                dsn = build_mysql_url()
            except ValueError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(2) from e

        log.info("db-reset invoked mode=%s confirm=%s", mode, i_know)
        try:
            asyncio.run(_db_reset_mysql_async(dsn, mode=mode, confirm=i_know))
        except Exception as e:  # noqa: BLE001
            console.print(f"[red]db-reset failed: {e}[/]")
            raise typer.Exit(1) from e
|
||||||
307
decnet/cli/deploy.py
Normal file
307
decnet/cli/deploy.py
Normal file
@@ -0,0 +1,307 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
|
from decnet.archetypes import Archetype, get_archetype
|
||||||
|
from decnet.config import DecnetConfig
|
||||||
|
from decnet.distros import get_distro
|
||||||
|
from decnet.env import DECNET_API_HOST, DECNET_INGEST_LOG_FILE
|
||||||
|
from decnet.fleet import all_service_names, build_deckies, build_deckies_from_ini
|
||||||
|
from decnet.ini_loader import load_ini
|
||||||
|
from decnet.network import detect_interface, detect_subnet, allocate_ips, get_host_ip
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .gating import _require_master_mode
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def _deploy_swarm(config: "DecnetConfig", *, dry_run: bool, no_cache: bool) -> None:
    """Shard deckies round-robin across enrolled workers and POST to swarmctl."""
    base = _utils._swarmctl_base_url(None)
    # Eligible targets: hosts enrolled but idle, plus hosts already active.
    enrolled = _utils._http_request("GET", base + "/swarm/hosts?host_status=enrolled").json()
    active = _utils._http_request("GET", base + "/swarm/hosts?host_status=active").json()
    workers = [*enrolled, *active]
    if not workers:
        console.print("[red]No enrolled workers — run `decnet swarm enroll ...` first.[/]")
        raise typer.Exit(1)

    # Round-robin: decky i is pinned to worker (i mod len(workers)).
    assigned = [
        d.model_copy(update={"host_uuid": workers[idx % len(workers)]["uuid"]})
        for idx, d in enumerate(config.deckies)
    ]
    config = config.model_copy(update={"deckies": assigned})

    body = {"config": config.model_dump(mode="json"), "dry_run": dry_run, "no_cache": no_cache}
    console.print(f"[cyan]Dispatching {len(config.deckies)} deckies across {len(workers)} worker(s)...[/]")
    # Long timeout: workers may need to build images before answering.
    deploy_resp = _utils._http_request("POST", base + "/swarm/deploy", json_body=body, timeout=900.0)
    results = deploy_resp.json().get("results", [])

    table = Table(title="SWARM deploy results")
    for col in ("worker", "host_uuid", "ok", "detail"):
        table.add_column(col)

    any_failed = False
    for entry in results:
        succeeded = bool(entry.get("ok"))
        if not succeeded:
            any_failed = True
        detail = entry.get("detail")
        if isinstance(detail, dict):
            # Structured detail: surface its status field (or "ok").
            detail = detail.get("status") or "ok"
        table.add_row(
            str(entry.get("host_name") or ""),
            str(entry.get("host_uuid") or ""),
            "[green]yes[/]" if succeeded else "[red]no[/]",
            str(detail)[:80],
        )
    console.print(table)
    # Non-zero exit when any shard failed so CI/scripts can detect it.
    if any_failed:
        raise typer.Exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the ``deploy`` command to *app*."""

    @app.command()
    def deploy(
        mode: str = typer.Option("unihost", "--mode", "-m", help="Deployment mode: unihost | swarm"),
        deckies: Optional[int] = typer.Option(None, "--deckies", "-n", help="Number of deckies to deploy (required without --config)", min=1),
        interface: Optional[str] = typer.Option(None, "--interface", "-i", help="Host NIC (auto-detected if omitted)"),
        subnet: Optional[str] = typer.Option(None, "--subnet", help="LAN subnet CIDR (auto-detected if omitted)"),
        ip_start: Optional[str] = typer.Option(None, "--ip-start", help="First decky IP (auto if omitted)"),
        services: Optional[str] = typer.Option(None, "--services", help="Comma-separated services, e.g. ssh,smb,rdp"),
        randomize_services: bool = typer.Option(False, "--randomize-services", help="Assign random services to each decky"),
        distro: Optional[str] = typer.Option(None, "--distro", help="Comma-separated distro slugs, e.g. debian,ubuntu22,rocky9"),
        randomize_distros: bool = typer.Option(False, "--randomize-distros", help="Assign a random distro to each decky"),
        log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Host path for the collector to write RFC 5424 logs (e.g. /var/log/decnet/decnet.log)"),
        archetype_name: Optional[str] = typer.Option(None, "--archetype", "-a", help="Machine archetype slug (e.g. linux-server, windows-workstation)"),
        mutate_interval: Optional[int] = typer.Option(30, "--mutate-interval", help="Automatically rotate services every N minutes"),
        dry_run: bool = typer.Option(False, "--dry-run", help="Generate compose file without starting containers"),
        no_cache: bool = typer.Option(False, "--no-cache", help="Force rebuild all images, ignoring Docker layer cache"),
        parallel: bool = typer.Option(False, "--parallel", help="Build all images concurrently (enables BuildKit, separates build from up)"),
        ipvlan: bool = typer.Option(False, "--ipvlan", help="Use IPvlan L2 instead of MACVLAN (required on WiFi interfaces)"),
        config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to INI config file"),
        api: bool = typer.Option(False, "--api", help="Start the FastAPI backend to ingest and serve logs"),
        api_port: int = typer.Option(8000, "--api-port", help="Port for the backend API"),
        daemon: bool = typer.Option(False, "--daemon", help="Detach to background as a daemon process"),
    ) -> None:
        """Deploy deckies to the LAN."""
        import os
        import subprocess  # nosec B404
        import sys
        from pathlib import Path as _Path

        _require_master_mode("deploy")
        if daemon:
            log.info("deploy daemonizing mode=%s deckies=%s", mode, deckies)
            _utils._daemonize()

        log.info("deploy command invoked mode=%s deckies=%s dry_run=%s", mode, deckies, dry_run)
        if mode not in ("unihost", "swarm"):
            console.print("[red]--mode must be 'unihost' or 'swarm'[/]")
            raise typer.Exit(1)

        # ----- Build the fleet: from an INI file or from CLI flags. -----
        if config_file:
            try:
                ini = load_ini(config_file)
            except FileNotFoundError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1)

            # CLI flags take precedence over INI values; anything still
            # unset falls back to auto-detection on the chosen NIC.
            iface = interface or ini.interface or detect_interface()
            subnet_cidr = subnet or ini.subnet
            effective_gateway = ini.gateway
            if subnet_cidr is None:
                subnet_cidr, effective_gateway = detect_subnet(iface)
            elif effective_gateway is None:
                _, effective_gateway = detect_subnet(iface)

            host_ip = get_host_ip(iface)
            console.print(f"[dim]Config:[/] {config_file} [dim]Interface:[/] {iface} "
                          f"[dim]Subnet:[/] {subnet_cidr} [dim]Gateway:[/] {effective_gateway} "
                          f"[dim]Host IP:[/] {host_ip}")

            # Register INI-defined custom services before building deckies
            # so build_deckies_from_ini can resolve them by name.
            if ini.custom_services:
                from decnet.custom_service import CustomService
                from decnet.services.registry import register_custom_service
                for cs in ini.custom_services:
                    register_custom_service(
                        CustomService(
                            name=cs.name,
                            image=cs.image,
                            exec_cmd=cs.exec_cmd,
                            ports=cs.ports,
                        )
                    )

            effective_log_file = log_file
            try:
                decky_configs = build_deckies_from_ini(
                    ini, subnet_cidr, effective_gateway, host_ip, randomize_services, cli_mutate_interval=mutate_interval
                )
            except ValueError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1)
        else:
            # Flag-driven path: --deckies is mandatory without --config.
            if deckies is None:
                console.print("[red]--deckies is required when --config is not used.[/]")
                raise typer.Exit(1)

            services_list = [s.strip() for s in services.split(",")] if services else None
            if services_list:
                known = set(all_service_names())
                unknown = [s for s in services_list if s not in known]
                if unknown:
                    console.print(f"[red]Unknown service(s): {unknown}. Available: {all_service_names()}[/]")
                    raise typer.Exit(1)

            arch: Archetype | None = None
            if archetype_name:
                try:
                    arch = get_archetype(archetype_name)
                except ValueError as e:
                    console.print(f"[red]{e}[/]")
                    raise typer.Exit(1)

            # At least one way to choose services must be given.
            if not services_list and not randomize_services and not arch:
                console.print("[red]Specify --services, --archetype, or --randomize-services.[/]")
                raise typer.Exit(1)

            iface = interface or detect_interface()
            if subnet is None:
                subnet_cidr, effective_gateway = detect_subnet(iface)
            else:
                subnet_cidr = subnet
                _, effective_gateway = detect_subnet(iface)

            host_ip = get_host_ip(iface)
            console.print(f"[dim]Interface:[/] {iface} [dim]Subnet:[/] {subnet_cidr} "
                          f"[dim]Gateway:[/] {effective_gateway} [dim]Host IP:[/] {host_ip}")

            # Validate every requested distro slug before allocating IPs.
            distros_list = [d.strip() for d in distro.split(",")] if distro else None
            if distros_list:
                try:
                    for slug in distros_list:
                        get_distro(slug)
                except ValueError as e:
                    console.print(f"[red]{e}[/]")
                    raise typer.Exit(1)

            ips = allocate_ips(subnet_cidr, effective_gateway, host_ip, deckies, ip_start)
            decky_configs = build_deckies(
                deckies, ips, services_list, randomize_services,
                distros_explicit=distros_list, randomize_distros=randomize_distros,
                archetype=arch, mutate_interval=mutate_interval,
            )
            effective_log_file = log_file

        # API mode needs a log file to ingest; default one if missing.
        if api and not effective_log_file:
            effective_log_file = os.path.join(os.getcwd(), "decnet.log")
            console.print(f"[cyan]API mode enabled: defaulting log-file to {effective_log_file}[/]")

        config = DecnetConfig(
            mode=mode,
            interface=iface,
            subnet=subnet_cidr,
            gateway=effective_gateway,
            deckies=decky_configs,
            log_file=effective_log_file,
            ipvlan=ipvlan,
            mutate_interval=mutate_interval,
        )

        log.debug("deploy: config built deckies=%d interface=%s subnet=%s", len(config.deckies), config.interface, config.subnet)

        # Swarm mode dispatches to remote workers and returns — nothing
        # below (local engine + sidecars) applies.
        if mode == "swarm":
            _deploy_swarm(config, dry_run=dry_run, no_cache=no_cache)
            if dry_run:
                log.info("deploy: swarm dry-run complete, no workers dispatched")
            else:
                log.info("deploy: swarm deployment complete deckies=%d", len(config.deckies))
            return

        from decnet.engine import deploy as _deploy
        _deploy(config, dry_run=dry_run, no_cache=no_cache, parallel=parallel)
        if dry_run:
            log.info("deploy: dry-run complete, no containers started")
        else:
            log.info("deploy: deployment complete deckies=%d", len(config.deckies))

        # ----- Best-effort background sidecars. Each is spawned fully
        # detached (start_new_session) so it outlives this CLI process. -----
        if mutate_interval is not None and not dry_run:
            console.print(f"[green]Starting DECNET Mutator watcher in the background (interval: {mutate_interval}m)...[/]")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "mutate", "--watch"],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start mutator watcher.[/]")

        # Collector: skipped in API mode (the API does its own ingestion).
        if effective_log_file and not dry_run and not api:
            # Collector's own stdout/stderr goes to a sibling .collector.log.
            _collector_err = _Path(effective_log_file).with_suffix(".collector.log")
            console.print(f"[bold cyan]Starting log collector[/] → {effective_log_file}")
            subprocess.Popen(  # nosec B603
                [sys.executable, "-m", "decnet.cli", "collect", "--log-file", str(effective_log_file)],
                stdin=subprocess.DEVNULL,
                stdout=open(_collector_err, "a"),
                stderr=subprocess.STDOUT,
                start_new_session=True,
            )

        if api and not dry_run:
            console.print(f"[green]Starting DECNET API on port {api_port}...[/]")
            # Ingest path is handed to uvicorn via the environment.
            _env: dict[str, str] = os.environ.copy()
            _env["DECNET_INGEST_LOG_FILE"] = str(effective_log_file or "")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "uvicorn", "decnet.web.api:app", "--host", DECNET_API_HOST, "--port", str(api_port)],
                    env=_env,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT
                )
                console.print(f"[dim]API running at http://{DECNET_API_HOST}:{api_port}[/]")
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")

        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-PROBER[/] (auto-discovers attackers from log stream)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "probe", "--daemon", "--log-file", str(effective_log_file)],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-PROBER.[/]")

        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-PROFILER[/] (builds attacker profiles from log stream)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "profiler", "--daemon"],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-PROFILER.[/]")

        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-SNIFFER[/] (passive network capture)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "sniffer", "--daemon", "--log-file", str(effective_log_file)],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-SNIFFER.[/]")
|
||||||
74
decnet/cli/forwarder.py
Normal file
74
decnet/cli/forwarder.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import pathlib
|
||||||
|
import signal
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from decnet.env import DECNET_INGEST_LOG_FILE
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the ``forwarder`` command to *app*."""

    @app.command()
    def forwarder(
        master_host: Optional[str] = typer.Option(None, "--master-host", help="Master listener hostname/IP (default: $DECNET_SWARM_MASTER_HOST)"),
        master_port: int = typer.Option(6514, "--master-port", help="Master listener TCP port (RFC 5425 default 6514)"),
        log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Local RFC 5424 file to tail and forward"),
        agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker cert bundle dir (default: ~/.decnet/agent)"),
        state_db: Optional[str] = typer.Option(None, "--state-db", help="Forwarder offset SQLite path (default: <agent_dir>/forwarder.db)"),
        poll_interval: float = typer.Option(0.5, "--poll-interval", help="Seconds between log file stat checks"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the worker-side syslog-over-TLS forwarder (RFC 5425, mTLS to master:6514)."""
        from decnet.env import DECNET_SWARM_MASTER_HOST
        from decnet.swarm import pki
        from decnet.swarm.log_forwarder import ForwarderConfig, run_forwarder

        # --- Validate inputs before daemonizing (exit code 2 = usage error). ---
        resolved_host = master_host or DECNET_SWARM_MASTER_HOST
        if not resolved_host:
            console.print("[red]--master-host is required (or set DECNET_SWARM_MASTER_HOST).[/]")
            raise typer.Exit(2)

        # The mTLS client cert must already exist; enrollment produces it.
        resolved_agent_dir = pathlib.Path(agent_dir) if agent_dir else pki.DEFAULT_AGENT_DIR
        if not (resolved_agent_dir / "worker.crt").exists():
            console.print(f"[red]No worker cert bundle at {resolved_agent_dir} — enroll from the master first.[/]")
            raise typer.Exit(2)

        if not log_file:
            console.print("[red]--log-file is required.[/]")
            raise typer.Exit(2)

        cfg = ForwarderConfig(
            log_path=pathlib.Path(log_file),
            master_host=resolved_host,
            master_port=master_port,
            agent_dir=resolved_agent_dir,
            state_db=pathlib.Path(state_db) if state_db else None,
        )

        if daemon:
            log.info("forwarder daemonizing master=%s:%d log=%s", resolved_host, master_port, log_file)
            _utils._daemonize()

        log.info("forwarder command invoked master=%s:%d log=%s", resolved_host, master_port, log_file)
        console.print(f"[green]Starting DECNET forwarder → {resolved_host}:{master_port} (mTLS)...[/]")

        async def _main() -> None:
            # Translate SIGTERM/SIGINT into a clean stop of run_forwarder.
            stop = asyncio.Event()
            loop = asyncio.get_running_loop()
            for sig in (signal.SIGTERM, signal.SIGINT):
                try:
                    loop.add_signal_handler(sig, stop.set)
                except (NotImplementedError, RuntimeError):  # pragma: no cover
                    # add_signal_handler is unavailable on some platforms
                    # (e.g. Windows) — fall back to KeyboardInterrupt below.
                    pass
            await run_forwarder(cfg, poll_interval=poll_interval, stop_event=stop)

        try:
            asyncio.run(_main())
        except KeyboardInterrupt:
            # Ctrl-C when signal handlers could not be installed.
            pass
|
||||||
71
decnet/cli/gating.py
Normal file
71
decnet/cli/gating.py
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
"""Role-based CLI gating.
|
||||||
|
|
||||||
|
MAINTAINERS: when you add a new Typer command (or add_typer group) that is
|
||||||
|
master-only, register its name in MASTER_ONLY_COMMANDS / MASTER_ONLY_GROUPS
|
||||||
|
below. The gate is the only thing that:
|
||||||
|
(a) hides the command from `decnet --help` on worker hosts, and
|
||||||
|
(b) prevents a misconfigured worker from invoking master-side logic.
|
||||||
|
Forgetting to register a new command is a role-boundary bug. Grep for
|
||||||
|
MASTER_ONLY when touching command registration.
|
||||||
|
|
||||||
|
Worker-legitimate commands (NOT in these sets): agent, updater, forwarder,
|
||||||
|
status, collect, probe, sniffer. Agents run deckies locally and should be
|
||||||
|
able to inspect them + run the per-host microservices (collector streams
|
||||||
|
container logs, prober characterizes attackers hitting this host, sniffer
|
||||||
|
captures traffic). Mutator and Profiler stay master-only: the mutator
|
||||||
|
orchestrates respawns across the swarm; the profiler rebuilds attacker
|
||||||
|
profiles against the master DB (no per-host DB exists).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from .utils import console
|
||||||
|
|
||||||
|
# Names of top-level Typer commands that must never run on an agent host.
# _gate_commands_by_mode() strips these from the app at registration time;
# _require_master_mode() re-checks inside each command body.
MASTER_ONLY_COMMANDS: frozenset[str] = frozenset({
    "api", "swarmctl", "deploy", "redeploy", "teardown",
    "mutate", "listener", "profiler",
    "services", "distros", "correlate", "archetypes", "web",
    "db-reset",
})
# add_typer sub-apps (command groups) that are likewise master-only.
MASTER_ONLY_GROUPS: frozenset[str] = frozenset({"swarm"})
|
||||||
|
|
||||||
|
|
||||||
|
def _agent_mode_active() -> bool:
|
||||||
|
"""True when the host is configured as an agent AND master commands are
|
||||||
|
disallowed (the default for agents). Workers overriding this explicitly
|
||||||
|
set DECNET_DISALLOW_MASTER=false to opt into hybrid use."""
|
||||||
|
mode = os.environ.get("DECNET_MODE", "master").lower()
|
||||||
|
disallow = os.environ.get("DECNET_DISALLOW_MASTER", "true").lower() == "true"
|
||||||
|
return mode == "agent" and disallow
|
||||||
|
|
||||||
|
|
||||||
|
def _require_master_mode(command_name: str) -> None:
    """Defence-in-depth guard invoked inside every master-only command body.

    The registration-time gate in _gate_commands_by_mode() already removes
    these commands from Typer's dispatch table on agent hosts, but a direct
    function import (tests, third-party tooling) would bypass Typer entirely
    — this call-time check closes that gap.

    Raises:
        typer.Exit: exit code 1 when the host is configured as an agent.
    """
    if not _agent_mode_active():
        return
    console.print(
        f"[red]`decnet {command_name}` is a master-only command; this host "
        f"is configured as an agent (DECNET_MODE=agent).[/]"
    )
    raise typer.Exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def _gate_commands_by_mode(_app: typer.Typer) -> None:
    """Strip master-only commands and groups from *_app* on agent hosts.

    No-op when the host is not in (blocking) agent mode. Mutates the Typer
    app's registration lists in place so gated entries never appear in
    `--help` output or the dispatch table.
    """
    if not _agent_mode_active():
        return

    def _cmd_name(cmd) -> str:
        # Typer falls back to the callback's function name when no explicit
        # name was supplied at registration time.
        return cmd.name or cmd.callback.__name__

    _app.registered_commands = [
        cmd for cmd in _app.registered_commands
        if _cmd_name(cmd) not in MASTER_ONLY_COMMANDS
    ]
    _app.registered_groups = [
        grp for grp in _app.registered_groups
        if grp.name not in MASTER_ONLY_GROUPS
    ]
|
||||||
52
decnet/cli/inventory.py
Normal file
52
decnet/cli/inventory.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import typer
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
|
from decnet.archetypes import all_archetypes
|
||||||
|
from decnet.distros import all_distros
|
||||||
|
from decnet.services.registry import all_services
|
||||||
|
|
||||||
|
from .utils import console
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the read-only inventory listing commands to *app*."""

    @app.command(name="services")
    def list_services() -> None:
        """List all registered honeypot service plugins."""
        tbl = Table(title="Available Services", show_lines=True)
        tbl.add_column("Name", style="bold cyan")
        tbl.add_column("Ports")
        tbl.add_column("Image")
        for svc_name, svc in sorted(all_services().items()):
            ports = ", ".join(str(p) for p in svc.ports)
            tbl.add_row(svc_name, ports, svc.default_image)
        console.print(tbl)

    @app.command(name="distros")
    def list_distros() -> None:
        """List all available OS distro profiles for deckies."""
        tbl = Table(title="Available Distro Profiles", show_lines=True)
        tbl.add_column("Slug", style="bold cyan")
        tbl.add_column("Display Name")
        tbl.add_column("Docker Image", style="dim")
        for slug, prof in sorted(all_distros().items()):
            tbl.add_row(slug, prof.display_name, prof.image)
        console.print(tbl)

    @app.command(name="archetypes")
    def list_archetypes() -> None:
        """List all machine archetype profiles."""
        tbl = Table(title="Machine Archetypes", show_lines=True)
        tbl.add_column("Slug", style="bold cyan")
        tbl.add_column("Display Name")
        tbl.add_column("Default Services", style="green")
        tbl.add_column("Description", style="dim")
        for slug, arch in sorted(all_archetypes().items()):
            tbl.add_row(slug, arch.display_name, ", ".join(arch.services), arch.description)
        console.print(tbl)
|
||||||
97
decnet/cli/lifecycle.py
Normal file
97
decnet/cli/lifecycle.py
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import subprocess # nosec B404
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
|
from decnet.env import DECNET_INGEST_LOG_FILE
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .gating import _agent_mode_active, _require_master_mode
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the service-lifecycle commands (redeploy, status, teardown) to *app*."""

    @app.command()
    def redeploy(
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to the DECNET log file"),
    ) -> None:
        """Check running DECNET services and relaunch any that are down."""
        # redeploy is listed in MASTER_ONLY_COMMANDS; mirror teardown's
        # defence-in-depth guard so a direct call is blocked on agent hosts
        # (gating.py requires this at the top of every master-only body).
        _require_master_mode("redeploy")
        log.info("redeploy: checking services")
        registry = _utils._service_registry(str(log_file))

        table = Table(title="DECNET Services", show_lines=True)
        table.add_column("Service", style="bold cyan")
        table.add_column("Status")
        table.add_column("PID", style="dim")
        table.add_column("Action")

        relaunched = 0
        for name, match_fn, launch_args in registry:
            pid = _utils._is_running(match_fn)
            if pid is not None:
                table.add_row(name, "[green]UP[/]", str(pid), "—")
            else:
                try:
                    # Fully detach the relaunched service: no inherited stdio,
                    # own session so it survives this CLI process exiting.
                    subprocess.Popen(  # nosec B603
                        launch_args,
                        stdin=subprocess.DEVNULL,
                        stdout=subprocess.DEVNULL,
                        stderr=subprocess.STDOUT,
                        start_new_session=True,
                    )
                    table.add_row(name, "[red]DOWN[/]", "—", "[green]relaunched[/]")
                    relaunched += 1
                except (FileNotFoundError, subprocess.SubprocessError) as exc:
                    # Best-effort: report the failure per-service rather than abort.
                    table.add_row(name, "[red]DOWN[/]", "—", f"[red]failed: {exc}[/]")

        console.print(table)
        if relaunched:
            console.print(f"[green]{relaunched} service(s) relaunched.[/]")
        else:
            console.print("[green]All services running.[/]")

    @app.command()
    def status() -> None:
        """Show running deckies and their status."""
        log.info("status command invoked")
        from decnet.engine import status as _status
        _status()

        registry = _utils._service_registry(str(DECNET_INGEST_LOG_FILE))
        if _agent_mode_active():
            # Agents never run the master-side microservices; hide them so the
            # table does not report them as spuriously DOWN.
            registry = [r for r in registry if r[0] not in {"Mutator", "Profiler", "API"}]
        svc_table = Table(title="DECNET Services", show_lines=True)
        svc_table.add_column("Service", style="bold cyan")
        svc_table.add_column("Status")
        svc_table.add_column("PID", style="dim")

        for name, match_fn, _launch_args in registry:
            pid = _utils._is_running(match_fn)
            if pid is not None:
                svc_table.add_row(name, "[green]UP[/]", str(pid))
            else:
                svc_table.add_row(name, "[red]DOWN[/]", "—")

        console.print(svc_table)

    @app.command()
    def teardown(
        all_: bool = typer.Option(False, "--all", help="Tear down all deckies and remove network"),
        id_: Optional[str] = typer.Option(None, "--id", help="Tear down a specific decky by name"),
    ) -> None:
        """Stop and remove deckies."""
        _require_master_mode("teardown")
        if not all_ and not id_:
            console.print("[red]Specify --all or --id <name>.[/]")
            raise typer.Exit(1)

        log.info("teardown command invoked all=%s id=%s", all_, id_)
        from decnet.engine import teardown as _teardown
        # decky_id=None (the --all path) tears down everything — presumably
        # engine.teardown treats None as "all"; confirm against its signature.
        _teardown(decky_id=id_)
        log.info("teardown complete all=%s id=%s", all_, id_)

        if all_:
            _utils._kill_all_services()
|
||||||
57
decnet/cli/listener.py
Normal file
57
decnet/cli/listener.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import pathlib
|
||||||
|
import signal
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the master-side syslog-TLS listener command to *app*."""

    @app.command()
    def listener(
        bind_host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the master syslog-TLS listener"),  # nosec B104
        bind_port: int = typer.Option(6514, "--port", help="Listener TCP port (RFC 5425 default 6514)"),
        log_path: Optional[str] = typer.Option(None, "--log-path", help="RFC 5424 forensic sink (default: ./master.log)"),
        json_path: Optional[str] = typer.Option(None, "--json-path", help="Parsed-JSON ingest sink (default: ./master.json)"),
        ca_dir: Optional[str] = typer.Option(None, "--ca-dir", help="DECNET CA dir (default: ~/.decnet/ca)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the master-side syslog-over-TLS listener (RFC 5425, mTLS)."""
        # listener is in MASTER_ONLY_COMMANDS: add gating.py's required
        # defence-in-depth check (deferred import, matching this file's
        # deferred-import style for heavy/optional modules).
        from .gating import _require_master_mode
        _require_master_mode("listener")
        from decnet.swarm import pki
        from decnet.swarm.log_listener import ListenerConfig, run_listener

        resolved_ca_dir = pathlib.Path(ca_dir) if ca_dir else pki.DEFAULT_CA_DIR
        resolved_log = pathlib.Path(log_path) if log_path else pathlib.Path("master.log")
        resolved_json = pathlib.Path(json_path) if json_path else pathlib.Path("master.json")

        cfg = ListenerConfig(
            log_path=resolved_log, json_path=resolved_json,
            bind_host=bind_host, bind_port=bind_port, ca_dir=resolved_ca_dir,
        )

        if daemon:
            log.info("listener daemonizing host=%s port=%d", bind_host, bind_port)
            _utils._daemonize()

        log.info("listener command invoked host=%s port=%d", bind_host, bind_port)
        console.print(f"[green]Starting DECNET log listener on {bind_host}:{bind_port} (mTLS)...[/]")

        async def _main() -> None:
            stop = asyncio.Event()
            loop = asyncio.get_running_loop()
            for sig in (signal.SIGTERM, signal.SIGINT):
                try:
                    # Graceful shutdown on TERM/INT; some platforms (Windows,
                    # non-main threads) cannot install loop signal handlers.
                    loop.add_signal_handler(sig, stop.set)
                except (NotImplementedError, RuntimeError):  # pragma: no cover
                    pass
            await run_listener(cfg, stop_event=stop)

        try:
            asyncio.run(_main())
        except KeyboardInterrupt:
            # SIGINT may still surface here when the signal handler could
            # not be installed; treat it as a clean shutdown.
            pass
|
||||||
34
decnet/cli/profiler.py
Normal file
34
decnet/cli/profiler.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the standalone attacker-profiler command to *app*."""

    @app.command(name="profiler")
    def profiler_cmd(
        interval: int = typer.Option(30, "--interval", "-i", help="Seconds between profile rebuild cycles"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the attacker profiler as a standalone microservice."""
        # profiler is master-only (it rebuilds profiles against the master
        # DB — no per-host DB exists): add gating.py's required
        # defence-in-depth check before doing anything else.
        from .gating import _require_master_mode
        _require_master_mode("profiler")
        import asyncio
        from decnet.profiler import attacker_profile_worker
        from decnet.web.dependencies import repo

        if daemon:
            log.info("profiler daemonizing interval=%d", interval)
            _utils._daemonize()

        log.info("profiler starting interval=%d", interval)
        console.print(f"[bold cyan]Profiler starting[/] (interval: {interval}s)")

        async def _run() -> None:
            # The repo must be initialized before the worker first queries it.
            await repo.initialize()
            await attacker_profile_worker(repo, interval=interval)

        try:
            asyncio.run(_run())
        except KeyboardInterrupt:
            console.print("\n[yellow]Profiler stopped.[/]")
|
||||||
31
decnet/cli/sniffer.py
Normal file
31
decnet/cli/sniffer.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from decnet.env import DECNET_INGEST_LOG_FILE
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the standalone network-sniffer command to *app*."""

    @app.command(name="sniffer")
    def sniffer_cmd(
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write captured syslog + JSON records"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the network sniffer as a standalone microservice."""
        # Deferred imports keep CLI startup light.
        import asyncio
        from decnet.sniffer import sniffer_worker

        if daemon:
            log.info("sniffer daemonizing log_file=%s", log_file)
            _utils._daemonize()

        log.info("sniffer starting log_file=%s", log_file)
        console.print(f"[bold cyan]Sniffer starting[/] → {log_file}")

        try:
            asyncio.run(sniffer_worker(log_file))
        except KeyboardInterrupt:
            # Ctrl-C is the expected foreground shutdown path.
            console.print("\n[yellow]Sniffer stopped.[/]")
|
||||||
346
decnet/cli/swarm.py
Normal file
346
decnet/cli/swarm.py
Normal file
@@ -0,0 +1,346 @@
|
|||||||
|
"""`decnet swarm ...` — master-side operator commands (HTTP to local swarmctl)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the `decnet swarm ...` master-side command group to *app*.

    All subcommands talk HTTP to the local swarmctl controller via
    _utils._http_request / _utils._swarmctl_base_url; the group itself is
    gated master-only via MASTER_ONLY_GROUPS in gating.py.
    """
    swarm_app = typer.Typer(
        name="swarm",
        help="Manage swarm workers (enroll, list, decommission). Requires `decnet swarmctl` running.",
        no_args_is_help=True,
    )
    app.add_typer(swarm_app, name="swarm")

    @swarm_app.command("enroll")
    def swarm_enroll(
        name: str = typer.Option(..., "--name", help="Short hostname for the worker (also the cert CN)"),
        address: str = typer.Option(..., "--address", help="IP or DNS the master uses to reach the worker"),
        agent_port: int = typer.Option(8765, "--agent-port", help="Worker agent TCP port"),
        sans: Optional[str] = typer.Option(None, "--sans", help="Comma-separated extra SANs for the worker cert"),
        notes: Optional[str] = typer.Option(None, "--notes", help="Free-form operator notes"),
        out_dir: Optional[str] = typer.Option(None, "--out-dir", help="Write the bundle (ca.crt/worker.crt/worker.key) to this dir for scp"),
        updater: bool = typer.Option(False, "--updater", help="Also issue an updater-identity cert (CN=updater@<name>) for the remote self-updater"),
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL (default: 127.0.0.1:8770)"),
    ) -> None:
        """Issue a mTLS bundle for a new worker and register it in the swarm."""
        import pathlib as _pathlib

        # Build the enrollment request; optional fields are omitted entirely
        # when not supplied rather than sent as null.
        body: dict = {"name": name, "address": address, "agent_port": agent_port}
        if sans:
            body["sans"] = [s.strip() for s in sans.split(",") if s.strip()]
        if notes:
            body["notes"] = notes
        if updater:
            body["issue_updater_bundle"] = True

        resp = _utils._http_request("POST", _utils._swarmctl_base_url(url) + "/swarm/enroll", json_body=body)
        data = resp.json()

        console.print(f"[green]Enrolled worker:[/] {data['name']} "
                      f"[dim]uuid=[/]{data['host_uuid']} "
                      f"[dim]fingerprint=[/]{data['fingerprint']}")
        if data.get("updater"):
            console.print(f"[green] + updater identity[/] "
                          f"[dim]fingerprint=[/]{data['updater']['fingerprint']}")

        if out_dir:
            # Persist the returned PEMs to disk so the operator can scp them.
            target = _pathlib.Path(out_dir).expanduser()
            target.mkdir(parents=True, exist_ok=True)
            (target / "ca.crt").write_text(data["ca_cert_pem"])
            (target / "worker.crt").write_text(data["worker_cert_pem"])
            (target / "worker.key").write_text(data["worker_key_pem"])
            # Tighten the private key's permissions; best-effort (e.g. on
            # filesystems that don't support chmod).
            for leaf in ("worker.key",):
                try:
                    (target / leaf).chmod(0o600)
                except OSError:
                    pass
            console.print(f"[cyan]Agent bundle written to[/] {target}")

            if data.get("updater"):
                # The updater identity gets its own sibling directory:
                # <out-dir>-updater next to <out-dir>.
                upd_target = target.parent / f"{target.name}-updater"
                upd_target.mkdir(parents=True, exist_ok=True)
                (upd_target / "ca.crt").write_text(data["ca_cert_pem"])
                (upd_target / "updater.crt").write_text(data["updater"]["updater_cert_pem"])
                (upd_target / "updater.key").write_text(data["updater"]["updater_key_pem"])
                try:
                    (upd_target / "updater.key").chmod(0o600)
                except OSError:
                    pass
                console.print(f"[cyan]Updater bundle written to[/] {upd_target}")
                console.print("[dim]Ship the agent dir to ~/.decnet/agent/ and the updater dir to ~/.decnet/updater/ on the worker.[/]")
            else:
                console.print("[dim]Ship this directory to the worker at ~/.decnet/agent/ (or wherever `decnet agent --agent-dir` points).[/]")
        else:
            console.print("[yellow]No --out-dir given — bundle PEMs are in the JSON response; persist them before leaving this shell.[/]")

    @swarm_app.command("list")
    def swarm_list(
        host_status: Optional[str] = typer.Option(None, "--status", help="Filter by status (enrolled|active|unreachable|decommissioned)"),
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
    ) -> None:
        """List enrolled workers."""
        q = f"?host_status={host_status}" if host_status else ""
        resp = _utils._http_request("GET", _utils._swarmctl_base_url(url) + "/swarm/hosts" + q)
        rows = resp.json()
        if not rows:
            console.print("[dim]No workers enrolled.[/]")
            return
        table = Table(title="DECNET swarm workers")
        for col in ("name", "address", "port", "status", "last heartbeat", "enrolled"):
            table.add_column(col)
        for r in rows:
            # Every cell is stringified defensively — missing fields render
            # as empty or an em-dash rather than "None".
            table.add_row(
                r.get("name") or "",
                r.get("address") or "",
                str(r.get("agent_port") or ""),
                r.get("status") or "",
                str(r.get("last_heartbeat") or "—"),
                str(r.get("enrolled_at") or "—"),
            )
        console.print(table)

    @swarm_app.command("check")
    def swarm_check(
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
        json_out: bool = typer.Option(False, "--json", help="Emit JSON instead of a table"),
    ) -> None:
        """Actively probe every enrolled worker and refresh status + last_heartbeat."""
        # Probing every worker can take a while — allow a longer HTTP timeout.
        resp = _utils._http_request("POST", _utils._swarmctl_base_url(url) + "/swarm/check", timeout=60.0)
        payload = resp.json()
        results = payload.get("results", [])

        if json_out:
            console.print_json(data=payload)
            return

        if not results:
            console.print("[dim]No workers enrolled.[/]")
            return

        table = Table(title="DECNET swarm check")
        for col in ("name", "address", "reachable", "detail"):
            table.add_column(col)
        for r in results:
            reachable = r.get("reachable")
            mark = "[green]yes[/]" if reachable else "[red]no[/]"
            detail = r.get("detail")
            detail_str = "—"
            if isinstance(detail, dict):
                # Prefer the status field; otherwise flatten the dict.
                detail_str = detail.get("status") or ", ".join(f"{k}={v}" for k, v in detail.items())
            elif detail is not None:
                detail_str = str(detail)
            table.add_row(
                r.get("name") or "",
                r.get("address") or "",
                mark,
                detail_str,
            )
        console.print(table)

    @swarm_app.command("update")
    def swarm_update(
        host: Optional[str] = typer.Option(None, "--host", help="Target worker (name or UUID). Omit with --all."),
        all_hosts: bool = typer.Option(False, "--all", help="Push to every enrolled worker."),
        include_self: bool = typer.Option(False, "--include-self", help="Also push to each updater's /update-self after a successful agent update."),
        root: Optional[str] = typer.Option(None, "--root", help="Source tree to tar (default: CWD)."),
        exclude: list[str] = typer.Option([], "--exclude", help="Additional exclude glob. Repeatable."),
        updater_port: int = typer.Option(8766, "--updater-port", help="Port the workers' updater listens on."),
        dry_run: bool = typer.Option(False, "--dry-run", help="Build the tarball and print stats; no network."),
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL."),
    ) -> None:
        """Push the current working tree to workers' self-updaters (with auto-rollback on failure)."""
        import asyncio
        import pathlib as _pathlib

        from decnet.swarm.tar_tree import tar_working_tree, detect_git_sha
        from decnet.swarm.updater_client import UpdaterClient

        # Exactly one of --host / --all must be supplied.
        if not (host or all_hosts):
            console.print("[red]Supply --host <name> or --all.[/]")
            raise typer.Exit(2)
        if host and all_hosts:
            console.print("[red]--host and --all are mutually exclusive.[/]")
            raise typer.Exit(2)

        base = _utils._swarmctl_base_url(url)
        resp = _utils._http_request("GET", base + "/swarm/hosts")
        rows = resp.json()
        if host:
            # --host matches on either name or UUID.
            targets = [r for r in rows if r.get("name") == host or r.get("uuid") == host]
            if not targets:
                console.print(f"[red]No enrolled worker matching '{host}'.[/]")
                raise typer.Exit(1)
        else:
            # --all skips decommissioned workers.
            targets = [r for r in rows if r.get("status") != "decommissioned"]
        if not targets:
            console.print("[dim]No targets.[/]")
            return

        # Tar the tree once and reuse the same bytes for every target.
        tree_root = _pathlib.Path(root) if root else _pathlib.Path.cwd()
        sha = detect_git_sha(tree_root)
        console.print(f"[dim]Tarring[/] {tree_root} [dim]sha={sha or '(not a git repo)'}[/]")
        tarball = tar_working_tree(tree_root, extra_excludes=exclude)
        console.print(f"[dim]Tarball size:[/] {len(tarball):,} bytes")

        if dry_run:
            console.print("[yellow]--dry-run: not pushing.[/]")
            for t in targets:
                console.print(f" would push to [cyan]{t.get('name')}[/] at {t.get('address')}:{updater_port}")
            return

        async def _push_one(h: dict) -> dict:
            # Push to one worker; /update-self only after the agent update
            # returned 200. All exceptions are captured into the result dict
            # so one failed worker never aborts the fan-out.
            name = h.get("name") or h.get("uuid")
            out: dict = {"name": name, "address": h.get("address"), "agent": None, "self": None}
            try:
                async with UpdaterClient(h, updater_port=updater_port) as u:
                    r = await u.update(tarball, sha=sha)
                    out["agent"] = {"status": r.status_code, "body": r.json() if r.content else {}}
                    if r.status_code == 200 and include_self:
                        rs = await u.update_self(tarball, sha=sha)
                        out["self"] = {"status": rs.status_code, "body": rs.json() if rs.content else {}}
            except Exception as exc:  # noqa: BLE001
                out["error"] = f"{type(exc).__name__}: {exc}"
            return out

        async def _push_all() -> list[dict]:
            # Fan out to all targets concurrently.
            return await asyncio.gather(*(_push_one(t) for t in targets))

        results = asyncio.run(_push_all())

        table = Table(title="DECNET swarm update")
        for col in ("host", "address", "agent", "self", "detail"):
            table.add_column(col)
        any_failure = False
        for r in results:
            agent = r.get("agent") or {}
            selff = r.get("self") or {}
            err = r.get("error")
            if err:
                any_failure = True
                table.add_row(r["name"], r.get("address") or "", "[red]error[/]", "—", err)
                continue
            a_status = agent.get("status")
            if a_status == 200:
                agent_cell = "[green]updated[/]"
            elif a_status == 409:
                # 409 is the updater's auto-rollback response.
                agent_cell = "[yellow]rolled-back[/]"
                any_failure = True
            else:
                agent_cell = f"[red]{a_status}[/]"
                any_failure = True
            if not include_self:
                self_cell = "—"
            elif selff.get("status") == 200 or selff.get("status") is None:
                # No self result at all means the self push was skipped
                # (agent update did not return 200).
                self_cell = "[green]ok[/]" if selff else "[dim]skipped[/]"
            else:
                self_cell = f"[red]{selff.get('status')}[/]"
            detail = ""
            body = agent.get("body") or {}
            if isinstance(body, dict):
                detail = body.get("release", {}).get("sha") or body.get("detail", {}).get("error") or ""
            table.add_row(r["name"], r.get("address") or "", agent_cell, self_cell, str(detail)[:80])
        console.print(table)

        # Non-zero exit when any worker failed, for scripting.
        if any_failure:
            raise typer.Exit(1)

    @swarm_app.command("deckies")
    def swarm_deckies(
        host: Optional[str] = typer.Option(None, "--host", help="Filter by worker name or UUID"),
        state: Optional[str] = typer.Option(None, "--state", help="Filter by shard state (pending|running|failed|torn_down)"),
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
        json_out: bool = typer.Option(False, "--json", help="Emit JSON instead of a table"),
    ) -> None:
        """List deployed deckies across the swarm with their owning worker host."""
        base = _utils._swarmctl_base_url(url)

        # Resolve a --host name/UUID to the canonical host UUID first.
        host_uuid: Optional[str] = None
        if host:
            resp = _utils._http_request("GET", base + "/swarm/hosts")
            rows = resp.json()
            match = next((r for r in rows if r.get("uuid") == host or r.get("name") == host), None)
            if match is None:
                console.print(f"[red]No enrolled worker matching '{host}'.[/]")
                raise typer.Exit(1)
            host_uuid = match["uuid"]

        query = []
        if host_uuid:
            query.append(f"host_uuid={host_uuid}")
        if state:
            query.append(f"state={state}")
        path = "/swarm/deckies" + ("?" + "&".join(query) if query else "")

        resp = _utils._http_request("GET", base + path)
        rows = resp.json()

        if json_out:
            console.print_json(data=rows)
            return

        if not rows:
            console.print("[dim]No deckies deployed.[/]")
            return

        table = Table(title="DECNET swarm deckies")
        for col in ("decky", "host", "address", "state", "services"):
            table.add_column(col)
        for r in rows:
            services = ",".join(r.get("services") or []) or "—"
            state_val = r.get("state") or "pending"
            # Color-code the shard state; unknown states render uncolored.
            colored = {
                "running": f"[green]{state_val}[/]",
                "failed": f"[red]{state_val}[/]",
                "pending": f"[yellow]{state_val}[/]",
                "torn_down": f"[dim]{state_val}[/]",
            }.get(state_val, state_val)
            table.add_row(
                r.get("decky_name") or "",
                r.get("host_name") or "<unknown>",
                r.get("host_address") or "",
                colored,
                services,
            )
        console.print(table)

    @swarm_app.command("decommission")
    def swarm_decommission(
        name: Optional[str] = typer.Option(None, "--name", help="Worker hostname"),
        uuid: Optional[str] = typer.Option(None, "--uuid", help="Worker UUID (skip lookup)"),
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
        yes: bool = typer.Option(False, "--yes", "-y", help="Skip interactive confirmation"),
    ) -> None:
        """Remove a worker from the swarm (cascades decky shard rows)."""
        if not (name or uuid):
            console.print("[red]Supply --name or --uuid.[/]")
            raise typer.Exit(2)

        base = _utils._swarmctl_base_url(url)
        target_uuid = uuid
        target_name = name
        if target_uuid is None:
            # Only --name was given: look up the UUID from the host list.
            resp = _utils._http_request("GET", base + "/swarm/hosts")
            rows = resp.json()
            match = next((r for r in rows if r.get("name") == name), None)
            if match is None:
                console.print(f"[red]No enrolled worker named '{name}'.[/]")
                raise typer.Exit(1)
            target_uuid = match["uuid"]
            target_name = match.get("name") or target_name

        if not yes:
            # Destructive operation: confirm interactively unless --yes.
            confirm = typer.confirm(f"Decommission worker {target_name!r} ({target_uuid})?", default=False)
            if not confirm:
                console.print("[dim]Aborted.[/]")
                raise typer.Exit(0)

        _utils._http_request("DELETE", f"{base}/swarm/hosts/{target_uuid}")
        console.print(f"[green]Decommissioned {target_name or target_uuid}.[/]")
|
||||||
104
decnet/cli/swarmctl.py
Normal file
104
decnet/cli/swarmctl.py
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import signal
|
||||||
|
import subprocess # nosec B404
|
||||||
|
import sys
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .gating import _require_master_mode
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the ``swarmctl`` command to the top-level Typer app."""
    @app.command()
    def swarmctl(
        port: int = typer.Option(8770, "--port", help="Port for the swarm controller"),
        host: str = typer.Option("127.0.0.1", "--host", help="Bind address for the swarm controller"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"),
        tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"),
        cert: Optional[str] = typer.Option(None, "--cert", help="BYOC: path to TLS server cert (PEM). Auto-issues from the DECNET CA if omitted."),
        key: Optional[str] = typer.Option(None, "--key", help="BYOC: path to TLS server private key (PEM)."),
        client_ca: Optional[str] = typer.Option(None, "--client-ca", help="CA bundle used to verify worker client certs. Defaults to the DECNET CA."),
    ) -> None:
        """Run the DECNET SWARM controller (master-side, separate process from `decnet api`).

        By default, `decnet swarmctl` auto-spawns `decnet listener` as a fully-
        detached sibling process so the master starts accepting forwarder
        connections on 6514 without a second manual invocation. The listener
        survives swarmctl restarts and crashes — if it dies on its own,
        restart it manually with `decnet listener --daemon …`. Pass
        --no-listener to skip.

        Pass ``--tls`` to serve over HTTPS with mutual-TLS enforcement. By
        default the server cert is auto-issued from the DECNET CA under
        ``~/.decnet/swarmctl/`` so enrolled workers (which already ship that
        CA's ``ca.crt``) trust it out of the box. BYOC via ``--cert``/``--key``
        if you need a publicly-trusted or externally-managed cert.
        """
        _require_master_mode("swarmctl")
        if daemon:
            log.info("swarmctl daemonizing host=%s port=%d", host, port)
            _utils._daemonize()

        # Fire-and-forget the syslog-TLS listener as an independent sibling.
        # _spawn_detached is idempotent via its pid-file, so re-running
        # swarmctl does not double-spawn a listener that is already up.
        if not no_listener:
            listener_host = os.environ.get("DECNET_LISTENER_HOST", "0.0.0.0")  # nosec B104
            listener_port = int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514"))
            lst_argv = [
                sys.executable, "-m", "decnet", "listener",
                "--host", listener_host,
                "--port", str(listener_port),
                "--daemon",
            ]
            try:
                pid = _utils._spawn_detached(lst_argv, _utils._pid_dir() / "listener.pid")
                log.info("swarmctl auto-spawned listener pid=%d bind=%s:%d",
                         pid, listener_host, listener_port)
                console.print(f"[dim]Auto-spawned listener (pid {pid}) on {listener_host}:{listener_port}.[/]")
            except Exception as e:  # noqa: BLE001
                # Listener failure is non-fatal: swarmctl itself still serves.
                log.warning("swarmctl could not auto-spawn listener: %s", e)
                console.print(f"[yellow]listener auto-spawn skipped: {e}[/]")

        log.info("swarmctl command invoked host=%s port=%d tls=%s", host, port, tls)
        scheme = "https" if tls else "http"
        console.print(f"[green]Starting DECNET SWARM controller on {scheme}://{host}:{port}...[/]")
        # The controller runs as a uvicorn child process rather than in-process.
        _cmd = [sys.executable, "-m", "uvicorn", "decnet.web.swarm_api:app",
                "--host", host, "--port", str(port)]
        if tls:
            from decnet.swarm import pki as _pki
            if cert and key:
                # BYOC path: operator supplied both halves of the keypair.
                cert_path, key_path = cert, key
            elif cert or key:
                # Half a keypair is a config error, not something to guess around.
                console.print("[red]--cert and --key must be provided together.[/]")
                raise typer.Exit(code=2)
            else:
                # Auto-issue a server cert from the DECNET CA (see docstring).
                auto_cert, auto_key, _auto_ca = _pki.ensure_swarmctl_cert(host)
                cert_path, key_path = str(auto_cert), str(auto_key)
                console.print(f"[dim]Auto-issued swarmctl server cert → {cert_path}[/]")
            ca_path = client_ca or str(_pki.DEFAULT_CA_DIR / "ca.crt")
            _cmd += [
                "--ssl-keyfile", key_path,
                "--ssl-certfile", cert_path,
                "--ssl-ca-certs", ca_path,
                "--ssl-cert-reqs", "2",  # ssl.CERT_REQUIRED — enforce client certs (mTLS)
            ]
        try:
            # start_new_session puts uvicorn in its own process group so we can
            # signal the whole group (uvicorn + its workers) on shutdown.
            proc = subprocess.Popen(_cmd, start_new_session=True)  # nosec B603 B404
            try:
                proc.wait()
            except KeyboardInterrupt:
                # Ctrl+C: forward SIGTERM to uvicorn's process group, then
                # escalate to SIGKILL if it hasn't exited within 10 seconds.
                try:
                    os.killpg(proc.pid, signal.SIGTERM)
                    try:
                        proc.wait(timeout=10)
                    except subprocess.TimeoutExpired:
                        os.killpg(proc.pid, signal.SIGKILL)
                        proc.wait()
                except ProcessLookupError:
                    # Group already gone — nothing left to reap.
                    pass
        except (FileNotFoundError, subprocess.SubprocessError):
            console.print("[red]Failed to start swarmctl. Ensure 'uvicorn' is installed in the current environment.[/]")
|
||||||
46
decnet/cli/updater.py
Normal file
46
decnet/cli/updater.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pathlib as _pathlib
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Register the ``updater`` command on the given Typer app."""
    @app.command()
    def updater(
        port: int = typer.Option(8766, "--port", help="Port for the self-updater daemon"),
        host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the updater"),  # nosec B104
        updater_dir: Optional[str] = typer.Option(None, "--updater-dir", help="Updater cert bundle dir (default: ~/.decnet/updater)"),
        install_dir: Optional[str] = typer.Option(None, "--install-dir", help="Release install root (default: /opt/decnet)"),
        agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker agent cert bundle (for local /health probes; default: ~/.decnet/agent)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the DECNET self-updater (requires a bundle in ~/.decnet/updater/)."""
        # Heavy project imports stay inside the command so `decnet --help` stays fast.
        from decnet.swarm import pki as _pki
        from decnet.updater import server as _upd_server

        def _resolve(override: Optional[str], default: _pathlib.Path) -> _pathlib.Path:
            # CLI override wins; otherwise the library-provided default applies.
            return _pathlib.Path(override) if override else default

        upd_root = _resolve(updater_dir, _upd_server.DEFAULT_UPDATER_DIR)
        install_root = _resolve(install_dir, _pathlib.Path("/opt/decnet"))
        agent_root = _resolve(agent_dir, _pki.DEFAULT_AGENT_DIR)

        if daemon:
            log.info("updater daemonizing host=%s port=%d", host, port)
            _utils._daemonize()

        log.info(
            "updater command invoked host=%s port=%d updater_dir=%s install_dir=%s",
            host, port, upd_root, install_root,
        )
        console.print(f"[green]Starting DECNET self-updater on {host}:{port} (mTLS)...[/]")
        # The server blocks until shutdown; propagate a non-zero exit code to the shell.
        rc = _upd_server.run(
            host, port,
            updater_dir=upd_root,
            install_dir=install_root,
            agent_dir=agent_root,
        )
        if rc != 0:
            raise typer.Exit(rc)
|
||||||
177
decnet/cli/utils.py
Normal file
177
decnet/cli/utils.py
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
"""Shared CLI helpers: console, logger, process management, swarm HTTP client.
|
||||||
|
|
||||||
|
Submodules reference these as ``from . import utils`` then ``utils.foo(...)``
|
||||||
|
so tests can patch ``decnet.cli.utils.<name>`` and have every caller see it.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import signal
|
||||||
|
import subprocess # nosec B404
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
from rich.console import Console
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.env import DECNET_API_HOST, DECNET_API_PORT, DECNET_INGEST_LOG_FILE
|
||||||
|
|
||||||
|
log = get_logger("cli")
|
||||||
|
console = Console()
|
||||||
|
|
||||||
|
|
||||||
|
def _daemonize() -> None:
    """Fork the current process into a background daemon (Unix double-fork).

    First fork + setsid detaches from the controlling terminal; the second
    fork guarantees the daemon can never re-acquire one. Standard streams
    are redirected to /dev/null at BOTH levels:

    * os.dup2 on fds 0/1/2 — so output from C extensions and exec'd
      children is silenced too (the previous implementation only rebound
      the Python-level ``sys.std*`` objects, leaving the real fds pointing
      at the launching terminal, which raises EIO once that terminal
      closes);
    * rebinding ``sys.stdin/stdout/stderr`` — so Python-level writers that
      cached the old objects' absence still get a valid file object.
    """
    if os.fork() > 0:
        raise SystemExit(0)  # parent exits; child continues detached
    os.setsid()
    if os.fork() > 0:
        raise SystemExit(0)  # first child exits; grandchild is the daemon
    # fd-level redirection: point 0/1/2 at /dev/null.
    devnull_fd = os.open(os.devnull, os.O_RDWR)
    for fd in (0, 1, 2):
        os.dup2(devnull_fd, fd)
    if devnull_fd > 2:
        os.close(devnull_fd)
    # Python-level redirection, matching the original behavior.
    sys.stdout = open(os.devnull, "w")  # noqa: SIM115
    sys.stderr = open(os.devnull, "w")  # noqa: SIM115
    sys.stdin = open(os.devnull, "r")  # noqa: SIM115
|
||||||
|
|
||||||
|
|
||||||
|
def _pid_dir() -> Path:
|
||||||
|
"""Return the writable PID directory.
|
||||||
|
|
||||||
|
/opt/decnet when it exists and is writable (production), else
|
||||||
|
~/.decnet (dev). The directory is created if needed."""
|
||||||
|
candidates = [Path("/opt/decnet"), Path.home() / ".decnet"]
|
||||||
|
for path in candidates:
|
||||||
|
try:
|
||||||
|
path.mkdir(parents=True, exist_ok=True)
|
||||||
|
if os.access(path, os.W_OK):
|
||||||
|
return path
|
||||||
|
except (PermissionError, OSError):
|
||||||
|
continue
|
||||||
|
return Path("/tmp") # nosec B108
|
||||||
|
|
||||||
|
|
||||||
|
def _spawn_detached(argv: list[str], pid_file: Path) -> int:
|
||||||
|
"""Spawn a DECNET subcommand as a fully-independent sibling process.
|
||||||
|
|
||||||
|
The parent does NOT wait() on this child. start_new_session=True puts
|
||||||
|
the child in its own session so SIGHUP on parent exit doesn't kill it;
|
||||||
|
stdin/stdout/stderr go to /dev/null so the launching shell can close
|
||||||
|
without EIO on the child. close_fds=True prevents inherited sockets
|
||||||
|
from pinning ports we're trying to rebind.
|
||||||
|
|
||||||
|
This is deliberately NOT a supervisor — we fire-and-forget. If the
|
||||||
|
child dies, the operator restarts it manually via its own subcommand.
|
||||||
|
"""
|
||||||
|
if pid_file.exists():
|
||||||
|
try:
|
||||||
|
existing = int(pid_file.read_text().strip())
|
||||||
|
os.kill(existing, 0)
|
||||||
|
return existing
|
||||||
|
except (ValueError, ProcessLookupError, PermissionError, OSError):
|
||||||
|
pass # stale pid_file — fall through and spawn
|
||||||
|
|
||||||
|
with open(os.devnull, "rb") as dn_in, open(os.devnull, "ab") as dn_out:
|
||||||
|
proc = subprocess.Popen( # nosec B603
|
||||||
|
argv,
|
||||||
|
stdin=dn_in, stdout=dn_out, stderr=dn_out,
|
||||||
|
start_new_session=True, close_fds=True,
|
||||||
|
)
|
||||||
|
pid_file.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
pid_file.write_text(f"{proc.pid}\n")
|
||||||
|
return proc.pid
|
||||||
|
|
||||||
|
|
||||||
|
def _is_running(match_fn) -> int | None:
    """Return the PID of a DECNET process whose cmdline satisfies ``match_fn``, or None."""
    import psutil

    for candidate in psutil.process_iter(["pid", "cmdline"]):
        try:
            argv = candidate.info["cmdline"]
            if argv and match_fn(argv):
                return candidate.info["pid"]
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # Process vanished or is off-limits mid-scan — keep looking.
            continue
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _service_registry(log_file: str) -> list[tuple[str, callable, list[str]]]:
    """Return the microservice registry for health-check and relaunch.

    Each entry is ``(display_name, match_fn, launch_argv)`` where
    ``match_fn(cmdline)`` decides whether a scanned process is this service
    and ``launch_argv`` relaunches it.

    On agents these run as systemd units invoking /usr/local/bin/decnet,
    which doesn't include "decnet.cli" in its cmdline. On master dev boxes
    they're launched via `python -m decnet.cli`. Match either form — cmd
    is a list of argv tokens, so substring-check the joined string.
    """
    _py = sys.executable

    def _matches(sub: str, extras: tuple[str, ...] = ()):
        # Build a predicate that substring-matches the joined argv, so both
        # systemd-style and `python -m` invocations are recognised.
        def _check(cmd) -> bool:
            joined = " ".join(cmd) if not isinstance(cmd, str) else cmd
            if "decnet" not in joined:
                return False
            if sub not in joined:
                return False
            return all(e in joined for e in extras)
        return _check

    return [
        ("Collector", _matches("collect"),
         [_py, "-m", "decnet.cli", "collect", "--daemon", "--log-file", log_file]),
        ("Mutator", _matches("mutate", ("--watch",)),
         [_py, "-m", "decnet.cli", "mutate", "--daemon", "--watch"]),
        ("Prober", _matches("probe"),
         [_py, "-m", "decnet.cli", "probe", "--daemon", "--log-file", log_file]),
        ("Profiler", _matches("profiler"),
         [_py, "-m", "decnet.cli", "profiler", "--daemon"]),
        ("Sniffer", _matches("sniffer"),
         [_py, "-m", "decnet.cli", "sniffer", "--daemon", "--log-file", log_file]),
        # FIX: the API matcher previously used exact-token membership
        # (`"uvicorn" in cmd`), inconsistent with every other service and
        # broken for str cmdlines. Route it through _matches like the rest;
        # the "decnet.web.api:app" extra contains "decnet", so _matches'
        # decnet guard still holds.
        ("API", _matches("uvicorn", ("decnet.web.api:app",)),
         [_py, "-m", "uvicorn", "decnet.web.api:app",
          "--host", DECNET_API_HOST, "--port", str(DECNET_API_PORT)]),
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def _kill_all_services() -> None:
    """Find and SIGTERM every running DECNET microservice process."""
    stopped = 0
    for label, matcher, _argv in _service_registry(str(DECNET_INGEST_LOG_FILE)):
        pid = _is_running(matcher)
        if pid is None:
            continue
        console.print(f"[yellow]Stopping {label} (PID {pid})...[/]")
        os.kill(pid, signal.SIGTERM)
        stopped += 1

    if stopped:
        console.print(f"[green]{stopped} background process(es) stopped.[/]")
    else:
        console.print("[dim]No DECNET services were running.[/]")
|
||||||
|
|
||||||
|
|
||||||
|
_DEFAULT_SWARMCTL_URL = "http://127.0.0.1:8770"
|
||||||
|
|
||||||
|
|
||||||
|
def _swarmctl_base_url(url: Optional[str]) -> str:
|
||||||
|
return url or os.environ.get("DECNET_SWARMCTL_URL", _DEFAULT_SWARMCTL_URL)
|
||||||
|
|
||||||
|
|
||||||
|
def _http_request(method: str, url: str, *, json_body: Optional[dict] = None, timeout: float = 30.0):
    """Tiny sync wrapper around httpx; avoids leaking async into the CLI.

    Prints a diagnosis and exits the CLI (``typer.Exit``) on transport
    failure (exit 2) or any HTTP status >= 400 (exit 1); otherwise returns
    the ``httpx.Response``.
    """
    import httpx

    try:
        resp = httpx.request(method, url, json=json_body, timeout=timeout)
    except httpx.HTTPError as exc:
        console.print(f"[red]Could not reach swarm controller at {url}: {exc}[/]")
        console.print("[dim]Is `decnet swarmctl` running?[/]")
        raise typer.Exit(2)

    if resp.status_code < 400:
        return resp

    # Prefer the FastAPI-style {"detail": ...} payload; fall back to raw text.
    try:
        detail = resp.json().get("detail", resp.text)
    except Exception:  # nosec B110
        detail = resp.text
    console.print(f"[red]{method} {url} failed: {resp.status_code} — {detail}[/]")
    raise typer.Exit(1)
|
||||||
120
decnet/cli/web.py
Normal file
120
decnet/cli/web.py
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from decnet.env import DECNET_API_PORT, DECNET_WEB_HOST, DECNET_WEB_PORT
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the ``web`` dashboard-serving command to the Typer app."""
    @app.command(name="web")
    def serve_web(
        web_port: int = typer.Option(DECNET_WEB_PORT, "--web-port", help="Port to serve the DECNET Web Dashboard"),
        host: str = typer.Option(DECNET_WEB_HOST, "--host", help="Host IP to serve the Web Dashboard"),
        api_port: int = typer.Option(DECNET_API_PORT, "--api-port", help="Port the DECNET API is listening on"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Serve the DECNET Web Dashboard frontend.

        Proxies /api/* requests to the API server so the frontend can use
        relative URLs (/api/v1/...) with no CORS configuration required.
        """
        import http.client
        import http.server
        import os
        import socketserver
        from pathlib import Path

        # Built SPA assets are expected beside the package root in decnet_web/dist.
        dist_dir = Path(__file__).resolve().parent.parent.parent / "decnet_web" / "dist"

        if not dist_dir.exists():
            console.print(f"[red]Frontend build not found at {dist_dir}. Make sure you run 'npm run build' inside 'decnet_web'.[/]")
            raise typer.Exit(1)

        if daemon:
            log.info("web daemonizing host=%s port=%d api_port=%d", host, web_port, api_port)
            _utils._daemonize()

        # Bind the proxy target to a local so the handler class closes over it.
        _api_port = api_port

        class SPAHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
            def do_GET(self):
                # /api/* is proxied to the API server; anything that is not an
                # existing file falls back to index.html (SPA client routing).
                if self.path.startswith("/api/"):
                    self._proxy("GET")
                    return
                path = self.translate_path(self.path)
                if not Path(path).exists() or Path(path).is_dir():
                    self.path = "/index.html"
                return super().do_GET()

            def do_POST(self):
                # Mutating verbs are only meaningful for the API; 405 otherwise.
                if self.path.startswith("/api/"):
                    self._proxy("POST")
                    return
                self.send_error(405)

            def do_PUT(self):
                if self.path.startswith("/api/"):
                    self._proxy("PUT")
                    return
                self.send_error(405)

            def do_DELETE(self):
                if self.path.startswith("/api/"):
                    self._proxy("DELETE")
                    return
                self.send_error(405)

            def _proxy(self, method: str) -> None:
                # Forward the request (with its body) to the local API server
                # and stream the response back to the browser.
                content_length = int(self.headers.get("Content-Length", 0))
                body = self.rfile.read(content_length) if content_length else None

                # Drop hop-by-hop headers; everything else passes through.
                forward = {k: v for k, v in self.headers.items()
                           if k.lower() not in ("host", "connection")}

                try:
                    conn = http.client.HTTPConnection("127.0.0.1", _api_port, timeout=120)
                    conn.request(method, self.path, body=body, headers=forward)
                    resp = conn.getresponse()

                    self.send_response(resp.status)
                    for key, val in resp.getheaders():
                        # Hop-by-hop headers must not be relayed; we re-frame
                        # the body ourselves, so transfer-encoding is dropped.
                        if key.lower() not in ("connection", "transfer-encoding"):
                            self.send_header(key, val)
                    self.end_headers()

                    content_type = resp.getheader("Content-Type", "")
                    if "text/event-stream" in content_type:
                        # SSE streams are long-lived: the 120 s timeout would
                        # otherwise sever the connection mid-stream.
                        conn.sock.settimeout(None)

                    # read1 (when available) returns as soon as ANY data is
                    # ready instead of blocking until 4096 bytes accumulate —
                    # essential for low-latency event streaming.
                    _read = getattr(resp, "read1", resp.read)
                    while True:
                        chunk = _read(4096)
                        if not chunk:
                            break
                        self.wfile.write(chunk)
                        self.wfile.flush()
                except Exception as exc:
                    log.warning("web proxy error %s %s: %s", method, self.path, exc)
                    self.send_error(502, f"API proxy error: {exc}")
                finally:
                    try:
                        conn.close()
                    except Exception:  # nosec B110 — best-effort conn cleanup
                        pass

            def log_message(self, fmt: str, *args: object) -> None:
                # Route per-request access logs to the project logger at debug
                # level instead of http.server's default stderr printing.
                log.debug("web %s", fmt % args)

        # SimpleHTTPRequestHandler serves relative to the CWD.
        os.chdir(dist_dir)

        # Allow quick restarts without "address already in use" errors.
        socketserver.TCPServer.allow_reuse_address = True
        with socketserver.ThreadingTCPServer((host, web_port), SPAHTTPRequestHandler) as httpd:
            console.print(f"[green]Serving DECNET Web Dashboard on http://{host}:{web_port}[/]")
            console.print(f"[dim]Proxying /api/* → http://127.0.0.1:{_api_port}[/]")
            try:
                httpd.serve_forever()
            except KeyboardInterrupt:
                console.print("\n[dim]Shutting down dashboard server.[/]")
|
||||||
142
decnet/cli/workers.py
Normal file
142
decnet/cli/workers.py
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from decnet.env import DECNET_INGEST_LOG_FILE
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the worker commands (probe, collect, mutate, correlate) to the app."""
    @app.command()
    def probe(
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path for RFC 5424 syslog + .json output (reads attackers from .json, writes results to both)"),
        interval: int = typer.Option(300, "--interval", "-i", help="Seconds between probe cycles (default: 300)"),
        timeout: float = typer.Option(5.0, "--timeout", help="Per-probe TCP timeout in seconds"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background (used by deploy, no console output)"),
    ) -> None:
        """Fingerprint attackers (JARM + HASSH + TCP/IP stack) discovered in the log stream."""
        import asyncio
        from decnet.prober import prober_worker

        if daemon:
            # Daemon path: no console output, run the worker loop detached.
            log.info("probe daemonizing log_file=%s interval=%d", log_file, interval)
            _utils._daemonize()
            asyncio.run(prober_worker(log_file, interval=interval, timeout=timeout))
            return

        log.info("probe command invoked log_file=%s interval=%d", log_file, interval)
        console.print(f"[bold cyan]DECNET-PROBER[/] watching {log_file} for attackers (interval: {interval}s)")
        console.print("[dim]Press Ctrl+C to stop[/]")
        try:
            asyncio.run(prober_worker(log_file, interval=interval, timeout=timeout))
        except KeyboardInterrupt:
            console.print("\n[yellow]DECNET-PROBER stopped.[/]")

    @app.command()
    def collect(
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write RFC 5424 syslog lines and .json records"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Stream Docker logs from all running decky service containers to a log file."""
        import asyncio
        from decnet.collector import log_collector_worker

        if daemon:
            log.info("collect daemonizing log_file=%s", log_file)
            _utils._daemonize()

        log.info("collect command invoked log_file=%s", log_file)
        console.print(f"[bold cyan]Collector starting[/] → {log_file}")
        asyncio.run(log_collector_worker(log_file))

    @app.command()
    def mutate(
        watch: bool = typer.Option(False, "--watch", "-w", help="Run continuously and mutate deckies according to their interval"),
        decky_name: Optional[str] = typer.Option(None, "--decky", help="Force mutate a specific decky immediately"),
        force_all: bool = typer.Option(False, "--all", help="Force mutate all deckies immediately"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Manually trigger or continuously watch for decky mutation."""
        import asyncio
        from decnet.mutator import mutate_decky, mutate_all, run_watch_loop
        from decnet.web.dependencies import repo

        if daemon:
            log.info("mutate daemonizing watch=%s", watch)
            _utils._daemonize()

        async def _run() -> None:
            # Repo must be initialized before any mutation path touches it.
            # Option precedence: --watch > --decky > --all > default (due-only).
            await repo.initialize()
            if watch:
                await run_watch_loop(repo)
            elif decky_name:
                await mutate_decky(decky_name, repo)
            elif force_all:
                await mutate_all(force=True, repo=repo)
            else:
                await mutate_all(force=False, repo=repo)

        asyncio.run(_run())

    @app.command(name="correlate")
    def correlate(
        log_file: Optional[str] = typer.Option(None, "--log-file", "-f", help="Path to DECNET syslog file to analyse"),
        min_deckies: int = typer.Option(2, "--min-deckies", "-m", help="Minimum number of distinct deckies an IP must touch to be reported"),
        output: str = typer.Option("table", "--output", "-o", help="Output format: table | json | syslog"),
        emit_syslog: bool = typer.Option(False, "--emit-syslog", help="Also print traversal events as RFC 5424 lines (for SIEM piping)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Analyse logs for cross-decky traversals and print the attacker movement graph."""
        import sys
        import json as _json
        from pathlib import Path
        from decnet.correlation.engine import CorrelationEngine

        if daemon:
            log.info("correlate daemonizing log_file=%s", log_file)
            _utils._daemonize()

        engine = CorrelationEngine()

        # Input source: explicit --log-file wins, then piped stdin, else error.
        if log_file:
            path = Path(log_file)
            if not path.exists():
                console.print(f"[red]Log file not found: {log_file}[/]")
                raise typer.Exit(1)
            engine.ingest_file(path)
        elif not sys.stdin.isatty():
            for line in sys.stdin:
                engine.ingest(line)
        else:
            console.print("[red]Provide --log-file or pipe log data via stdin.[/]")
            raise typer.Exit(1)

        traversals = engine.traversals(min_deckies)

        if output == "json":
            console.print_json(_json.dumps(engine.report_json(min_deckies), indent=2))
        elif output == "syslog":
            # Raw syslog lines via typer.echo (no rich markup) for piping.
            for line in engine.traversal_syslog_lines(min_deckies):
                typer.echo(line)
        else:
            # Default "table" output with a summary footer.
            if not traversals:
                console.print(
                    f"[yellow]No traversals detected "
                    f"(min_deckies={min_deckies}, events_indexed={engine.events_indexed}).[/]"
                )
            else:
                console.print(engine.report_table(min_deckies))
            console.print(
                f"[dim]Parsed {engine.lines_parsed} lines · "
                f"indexed {engine.events_indexed} events · "
                f"{len(engine.all_attackers())} unique IPs · "
                f"[bold]{len(traversals)}[/] traversal(s)[/]"
            )

        # Optional SIEM side-channel on top of whatever --output produced.
        if emit_syslog:
            for line in engine.traversal_syslog_lines(min_deckies):
                typer.echo(line)
|
||||||
@@ -110,14 +110,23 @@ _RFC5424_RE = re.compile(
|
|||||||
r"(\S+) " # 1: TIMESTAMP
|
r"(\S+) " # 1: TIMESTAMP
|
||||||
r"(\S+) " # 2: HOSTNAME (decky name)
|
r"(\S+) " # 2: HOSTNAME (decky name)
|
||||||
r"(\S+) " # 3: APP-NAME (service)
|
r"(\S+) " # 3: APP-NAME (service)
|
||||||
r"- " # PROCID always NILVALUE
|
r"\S+ " # PROCID — NILVALUE ("-") for syslog_bridge emitters,
|
||||||
|
# real PID for native syslog callers like sshd/sudo
|
||||||
|
# routed through rsyslog. Accept both; we don't consume it.
|
||||||
r"(\S+) " # 4: MSGID (event_type)
|
r"(\S+) " # 4: MSGID (event_type)
|
||||||
r"(.+)$", # 5: SD element + optional MSG
|
r"(.+)$", # 5: SD element + optional MSG
|
||||||
)
|
)
|
||||||
_SD_BLOCK_RE = re.compile(r'\[decnet@55555\s+(.*?)\]', re.DOTALL)
|
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
|
||||||
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
||||||
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "remote_addr", "target_ip", "ip")
|
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "remote_addr", "target_ip", "ip")
|
||||||
|
|
||||||
|
# Free-form `key=value` pairs in the MSG body. Used for lines that bypass the
|
||||||
|
# syslog_bridge SD format — e.g. the SSH container's PROMPT_COMMAND which
|
||||||
|
# calls `logger -t bash "CMD uid=0 user=root src=1.2.3.4 pwd=/root cmd=…"`.
|
||||||
|
# Values run until the next whitespace, so `cmd=…` at end-of-line is preserved
|
||||||
|
# as one unit; we only care about IP-shaped fields here anyway.
|
||||||
|
_MSG_KV_RE = re.compile(r'(\w+)=(\S+)')
|
||||||
|
|
||||||
|
|
||||||
def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
|
def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
@@ -151,6 +160,19 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
|
|||||||
attacker_ip = fields[fname]
|
attacker_ip = fields[fname]
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# Fallback for plain `logger` callers that don't use SD params (notably
|
||||||
|
# the SSH container's bash PROMPT_COMMAND: `logger -t bash "CMD … src=IP …"`).
|
||||||
|
# Scan the MSG body for IP-shaped `key=value` tokens ONLY — don't fold
|
||||||
|
# them into `fields`, because the frontend's parseEventBody already
|
||||||
|
# renders kv pairs from the msg and doubling them up produces noisy
|
||||||
|
# duplicate pills. This keeps attacker attribution working without
|
||||||
|
# changing the shape of `fields` for non-SD lines.
|
||||||
|
if attacker_ip == "Unknown" and msg:
|
||||||
|
for k, v in _MSG_KV_RE.findall(msg):
|
||||||
|
if k in _IP_FIELDS:
|
||||||
|
attacker_ip = v
|
||||||
|
break
|
||||||
|
|
||||||
try:
|
try:
|
||||||
ts_formatted = datetime.fromisoformat(ts_raw).strftime("%Y-%m-%d %H:%M:%S")
|
ts_formatted = datetime.fromisoformat(ts_raw).strftime("%Y-%m-%d %H:%M:%S")
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
|||||||
@@ -63,7 +63,7 @@ def _configure_logging(dev: bool) -> None:
|
|||||||
all microservice daemons — which redirect stderr to /dev/null — still
|
all microservice daemons — which redirect stderr to /dev/null — still
|
||||||
produce readable logs. File handler is skipped under pytest.
|
produce readable logs. File handler is skipped under pytest.
|
||||||
"""
|
"""
|
||||||
import logging.handlers as _lh
|
from decnet.logging.inode_aware_handler import InodeAwareRotatingFileHandler
|
||||||
|
|
||||||
root = logging.getLogger()
|
root = logging.getLogger()
|
||||||
# Guard: if our StreamHandler is already installed, all handlers are set.
|
# Guard: if our StreamHandler is already installed, all handlers are set.
|
||||||
@@ -82,7 +82,7 @@ def _configure_logging(dev: bool) -> None:
|
|||||||
_in_pytest = any(k.startswith("PYTEST") for k in os.environ)
|
_in_pytest = any(k.startswith("PYTEST") for k in os.environ)
|
||||||
if not _in_pytest:
|
if not _in_pytest:
|
||||||
_log_path = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.log")
|
_log_path = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.log")
|
||||||
file_handler = _lh.RotatingFileHandler(
|
file_handler = InodeAwareRotatingFileHandler(
|
||||||
_log_path,
|
_log_path,
|
||||||
mode="a",
|
mode="a",
|
||||||
maxBytes=10 * 1024 * 1024, # 10 MB
|
maxBytes=10 * 1024 * 1024, # 10 MB
|
||||||
@@ -91,6 +91,10 @@ def _configure_logging(dev: bool) -> None:
|
|||||||
)
|
)
|
||||||
file_handler.setFormatter(fmt)
|
file_handler.setFormatter(fmt)
|
||||||
root.addHandler(file_handler)
|
root.addHandler(file_handler)
|
||||||
|
# Drop root ownership when invoked via sudo so non-root follow-up
|
||||||
|
# commands (e.g. `decnet api` after `sudo decnet deploy`) can append.
|
||||||
|
from decnet.privdrop import chown_to_invoking_user
|
||||||
|
chown_to_invoking_user(_log_path)
|
||||||
|
|
||||||
|
|
||||||
_dev = os.environ.get("DECNET_DEVELOPER", "").lower() == "true"
|
_dev = os.environ.get("DECNET_DEVELOPER", "").lower() == "true"
|
||||||
|
|||||||
90
decnet/config_ini.py
Normal file
90
decnet/config_ini.py
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
"""Parse /etc/decnet/decnet.ini and seed os.environ defaults.
|
||||||
|
|
||||||
|
The INI file is a convenience layer on top of the existing DECNET_* env
|
||||||
|
vars. It never overrides an explicit environment variable (uses
|
||||||
|
os.environ.setdefault). Call load_ini_config() once, very early, before
|
||||||
|
any decnet.env import, so env.py picks up the seeded values as if they
|
||||||
|
had been exported by the shell.
|
||||||
|
|
||||||
|
Shape::
|
||||||
|
|
||||||
|
[decnet]
|
||||||
|
mode = agent # or "master"
|
||||||
|
log-directory = /var/log/decnet
|
||||||
|
disallow-master = true
|
||||||
|
|
||||||
|
[agent]
|
||||||
|
master-host = 192.168.1.50
|
||||||
|
master-port = 8770
|
||||||
|
agent-port = 8765
|
||||||
|
agent-dir = /home/anti/.decnet/agent
|
||||||
|
...
|
||||||
|
|
||||||
|
[master]
|
||||||
|
api-host = 0.0.0.0
|
||||||
|
swarmctl-port = 8770
|
||||||
|
listener-port = 6514
|
||||||
|
...
|
||||||
|
|
||||||
|
Only the section matching `mode` is loaded. The other section is
|
||||||
|
ignored silently so an agent host never reads master secrets (and
|
||||||
|
vice versa). Keys are converted to SCREAMING_SNAKE_CASE and prefixed
|
||||||
|
with ``DECNET_`` — e.g. ``master-host`` → ``DECNET_MASTER_HOST``.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import configparser
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_CONFIG_PATH = Path("/etc/decnet/decnet.ini")
|
||||||
|
|
||||||
|
# The [decnet] section keys are role-agnostic and always exported.
|
||||||
|
_COMMON_KEYS = frozenset({"mode", "disallow-master", "log-directory"})
|
||||||
|
|
||||||
|
|
||||||
|
def _key_to_env(key: str) -> str:
|
||||||
|
return "DECNET_" + key.replace("-", "_").upper()
|
||||||
|
|
||||||
|
|
||||||
|
def load_ini_config(path: Optional[Path] = None) -> Optional[Path]:
    """Seed os.environ defaults from the DECNET INI file.

    Returns the path that was actually loaded (so callers can log it), or
    None if no file was read. A missing file is a no-op — callers fall back
    to env vars / CLI flags / hardcoded defaults.

    Precedence: real os.environ > INI > defaults. Real env vars are never
    overwritten because we use setdefault().

    Raises ValueError if ``[decnet] mode`` is neither "agent" nor "master".
    """
    if path is None:
        # DECNET_CONFIG lets tests / non-standard installs point elsewhere.
        override = os.environ.get("DECNET_CONFIG")
        path = Path(override) if override else DEFAULT_CONFIG_PATH

    if not path.is_file():
        return None

    # interpolation=None: values are copied verbatim into the environment.
    # The default BasicInterpolation raises InterpolationSyntaxError on any
    # literal '%' (common in secrets and strftime-style paths), and no value
    # here ever legitimately references another key.
    parser = configparser.ConfigParser(interpolation=None)
    parser.read(path)

    # [decnet] first — mode/disallow-master/log-directory. These seed the
    # mode decision for the section selection below.
    if parser.has_section("decnet"):
        for key, value in parser.items("decnet"):
            os.environ.setdefault(_key_to_env(key), value)

    mode = os.environ.get("DECNET_MODE", "master").lower()
    if mode not in ("agent", "master"):
        raise ValueError(
            f"decnet.ini: [decnet] mode must be 'agent' or 'master', got '{mode}'"
        )

    # Role-specific section. Only the section matching the resolved mode is
    # loaded; the other is ignored silently so an agent host never reads
    # master secrets (and vice versa).
    if parser.has_section(mode):
        for key, value in parser.items(mode):
            os.environ.setdefault(_key_to_env(key), value)

    return path
|
||||||
@@ -6,7 +6,7 @@ the fields needed for cross-decky correlation: attacker IP, decky name,
|
|||||||
service, event type, and timestamp.
|
service, event type, and timestamp.
|
||||||
|
|
||||||
Log format (produced by decnet.logging.syslog_formatter):
|
Log format (produced by decnet.logging.syslog_formatter):
|
||||||
<PRI>1 TIMESTAMP HOSTNAME APP-NAME - MSGID [decnet@55555 k1="v1" k2="v2"] [MSG]
|
<PRI>1 TIMESTAMP HOSTNAME APP-NAME - MSGID [relay@55555 k1="v1" k2="v2"] [MSG]
|
||||||
|
|
||||||
The attacker IP may appear under several field names depending on service:
|
The attacker IP may appear under several field names depending on service:
|
||||||
src_ip — ftp, smtp, http, most services
|
src_ip — ftp, smtp, http, most services
|
||||||
@@ -31,8 +31,8 @@ _RFC5424_RE = re.compile(
|
|||||||
r"(.+)$", # 5: SD element + optional MSG
|
r"(.+)$", # 5: SD element + optional MSG
|
||||||
)
|
)
|
||||||
|
|
||||||
# Structured data block: [decnet@55555 k="v" ...]
|
# Structured data block: [relay@55555 k="v" ...]
|
||||||
_SD_BLOCK_RE = re.compile(r'\[decnet@55555\s+(.*?)\]', re.DOTALL)
|
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
|
||||||
|
|
||||||
# Individual param: key="value" (with escaped chars inside value)
|
# Individual param: key="value" (with escaped chars inside value)
|
||||||
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
||||||
|
|||||||
@@ -31,11 +31,11 @@ from decnet.network import (
|
|||||||
log = get_logger("engine")
|
log = get_logger("engine")
|
||||||
console = Console()
|
console = Console()
|
||||||
COMPOSE_FILE = Path("decnet-compose.yml")
|
COMPOSE_FILE = Path("decnet-compose.yml")
|
||||||
_CANONICAL_LOGGING = Path(__file__).parent.parent.parent / "templates" / "decnet_logging.py"
|
_CANONICAL_LOGGING = Path(__file__).parent.parent / "templates" / "syslog_bridge.py"
|
||||||
|
|
||||||
|
|
||||||
def _sync_logging_helper(config: DecnetConfig) -> None:
|
def _sync_logging_helper(config: DecnetConfig) -> None:
|
||||||
"""Copy the canonical decnet_logging.py into every active template build context."""
|
"""Copy the canonical syslog_bridge.py into every active template build context."""
|
||||||
from decnet.services.registry import get_service
|
from decnet.services.registry import get_service
|
||||||
seen: set[Path] = set()
|
seen: set[Path] = set()
|
||||||
for decky in config.deckies:
|
for decky in config.deckies:
|
||||||
@@ -47,16 +47,32 @@ def _sync_logging_helper(config: DecnetConfig) -> None:
|
|||||||
if ctx is None or ctx in seen:
|
if ctx is None or ctx in seen:
|
||||||
continue
|
continue
|
||||||
seen.add(ctx)
|
seen.add(ctx)
|
||||||
dest = ctx / "decnet_logging.py"
|
dest = ctx / "syslog_bridge.py"
|
||||||
if not dest.exists() or dest.read_bytes() != _CANONICAL_LOGGING.read_bytes():
|
if not dest.exists() or dest.read_bytes() != _CANONICAL_LOGGING.read_bytes():
|
||||||
shutil.copy2(_CANONICAL_LOGGING, dest)
|
shutil.copy2(_CANONICAL_LOGGING, dest)
|
||||||
|
|
||||||
|
|
||||||
def _compose(*args: str, compose_file: Path = COMPOSE_FILE, env: dict | None = None) -> None:
    """Run a ``docker compose`` command against *compose_file*.

    ``-p decnet`` pins the compose project name. Without it, docker compose
    derives the project from basename($PWD); when a daemon (systemd) runs
    with WorkingDirectory=/ that basename is empty and compose aborts with
    "project name must not be empty".

    Raises subprocess.CalledProcessError on non-zero exit, after surfacing
    docker's stderr to the structured log — docker emits the useful detail
    ("Address already in use", which IP, which port) on stderr, and without
    this the upstream traceback just shows the exit code.
    """
    import os

    cmd = ["docker", "compose", "-p", "decnet", "-f", str(compose_file), *args]
    run_env = {**os.environ, **(env or {})}
    result = subprocess.run(cmd, capture_output=True, text=True, env=run_env)  # nosec B603
    if result.stdout:
        print(result.stdout, end="")
    if result.returncode == 0:
        return
    if result.stderr:
        log.error("docker compose %s failed: %s", " ".join(args), result.stderr.strip())
    raise subprocess.CalledProcessError(
        result.returncode, cmd, result.stdout, result.stderr
    )
|
||||||
|
|
||||||
|
|
||||||
_PERMANENT_ERRORS = (
|
_PERMANENT_ERRORS = (
|
||||||
@@ -79,7 +95,11 @@ def _compose_with_retry(
|
|||||||
"""Run a docker compose command, retrying on transient failures."""
|
"""Run a docker compose command, retrying on transient failures."""
|
||||||
import os
|
import os
|
||||||
last_exc: subprocess.CalledProcessError | None = None
|
last_exc: subprocess.CalledProcessError | None = None
|
||||||
cmd = ["docker", "compose", "-f", str(compose_file), *args]
|
# -p decnet pins the compose project name. Without it, docker compose
|
||||||
|
# derives the project from basename($PWD); when a daemon (systemd) runs
|
||||||
|
# with WorkingDirectory=/ that basename is empty and compose aborts with
|
||||||
|
# "project name must not be empty".
|
||||||
|
cmd = ["docker", "compose", "-p", "decnet", "-f", str(compose_file), *args]
|
||||||
merged = {**os.environ, **(env or {})}
|
merged = {**os.environ, **(env or {})}
|
||||||
for attempt in range(1, retries + 1):
|
for attempt in range(1, retries + 1):
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True, env=merged) # nosec B603
|
result = subprocess.run(cmd, capture_output=True, text=True, env=merged) # nosec B603
|
||||||
@@ -106,6 +126,8 @@ def _compose_with_retry(
|
|||||||
else:
|
else:
|
||||||
if result.stderr:
|
if result.stderr:
|
||||||
console.print(f"[red]{result.stderr.strip()}[/]")
|
console.print(f"[red]{result.stderr.strip()}[/]")
|
||||||
|
log.error("docker compose %s failed after %d attempts: %s",
|
||||||
|
" ".join(args), retries, result.stderr.strip())
|
||||||
raise last_exc
|
raise last_exc
|
||||||
|
|
||||||
|
|
||||||
@@ -154,6 +176,15 @@ def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False,
|
|||||||
|
|
||||||
save_state(config, compose_path)
|
save_state(config, compose_path)
|
||||||
|
|
||||||
|
# Pre-up cleanup: a prior half-failed `up` can leave containers still
|
||||||
|
# holding the IPs/ports this run wants, which surfaces as the recurring
|
||||||
|
# "Address already in use" from Docker's IPAM. Best-effort — ignore
|
||||||
|
# failure (e.g. nothing to tear down on a clean host).
|
||||||
|
try:
|
||||||
|
_compose("down", "--remove-orphans", compose_file=compose_path)
|
||||||
|
except subprocess.CalledProcessError:
|
||||||
|
log.debug("pre-up cleanup: compose down failed (likely nothing to remove)")
|
||||||
|
|
||||||
build_env = {"DOCKER_BUILDKIT": "1"} if parallel else {}
|
build_env = {"DOCKER_BUILDKIT": "1"} if parallel else {}
|
||||||
|
|
||||||
console.print("[bold cyan]Building images and starting deckies...[/]")
|
console.print("[bold cyan]Building images and starting deckies...[/]")
|
||||||
@@ -187,10 +218,14 @@ def teardown(decky_id: str | None = None) -> None:
|
|||||||
client = docker.from_env()
|
client = docker.from_env()
|
||||||
|
|
||||||
if decky_id:
|
if decky_id:
|
||||||
svc_names = [f"{decky_id}-{svc}" for svc in [d.services for d in config.deckies if d.name == decky_id]]
|
decky = next((d for d in config.deckies if d.name == decky_id), None)
|
||||||
if not svc_names:
|
if decky is None:
|
||||||
console.print(f"[red]Decky '{decky_id}' not found in current deployment.[/]")
|
console.print(f"[red]Decky '{decky_id}' not found in current deployment.[/]")
|
||||||
return
|
return
|
||||||
|
svc_names = [f"{decky_id}-{svc}" for svc in decky.services]
|
||||||
|
if not svc_names:
|
||||||
|
log.warning("teardown: decky %s has no services to stop", decky_id)
|
||||||
|
return
|
||||||
_compose("stop", *svc_names, compose_file=compose_path)
|
_compose("stop", *svc_names, compose_file=compose_path)
|
||||||
_compose("rm", "-f", *svc_names, compose_file=compose_path)
|
_compose("rm", "-f", *svc_names, compose_file=compose_path)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -6,9 +6,14 @@ from dotenv import load_dotenv
|
|||||||
# Calculate absolute path to the project root
|
# Calculate absolute path to the project root
|
||||||
_ROOT: Path = Path(__file__).parent.parent.absolute()
|
_ROOT: Path = Path(__file__).parent.parent.absolute()
|
||||||
|
|
||||||
# Load .env.local first, then fallback to .env
|
# Load .env.local first, then fallback to .env.
|
||||||
|
# Also check CWD so deployments that install into site-packages (e.g. the
|
||||||
|
# self-updater's release slots) can ship a per-host .env.local at the
|
||||||
|
# process's working directory without having to edit site-packages.
|
||||||
load_dotenv(_ROOT / ".env.local")
|
load_dotenv(_ROOT / ".env.local")
|
||||||
load_dotenv(_ROOT / ".env")
|
load_dotenv(_ROOT / ".env")
|
||||||
|
load_dotenv(Path.cwd() / ".env.local")
|
||||||
|
load_dotenv(Path.cwd() / ".env")
|
||||||
|
|
||||||
|
|
||||||
def _port(name: str, default: int) -> int:
|
def _port(name: str, default: int) -> int:
|
||||||
@@ -59,12 +64,45 @@ DECNET_SYSTEM_LOGS: str = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.lo
|
|||||||
# which causes events to be skipped or processed twice.
|
# which causes events to be skipped or processed twice.
|
||||||
DECNET_EMBED_PROFILER: bool = os.environ.get("DECNET_EMBED_PROFILER", "").lower() == "true"
|
DECNET_EMBED_PROFILER: bool = os.environ.get("DECNET_EMBED_PROFILER", "").lower() == "true"
|
||||||
|
|
||||||
|
# Set to "true" to embed the MACVLAN sniffer inside the API process.
|
||||||
|
# Leave unset (default) when the standalone `decnet sniffer --daemon` is
|
||||||
|
# running (which `decnet deploy` always does). Embedding both produces two
|
||||||
|
# workers sniffing the same interface — duplicated events and wasted CPU.
|
||||||
|
DECNET_EMBED_SNIFFER: bool = os.environ.get("DECNET_EMBED_SNIFFER", "").lower() == "true"
|
||||||
|
|
||||||
|
# Set to "true" to mount the Pyinstrument ASGI middleware on the FastAPI app.
|
||||||
|
# Produces per-request HTML flamegraphs under ./profiles/. Off by default so
|
||||||
|
# production and normal dev runs pay zero profiling overhead.
|
||||||
|
DECNET_PROFILE_REQUESTS: bool = os.environ.get("DECNET_PROFILE_REQUESTS", "").lower() == "true"
|
||||||
|
DECNET_PROFILE_DIR: str = os.environ.get("DECNET_PROFILE_DIR", "profiles")
|
||||||
|
|
||||||
# API Options
|
# API Options
|
||||||
DECNET_API_HOST: str = os.environ.get("DECNET_API_HOST", "127.0.0.1")
|
DECNET_API_HOST: str = os.environ.get("DECNET_API_HOST", "127.0.0.1")
|
||||||
DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000)
|
DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000)
|
||||||
DECNET_JWT_SECRET: str = _require_env("DECNET_JWT_SECRET")
|
# DECNET_JWT_SECRET is resolved lazily via module __getattr__ so that agent /
|
||||||
|
# updater / swarmctl subcommands (which never touch auth) can start without
|
||||||
|
# the master's JWT secret being present in the environment.
|
||||||
DECNET_INGEST_LOG_FILE: str | None = os.environ.get("DECNET_INGEST_LOG_FILE", "/var/log/decnet/decnet.log")
|
DECNET_INGEST_LOG_FILE: str | None = os.environ.get("DECNET_INGEST_LOG_FILE", "/var/log/decnet/decnet.log")
|
||||||
|
|
||||||
|
# SWARM log pipeline — RFC 5425 syslog-over-TLS between worker forwarders
|
||||||
|
# and the master listener. Plaintext syslog across hosts is forbidden.
|
||||||
|
DECNET_SWARM_SYSLOG_PORT: int = _port("DECNET_SWARM_SYSLOG_PORT", 6514)
|
||||||
|
DECNET_SWARM_MASTER_HOST: str | None = os.environ.get("DECNET_SWARM_MASTER_HOST")
|
||||||
|
|
||||||
|
# Worker-side identity + swarmctl locator, seeded by the enroll bundle's
|
||||||
|
# /etc/decnet/decnet.ini ([agent] host-uuid / master-host / swarmctl-port).
|
||||||
|
# The agent heartbeat loop uses these to self-identify to the master.
|
||||||
|
DECNET_HOST_UUID: str | None = os.environ.get("DECNET_HOST_UUID")
|
||||||
|
DECNET_MASTER_HOST: str | None = os.environ.get("DECNET_MASTER_HOST")
|
||||||
|
DECNET_SWARMCTL_PORT: int = _port("DECNET_SWARMCTL_PORT", 8770)
|
||||||
|
|
||||||
|
# Ingester batching: how many log rows to accumulate per commit, and the
|
||||||
|
# max wait (ms) before flushing a partial batch. Larger batches reduce
|
||||||
|
# SQLite write-lock contention; the timeout keeps latency bounded during
|
||||||
|
# low-traffic periods.
|
||||||
|
DECNET_BATCH_SIZE: int = int(os.environ.get("DECNET_BATCH_SIZE", "100"))
|
||||||
|
DECNET_BATCH_MAX_WAIT_MS: int = int(os.environ.get("DECNET_BATCH_MAX_WAIT_MS", "250"))
|
||||||
|
|
||||||
# Web Dashboard Options
|
# Web Dashboard Options
|
||||||
DECNET_WEB_HOST: str = os.environ.get("DECNET_WEB_HOST", "127.0.0.1")
|
DECNET_WEB_HOST: str = os.environ.get("DECNET_WEB_HOST", "127.0.0.1")
|
||||||
DECNET_WEB_PORT: int = _port("DECNET_WEB_PORT", 8080)
|
DECNET_WEB_PORT: int = _port("DECNET_WEB_PORT", 8080)
|
||||||
@@ -72,6 +110,17 @@ DECNET_ADMIN_USER: str = os.environ.get("DECNET_ADMIN_USER", "admin")
|
|||||||
DECNET_ADMIN_PASSWORD: str = os.environ.get("DECNET_ADMIN_PASSWORD", "admin")
|
DECNET_ADMIN_PASSWORD: str = os.environ.get("DECNET_ADMIN_PASSWORD", "admin")
|
||||||
DECNET_DEVELOPER: bool = os.environ.get("DECNET_DEVELOPER", "False").lower() == "true"
|
DECNET_DEVELOPER: bool = os.environ.get("DECNET_DEVELOPER", "False").lower() == "true"
|
||||||
|
|
||||||
|
# Host role — seeded by /etc/decnet/decnet.ini or exported directly.
|
||||||
|
# "master" = the central server (api, web, swarmctl, listener).
|
||||||
|
# "agent" = a worker node (agent, forwarder, updater). Workers gate their
|
||||||
|
# Typer CLI to hide master-only commands (see decnet/cli.py).
|
||||||
|
DECNET_MODE: str = os.environ.get("DECNET_MODE", "master").lower()
|
||||||
|
# When mode=agent, hide master-only Typer commands. Set to "false" for dual-
|
||||||
|
# role dev hosts where a single machine plays both sides.
|
||||||
|
DECNET_DISALLOW_MASTER: bool = (
|
||||||
|
os.environ.get("DECNET_DISALLOW_MASTER", "true").lower() == "true"
|
||||||
|
)
|
||||||
|
|
||||||
# Tracing — set to "true" to enable OpenTelemetry distributed tracing.
|
# Tracing — set to "true" to enable OpenTelemetry distributed tracing.
|
||||||
# Separate from DECNET_DEVELOPER so tracing can be toggled independently.
|
# Separate from DECNET_DEVELOPER so tracing can be toggled independently.
|
||||||
DECNET_DEVELOPER_TRACING: bool = os.environ.get("DECNET_DEVELOPER_TRACING", "").lower() == "true"
|
DECNET_DEVELOPER_TRACING: bool = os.environ.get("DECNET_DEVELOPER_TRACING", "").lower() == "true"
|
||||||
@@ -95,3 +144,10 @@ _web_hostname: str = "localhost" if DECNET_WEB_HOST in _WILDCARD_ADDRS else DECN
|
|||||||
_cors_default: str = f"http://{_web_hostname}:{DECNET_WEB_PORT}"
|
_cors_default: str = f"http://{_web_hostname}:{DECNET_WEB_PORT}"
|
||||||
_cors_raw: str = os.environ.get("DECNET_CORS_ORIGINS", _cors_default)
|
_cors_raw: str = os.environ.get("DECNET_CORS_ORIGINS", _cors_default)
|
||||||
DECNET_CORS_ORIGINS: list[str] = [o.strip() for o in _cors_raw.split(",") if o.strip()]
|
DECNET_CORS_ORIGINS: list[str] = [o.strip() for o in _cors_raw.split(",") if o.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
def __getattr__(name: str) -> str:
|
||||||
|
"""Lazy resolution for secrets only the master web/api process needs."""
|
||||||
|
if name == "DECNET_JWT_SECRET":
|
||||||
|
return _require_env("DECNET_JWT_SECRET")
|
||||||
|
raise AttributeError(f"module 'decnet.env' has no attribute {name!r}")
|
||||||
|
|||||||
@@ -13,6 +13,8 @@ import logging.handlers
|
|||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from decnet.logging.inode_aware_handler import InodeAwareRotatingFileHandler
|
||||||
|
from decnet.privdrop import chown_to_invoking_user, chown_tree_to_invoking_user
|
||||||
from decnet.telemetry import traced as _traced
|
from decnet.telemetry import traced as _traced
|
||||||
|
|
||||||
_LOG_FILE_ENV = "DECNET_LOG_FILE"
|
_LOG_FILE_ENV = "DECNET_LOG_FILE"
|
||||||
@@ -20,7 +22,7 @@ _DEFAULT_LOG_FILE = "/var/log/decnet/decnet.log"
|
|||||||
_MAX_BYTES = 10 * 1024 * 1024 # 10 MB
|
_MAX_BYTES = 10 * 1024 * 1024 # 10 MB
|
||||||
_BACKUP_COUNT = 5
|
_BACKUP_COUNT = 5
|
||||||
|
|
||||||
_handler: logging.handlers.RotatingFileHandler | None = None
|
_handler: InodeAwareRotatingFileHandler | None = None
|
||||||
_logger: logging.Logger | None = None
|
_logger: logging.Logger | None = None
|
||||||
|
|
||||||
|
|
||||||
@@ -31,13 +33,17 @@ def _init_file_handler() -> logging.Logger:
|
|||||||
|
|
||||||
log_path = Path(os.environ.get(_LOG_FILE_ENV, _DEFAULT_LOG_FILE))
|
log_path = Path(os.environ.get(_LOG_FILE_ENV, _DEFAULT_LOG_FILE))
|
||||||
log_path.parent.mkdir(parents=True, exist_ok=True)
|
log_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
# When running under sudo, hand the parent dir back to the invoking user
|
||||||
|
# so a subsequent non-root `decnet api` can also write to it.
|
||||||
|
chown_tree_to_invoking_user(log_path.parent)
|
||||||
|
|
||||||
_handler = logging.handlers.RotatingFileHandler(
|
_handler = InodeAwareRotatingFileHandler(
|
||||||
log_path,
|
log_path,
|
||||||
maxBytes=_MAX_BYTES,
|
maxBytes=_MAX_BYTES,
|
||||||
backupCount=_BACKUP_COUNT,
|
backupCount=_BACKUP_COUNT,
|
||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
)
|
)
|
||||||
|
chown_to_invoking_user(log_path)
|
||||||
_handler.setFormatter(logging.Formatter("%(message)s"))
|
_handler.setFormatter(logging.Formatter("%(message)s"))
|
||||||
|
|
||||||
_logger = logging.getLogger("decnet.syslog")
|
_logger = logging.getLogger("decnet.syslog")
|
||||||
|
|||||||
60
decnet/logging/inode_aware_handler.py
Normal file
60
decnet/logging/inode_aware_handler.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
"""
|
||||||
|
RotatingFileHandler that detects external deletion or rotation.
|
||||||
|
|
||||||
|
Stdlib ``RotatingFileHandler`` holds an open file descriptor for the
|
||||||
|
lifetime of the handler. If the target file is deleted (``rm``) or
|
||||||
|
rotated out (``logrotate`` without ``copytruncate``), the handler keeps
|
||||||
|
writing to the now-orphaned inode until its own size-based rotation
|
||||||
|
finally triggers — silently losing every line in between.
|
||||||
|
|
||||||
|
Stdlib ``WatchedFileHandler`` solves exactly this problem but doesn't
|
||||||
|
rotate by size. This subclass combines both: before each emit we stat
|
||||||
|
the configured path and compare its inode/device to the currently open
|
||||||
|
file; on mismatch we close and reopen.
|
||||||
|
|
||||||
|
Cheap: one ``os.stat`` per log record. Matches the pattern used by
|
||||||
|
``decnet/collector/worker.py:_reopen_if_needed``.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import logging.handlers
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class InodeAwareRotatingFileHandler(logging.handlers.RotatingFileHandler):
    """RotatingFileHandler that reopens the target on external rotation/deletion."""

    def _should_reopen(self) -> bool:
        """Return True when the stream must be (re)opened before writing.

        Compares the inode/device of the path on disk against the currently
        open file descriptor; a mismatch means the file was rotated or
        deleted behind our back.
        """
        # No stream yet (e.g. delayed open or after close): open it.
        if self.stream is None:
            return True
        try:
            on_disk = os.stat(self.baseFilename)
        except FileNotFoundError:
            # Target was deleted out from under us — reopen creates a new one.
            return True
        except OSError:
            # Transient stat failure: keep writing to the stream we have.
            return False
        try:
            ours = os.fstat(self.stream.fileno())
        except OSError:
            return True
        same_file = (on_disk.st_ino == ours.st_ino
                     and on_disk.st_dev == ours.st_dev)
        return not same_file

    def emit(self, record: logging.LogRecord) -> None:
        """Write *record*, first reopening the file if it changed on disk."""
        if self._should_reopen():
            try:
                if self.stream is not None:
                    self.close()
            except Exception:  # nosec B110
                pass
            try:
                self.stream = self._open()
            except OSError:
                # A logging handler MUST NOT crash its caller. If we can't
                # reopen (e.g. file is root-owned after `sudo decnet deploy`
                # and the current process is non-root), defer to the stdlib
                # error path, which just prints a traceback to stderr.
                self.handleError(record)
                return
        super().emit(record)
|
||||||
@@ -5,7 +5,7 @@ Produces fully-compliant syslog messages:
|
|||||||
<PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG
|
<PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG
|
||||||
|
|
||||||
Facility: local0 (16)
|
Facility: local0 (16)
|
||||||
PEN for structured data: decnet@55555
|
PEN for structured data: relay@55555
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@@ -16,7 +16,7 @@ from typing import Any
|
|||||||
|
|
||||||
FACILITY_LOCAL0 = 16
|
FACILITY_LOCAL0 = 16
|
||||||
NILVALUE = "-"
|
NILVALUE = "-"
|
||||||
_SD_ID = "decnet@55555"
|
_SD_ID = "relay@55555"
|
||||||
|
|
||||||
SEVERITY_INFO = 6
|
SEVERITY_INFO = 6
|
||||||
SEVERITY_WARNING = 4
|
SEVERITY_WARNING = 4
|
||||||
|
|||||||
@@ -99,6 +99,9 @@ class DeckyConfig(BaseModel):
|
|||||||
mutate_interval: int | None = None # automatic rotation interval in minutes
|
mutate_interval: int | None = None # automatic rotation interval in minutes
|
||||||
last_mutated: float = 0.0 # timestamp of last mutation
|
last_mutated: float = 0.0 # timestamp of last mutation
|
||||||
last_login_attempt: float = 0.0 # timestamp of most recent interaction
|
last_login_attempt: float = 0.0 # timestamp of most recent interaction
|
||||||
|
# SWARM: the SwarmHost.uuid that runs this decky. None in unihost mode
|
||||||
|
# so existing state files deserialize unchanged.
|
||||||
|
host_uuid: str | None = None
|
||||||
|
|
||||||
@field_validator("services")
|
@field_validator("services")
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -126,22 +126,57 @@ def allocate_ips(
|
|||||||
# Docker MACVLAN network
|
# Docker MACVLAN network
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def create_macvlan_network(
|
def _ensure_network(
|
||||||
client: docker.DockerClient,
|
client: docker.DockerClient,
|
||||||
|
*,
|
||||||
|
driver: str,
|
||||||
interface: str,
|
interface: str,
|
||||||
subnet: str,
|
subnet: str,
|
||||||
gateway: str,
|
gateway: str,
|
||||||
ip_range: str,
|
ip_range: str,
|
||||||
|
extra_options: dict | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Create the MACVLAN Docker network. No-op if it already exists."""
|
"""Create the decnet docker network with ``driver``, replacing any
|
||||||
existing = [n.name for n in client.networks.list()]
|
existing network of the same name that was built with a different driver.
|
||||||
if MACVLAN_NETWORK_NAME in existing:
|
|
||||||
return
|
Why the replace-on-driver-mismatch: macvlan and ipvlan slaves can't
|
||||||
|
coexist on the same parent interface. If an earlier run left behind a
|
||||||
|
macvlan-driver network and we're now asked for ipvlan (or vice versa),
|
||||||
|
short-circuiting on name alone leaves Docker attaching new containers
|
||||||
|
to the old driver and the host NIC ends up EBUSY on the next port
|
||||||
|
create. So: when driver disagrees, disconnect everything and DROP it.
|
||||||
|
"""
|
||||||
|
options = {"parent": interface}
|
||||||
|
if extra_options:
|
||||||
|
options.update(extra_options)
|
||||||
|
|
||||||
|
for net in client.networks.list(names=[MACVLAN_NETWORK_NAME]):
|
||||||
|
if net.attrs.get("Driver") == driver:
|
||||||
|
# Same driver — but if the IPAM pool drifted (different subnet,
|
||||||
|
# gateway, or ip-range than this deploy asks for), reusing it
|
||||||
|
# hands out addresses from the old pool and we race the real LAN.
|
||||||
|
# Compare and rebuild on mismatch.
|
||||||
|
pools = (net.attrs.get("IPAM") or {}).get("Config") or []
|
||||||
|
cur = pools[0] if pools else {}
|
||||||
|
if (
|
||||||
|
cur.get("Subnet") == subnet
|
||||||
|
and cur.get("Gateway") == gateway
|
||||||
|
and cur.get("IPRange") == ip_range
|
||||||
|
):
|
||||||
|
return # right driver AND matching pool, leave it alone
|
||||||
|
# Driver mismatch OR IPAM drift — tear it down. Disconnect any live
|
||||||
|
# containers first so `remove()` doesn't refuse with ErrNetworkInUse.
|
||||||
|
for cid in (net.attrs.get("Containers") or {}):
|
||||||
|
try:
|
||||||
|
net.disconnect(cid, force=True)
|
||||||
|
except docker.errors.APIError:
|
||||||
|
pass
|
||||||
|
net.remove()
|
||||||
|
|
||||||
client.networks.create(
|
client.networks.create(
|
||||||
name=MACVLAN_NETWORK_NAME,
|
name=MACVLAN_NETWORK_NAME,
|
||||||
driver="macvlan",
|
driver=driver,
|
||||||
options={"parent": interface},
|
options=options,
|
||||||
ipam=docker.types.IPAMConfig(
|
ipam=docker.types.IPAMConfig(
|
||||||
driver="default",
|
driver="default",
|
||||||
pool_configs=[
|
pool_configs=[
|
||||||
@@ -155,6 +190,21 @@ def create_macvlan_network(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def create_macvlan_network(
|
||||||
|
client: docker.DockerClient,
|
||||||
|
interface: str,
|
||||||
|
subnet: str,
|
||||||
|
gateway: str,
|
||||||
|
ip_range: str,
|
||||||
|
) -> None:
|
||||||
|
"""Create the MACVLAN Docker network, replacing an ipvlan-driver one of
|
||||||
|
the same name if necessary (parent-NIC can't host both drivers)."""
|
||||||
|
_ensure_network(
|
||||||
|
client, driver="macvlan", interface=interface,
|
||||||
|
subnet=subnet, gateway=gateway, ip_range=ip_range,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def create_ipvlan_network(
|
def create_ipvlan_network(
|
||||||
client: docker.DockerClient,
|
client: docker.DockerClient,
|
||||||
interface: str,
|
interface: str,
|
||||||
@@ -162,25 +212,12 @@ def create_ipvlan_network(
|
|||||||
gateway: str,
|
gateway: str,
|
||||||
ip_range: str,
|
ip_range: str,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Create an IPvlan L2 Docker network. No-op if it already exists."""
|
"""Create an IPvlan L2 Docker network, replacing a macvlan-driver one of
|
||||||
existing = [n.name for n in client.networks.list()]
|
the same name if necessary (parent-NIC can't host both drivers)."""
|
||||||
if MACVLAN_NETWORK_NAME in existing:
|
_ensure_network(
|
||||||
return
|
client, driver="ipvlan", interface=interface,
|
||||||
|
subnet=subnet, gateway=gateway, ip_range=ip_range,
|
||||||
client.networks.create(
|
extra_options={"ipvlan_mode": "l2"},
|
||||||
name=MACVLAN_NETWORK_NAME,
|
|
||||||
driver="ipvlan",
|
|
||||||
options={"parent": interface, "ipvlan_mode": "l2"},
|
|
||||||
ipam=docker.types.IPAMConfig(
|
|
||||||
driver="default",
|
|
||||||
pool_configs=[
|
|
||||||
docker.types.IPAMPool(
|
|
||||||
subnet=subnet,
|
|
||||||
gateway=gateway,
|
|
||||||
iprange=ip_range,
|
|
||||||
)
|
|
||||||
],
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -204,10 +241,14 @@ def _require_root() -> None:
|
|||||||
def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str) -> None:
|
def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str) -> None:
|
||||||
"""
|
"""
|
||||||
Create a macvlan interface on the host so the deployer can reach deckies.
|
Create a macvlan interface on the host so the deployer can reach deckies.
|
||||||
Idempotent — skips steps that are already done.
|
Idempotent — skips steps that are already done. Drops a stale ipvlan
|
||||||
|
host-helper first: the two drivers can share a parent NIC on paper but
|
||||||
|
leaving the opposite helper in place is just cruft after a driver swap.
|
||||||
"""
|
"""
|
||||||
_require_root()
|
_require_root()
|
||||||
|
|
||||||
|
_run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
|
||||||
|
|
||||||
# Check if interface already exists
|
# Check if interface already exists
|
||||||
result = _run(["ip", "link", "show", HOST_MACVLAN_IFACE], check=False)
|
result = _run(["ip", "link", "show", HOST_MACVLAN_IFACE], check=False)
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
@@ -227,10 +268,14 @@ def teardown_host_macvlan(decky_ip_range: str) -> None:
|
|||||||
def setup_host_ipvlan(interface: str, host_ipvlan_ip: str, decky_ip_range: str) -> None:
|
def setup_host_ipvlan(interface: str, host_ipvlan_ip: str, decky_ip_range: str) -> None:
|
||||||
"""
|
"""
|
||||||
Create an IPvlan interface on the host so the deployer can reach deckies.
|
Create an IPvlan interface on the host so the deployer can reach deckies.
|
||||||
Idempotent — skips steps that are already done.
|
Idempotent — skips steps that are already done. Drops a stale macvlan
|
||||||
|
host-helper first so a prior macvlan deploy doesn't leave its slave
|
||||||
|
dangling on the parent NIC after the driver swap.
|
||||||
"""
|
"""
|
||||||
_require_root()
|
_require_root()
|
||||||
|
|
||||||
|
_run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
|
||||||
|
|
||||||
result = _run(["ip", "link", "show", HOST_IPVLAN_IFACE], check=False)
|
result = _run(["ip", "link", "show", HOST_IPVLAN_IFACE], check=False)
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
_run(["ip", "link", "add", HOST_IPVLAN_IFACE, "link", interface, "type", "ipvlan", "mode", "l2"])
|
_run(["ip", "link", "add", HOST_IPVLAN_IFACE, "link", interface, "type", "ipvlan", "mode", "l2"])
|
||||||
|
|||||||
67
decnet/privdrop.py
Normal file
67
decnet/privdrop.py
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
"""
|
||||||
|
Helpers for dropping root ownership on files created during privileged
|
||||||
|
operations (e.g. `sudo decnet deploy` needs root for MACVLAN, but its log
|
||||||
|
files should be owned by the invoking user so a subsequent non-root
|
||||||
|
`decnet api` can append to them).
|
||||||
|
|
||||||
|
When sudo invokes a process, it sets SUDO_UID / SUDO_GID in the
|
||||||
|
environment to the original user's IDs. We use those to chown files
|
||||||
|
back after creation.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
def _sudo_ids() -> Optional[tuple[int, int]]:
|
||||||
|
"""Return (uid, gid) of the sudo-invoking user, or None when the
|
||||||
|
process was not launched via sudo / the env vars are missing."""
|
||||||
|
raw_uid = os.environ.get("SUDO_UID")
|
||||||
|
raw_gid = os.environ.get("SUDO_GID")
|
||||||
|
if not raw_uid or not raw_gid:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return int(raw_uid), int(raw_gid)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def chown_to_invoking_user(path: str | os.PathLike[str]) -> None:
|
||||||
|
"""Best-effort chown of *path* to the sudo-invoking user.
|
||||||
|
|
||||||
|
No-op when:
|
||||||
|
* not running as root (nothing to drop),
|
||||||
|
* not launched via sudo (no SUDO_UID/SUDO_GID),
|
||||||
|
* the path does not exist,
|
||||||
|
* chown fails (logged-only — never raises).
|
||||||
|
"""
|
||||||
|
if os.geteuid() != 0:
|
||||||
|
return
|
||||||
|
ids = _sudo_ids()
|
||||||
|
if ids is None:
|
||||||
|
return
|
||||||
|
uid, gid = ids
|
||||||
|
p = Path(path)
|
||||||
|
if not p.exists():
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
os.chown(p, uid, gid)
|
||||||
|
except OSError:
|
||||||
|
# Best-effort; a failed chown is not fatal to logging.
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def chown_tree_to_invoking_user(root: str | os.PathLike[str]) -> None:
|
||||||
|
"""Apply :func:`chown_to_invoking_user` to *root* and every file/dir
|
||||||
|
beneath it. Used for parent directories that we just created with
|
||||||
|
``mkdir(parents=True)`` as root."""
|
||||||
|
if os.geteuid() != 0 or _sudo_ids() is None:
|
||||||
|
return
|
||||||
|
root_path = Path(root)
|
||||||
|
if not root_path.exists():
|
||||||
|
return
|
||||||
|
chown_to_invoking_user(root_path)
|
||||||
|
for entry in root_path.rglob("*"):
|
||||||
|
chown_to_invoking_user(entry)
|
||||||
@@ -51,7 +51,7 @@ DEFAULT_TCPFP_PORTS: list[int] = [22, 80, 443, 8080, 8443, 445, 3389]
|
|||||||
# ─── RFC 5424 formatting (inline, mirrors templates/*/decnet_logging.py) ─────
|
# ─── RFC 5424 formatting (inline, mirrors templates/*/decnet_logging.py) ─────
|
||||||
|
|
||||||
_FACILITY_LOCAL0 = 16
|
_FACILITY_LOCAL0 = 16
|
||||||
_SD_ID = "decnet@55555"
|
_SD_ID = "relay@55555"
|
||||||
_SEVERITY_INFO = 6
|
_SEVERITY_INFO = 6
|
||||||
_SEVERITY_WARNING = 4
|
_SEVERITY_WARNING = 4
|
||||||
|
|
||||||
@@ -98,7 +98,7 @@ _RFC5424_RE = re.compile(
|
|||||||
r"(\S+) " # 4: MSGID (event_type)
|
r"(\S+) " # 4: MSGID (event_type)
|
||||||
r"(.+)$", # 5: SD + MSG
|
r"(.+)$", # 5: SD + MSG
|
||||||
)
|
)
|
||||||
_SD_BLOCK_RE = re.compile(r'\[decnet@55555\s+(.*?)\]', re.DOTALL)
|
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
|
||||||
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
||||||
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "ip", "target_ip")
|
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "ip", "target_ip")
|
||||||
|
|
||||||
|
|||||||
@@ -32,4 +32,4 @@ class ConpotService(BaseService):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def dockerfile_context(self):
|
def dockerfile_context(self):
|
||||||
return Path(__file__).parent.parent.parent / "templates" / "conpot"
|
return Path(__file__).parent.parent / "templates" / "conpot"
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "docker_api"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "docker_api"
|
||||||
|
|
||||||
|
|
||||||
class DockerAPIService(BaseService):
|
class DockerAPIService(BaseService):
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "elasticsearch"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "elasticsearch"
|
||||||
|
|
||||||
|
|
||||||
class ElasticsearchService(BaseService):
|
class ElasticsearchService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "ftp"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "ftp"
|
||||||
|
|
||||||
|
|
||||||
class FTPService(BaseService):
|
class FTPService(BaseService):
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import json
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "http"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "http"
|
||||||
|
|
||||||
|
|
||||||
class HTTPService(BaseService):
|
class HTTPService(BaseService):
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import json
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "https"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "https"
|
||||||
|
|
||||||
|
|
||||||
class HTTPSService(BaseService):
|
class HTTPSService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "imap"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "imap"
|
||||||
|
|
||||||
|
|
||||||
class IMAPService(BaseService):
|
class IMAPService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "k8s"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "k8s"
|
||||||
|
|
||||||
|
|
||||||
class KubernetesAPIService(BaseService):
|
class KubernetesAPIService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "ldap"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "ldap"
|
||||||
|
|
||||||
|
|
||||||
class LDAPService(BaseService):
|
class LDAPService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "llmnr"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "llmnr"
|
||||||
|
|
||||||
|
|
||||||
class LLMNRService(BaseService):
|
class LLMNRService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "mongodb"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mongodb"
|
||||||
|
|
||||||
|
|
||||||
class MongoDBService(BaseService):
|
class MongoDBService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "mqtt"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mqtt"
|
||||||
|
|
||||||
|
|
||||||
class MQTTService(BaseService):
|
class MQTTService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "mssql"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mssql"
|
||||||
|
|
||||||
|
|
||||||
class MSSQLService(BaseService):
|
class MSSQLService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "mysql"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mysql"
|
||||||
|
|
||||||
|
|
||||||
class MySQLService(BaseService):
|
class MySQLService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "pop3"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "pop3"
|
||||||
|
|
||||||
|
|
||||||
class POP3Service(BaseService):
|
class POP3Service(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "postgres"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "postgres"
|
||||||
|
|
||||||
|
|
||||||
class PostgresService(BaseService):
|
class PostgresService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "rdp"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "rdp"
|
||||||
|
|
||||||
|
|
||||||
class RDPService(BaseService):
|
class RDPService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "redis"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "redis"
|
||||||
|
|
||||||
|
|
||||||
class RedisService(BaseService):
|
class RedisService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "sip"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "sip"
|
||||||
|
|
||||||
|
|
||||||
class SIPService(BaseService):
|
class SIPService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "smb"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "smb"
|
||||||
|
|
||||||
|
|
||||||
class SMBService(BaseService):
|
class SMBService(BaseService):
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "smtp"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "smtp"
|
||||||
|
|
||||||
|
|
||||||
class SMTPService(BaseService):
|
class SMTPService(BaseService):
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from decnet.services.base import BaseService
|
|||||||
|
|
||||||
# Reuses the same template as the smtp service — only difference is
|
# Reuses the same template as the smtp service — only difference is
|
||||||
# SMTP_OPEN_RELAY=1 in the environment, which enables the open relay persona.
|
# SMTP_OPEN_RELAY=1 in the environment, which enables the open relay persona.
|
||||||
_TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "smtp"
|
_TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "smtp"
|
||||||
|
|
||||||
|
|
||||||
class SMTPRelayService(BaseService):
|
class SMTPRelayService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "sniffer"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "sniffer"
|
||||||
|
|
||||||
|
|
||||||
class SnifferService(BaseService):
|
class SnifferService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "snmp"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "snmp"
|
||||||
|
|
||||||
|
|
||||||
class SNMPService(BaseService):
|
class SNMPService(BaseService):
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "ssh"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "ssh"
|
||||||
|
|
||||||
|
|
||||||
class SSHService(BaseService):
|
class SSHService(BaseService):
|
||||||
@@ -32,16 +32,28 @@ class SSHService(BaseService):
|
|||||||
cfg = service_cfg or {}
|
cfg = service_cfg or {}
|
||||||
env: dict = {
|
env: dict = {
|
||||||
"SSH_ROOT_PASSWORD": cfg.get("password", "admin"),
|
"SSH_ROOT_PASSWORD": cfg.get("password", "admin"),
|
||||||
|
# NODE_NAME is the authoritative decky identifier for log
|
||||||
|
# attribution — matches the host path used for the artifacts
|
||||||
|
# bind mount below. The container hostname (optionally overridden
|
||||||
|
# via SSH_HOSTNAME) is cosmetic and may differ to keep the
|
||||||
|
# decoy looking heterogeneous.
|
||||||
|
"NODE_NAME": decky_name,
|
||||||
}
|
}
|
||||||
if "hostname" in cfg:
|
if "hostname" in cfg:
|
||||||
env["SSH_HOSTNAME"] = cfg["hostname"]
|
env["SSH_HOSTNAME"] = cfg["hostname"]
|
||||||
|
|
||||||
|
# File-catcher quarantine: bind-mount a per-decky host dir so attacker
|
||||||
|
# drops (scp/sftp/wget) are mirrored out-of-band for forensic analysis.
|
||||||
|
# The in-container path masquerades as systemd-coredump so `mount`/`df`
|
||||||
|
# from inside the container looks benign.
|
||||||
|
quarantine_host = f"/var/lib/decnet/artifacts/{decky_name}/ssh"
|
||||||
return {
|
return {
|
||||||
"build": {"context": str(TEMPLATES_DIR)},
|
"build": {"context": str(TEMPLATES_DIR)},
|
||||||
"container_name": f"{decky_name}-ssh",
|
"container_name": f"{decky_name}-ssh",
|
||||||
"restart": "unless-stopped",
|
"restart": "unless-stopped",
|
||||||
"cap_add": ["NET_BIND_SERVICE"],
|
"cap_add": ["NET_BIND_SERVICE"],
|
||||||
"environment": env,
|
"environment": env,
|
||||||
|
"volumes": [f"{quarantine_host}:/var/lib/systemd/coredump:rw"],
|
||||||
}
|
}
|
||||||
|
|
||||||
def dockerfile_context(self) -> Path:
|
def dockerfile_context(self) -> Path:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "telnet"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "telnet"
|
||||||
|
|
||||||
|
|
||||||
class TelnetService(BaseService):
|
class TelnetService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "tftp"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "tftp"
|
||||||
|
|
||||||
|
|
||||||
class TFTPService(BaseService):
|
class TFTPService(BaseService):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "vnc"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "vnc"
|
||||||
|
|
||||||
|
|
||||||
class VNCService(BaseService):
|
class VNCService(BaseService):
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ from decnet.telemetry import traced as _traced
|
|||||||
# ─── Constants (must match templates/sniffer/decnet_logging.py) ──────────────
|
# ─── Constants (must match templates/sniffer/decnet_logging.py) ──────────────
|
||||||
|
|
||||||
_FACILITY_LOCAL0 = 16
|
_FACILITY_LOCAL0 = 16
|
||||||
_SD_ID = "decnet@55555"
|
_SD_ID = "relay@55555"
|
||||||
_NILVALUE = "-"
|
_NILVALUE = "-"
|
||||||
|
|
||||||
SEVERITY_INFO = 6
|
SEVERITY_INFO = 6
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ from concurrent.futures import ThreadPoolExecutor
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from decnet.logging import get_logger
|
from decnet.logging import get_logger
|
||||||
from decnet.network import HOST_MACVLAN_IFACE
|
from decnet.network import HOST_IPVLAN_IFACE, HOST_MACVLAN_IFACE
|
||||||
from decnet.sniffer.fingerprint import SnifferEngine
|
from decnet.sniffer.fingerprint import SnifferEngine
|
||||||
from decnet.sniffer.syslog import write_event
|
from decnet.sniffer.syslog import write_event
|
||||||
from decnet.telemetry import traced as _traced
|
from decnet.telemetry import traced as _traced
|
||||||
@@ -119,7 +119,23 @@ async def sniffer_worker(log_file: str) -> None:
|
|||||||
cleanly. The API continues running regardless of sniffer state.
|
cleanly. The API continues running regardless of sniffer state.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
interface = os.environ.get("DECNET_SNIFFER_IFACE", HOST_MACVLAN_IFACE)
|
# Interface selection: explicit env override wins, otherwise probe
|
||||||
|
# both the MACVLAN and IPvlan host-side names since the driver
|
||||||
|
# choice is per-deploy (--ipvlan flag).
|
||||||
|
env_iface = os.environ.get("DECNET_SNIFFER_IFACE")
|
||||||
|
if env_iface:
|
||||||
|
interface = env_iface
|
||||||
|
elif _interface_exists(HOST_MACVLAN_IFACE):
|
||||||
|
interface = HOST_MACVLAN_IFACE
|
||||||
|
elif _interface_exists(HOST_IPVLAN_IFACE):
|
||||||
|
interface = HOST_IPVLAN_IFACE
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
"sniffer: neither %s nor %s found — sniffer disabled "
|
||||||
|
"(fleet may not be deployed yet)",
|
||||||
|
HOST_MACVLAN_IFACE, HOST_IPVLAN_IFACE,
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
if not _interface_exists(interface):
|
if not _interface_exists(interface):
|
||||||
logger.warning(
|
logger.warning(
|
||||||
|
|||||||
7
decnet/swarm/__init__.py
Normal file
7
decnet/swarm/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
"""DECNET SWARM — multihost deployment subsystem.
|
||||||
|
|
||||||
|
Components:
|
||||||
|
* ``pki`` — X.509 CA + CSR signing used by all swarm mTLS channels
|
||||||
|
* ``client`` — master-side HTTP client that talks to remote workers
|
||||||
|
* ``log_forwarder``— worker-side syslog-over-TLS (RFC 5425) forwarder
|
||||||
|
"""
|
||||||
200
decnet/swarm/client.py
Normal file
200
decnet/swarm/client.py
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
"""Master-side HTTP client that talks to a worker's DECNET agent.
|
||||||
|
|
||||||
|
All traffic is mTLS: the master presents a cert issued by its own CA (which
|
||||||
|
workers trust) and the master validates the worker's cert against the same
|
||||||
|
CA. In practice the "client cert" the master shows is just another cert
|
||||||
|
signed by itself — the master is both the CA and the sole control-plane
|
||||||
|
client.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
async with AgentClient(host) as agent:
|
||||||
|
await agent.deploy(config)
|
||||||
|
status = await agent.status()
|
||||||
|
|
||||||
|
The ``host`` is a SwarmHost dict returned by the repository.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pathlib
|
||||||
|
import ssl
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from decnet.config import DecnetConfig
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.swarm import pki
|
||||||
|
|
||||||
|
log = get_logger("swarm.client")
|
||||||
|
|
||||||
|
# How long a single HTTP operation can take. Deploy is the long pole —
|
||||||
|
# docker compose up pulls images, builds contexts, etc. Tune via env in a
|
||||||
|
# later iteration if the default proves too short.
|
||||||
|
_TIMEOUT_DEPLOY = httpx.Timeout(connect=10.0, read=600.0, write=30.0, pool=5.0)
|
||||||
|
_TIMEOUT_CONTROL = httpx.Timeout(connect=5.0, read=15.0, write=5.0, pool=5.0)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class MasterIdentity:
|
||||||
|
"""Paths to the master's own mTLS client bundle.
|
||||||
|
|
||||||
|
The master uses ONE master-client cert to talk to every worker. It is
|
||||||
|
signed by the DECNET CA (same CA that signs worker certs). Stored
|
||||||
|
under ``~/.decnet/ca/master/`` by ``ensure_master_identity``.
|
||||||
|
"""
|
||||||
|
key_path: pathlib.Path
|
||||||
|
cert_path: pathlib.Path
|
||||||
|
ca_cert_path: pathlib.Path
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_master_identity(
|
||||||
|
ca_dir: pathlib.Path = pki.DEFAULT_CA_DIR,
|
||||||
|
) -> MasterIdentity:
|
||||||
|
"""Create (or load) the master's own client cert.
|
||||||
|
|
||||||
|
Called once by the swarm controller on startup and by the CLI before
|
||||||
|
any master→worker call. Idempotent.
|
||||||
|
"""
|
||||||
|
ca = pki.ensure_ca(ca_dir)
|
||||||
|
master_dir = ca_dir / "master"
|
||||||
|
bundle = pki.load_worker_bundle(master_dir)
|
||||||
|
if bundle is None:
|
||||||
|
issued = pki.issue_worker_cert(ca, "decnet-master", ["127.0.0.1", "decnet-master"])
|
||||||
|
pki.write_worker_bundle(issued, master_dir)
|
||||||
|
return MasterIdentity(
|
||||||
|
key_path=master_dir / "worker.key",
|
||||||
|
cert_path=master_dir / "worker.crt",
|
||||||
|
ca_cert_path=master_dir / "ca.crt",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AgentClient:
|
||||||
|
"""Thin async wrapper around the worker agent's HTTP API."""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
host: dict[str, Any] | None = None,
|
||||||
|
*,
|
||||||
|
address: Optional[str] = None,
|
||||||
|
agent_port: Optional[int] = None,
|
||||||
|
identity: Optional[MasterIdentity] = None,
|
||||||
|
verify_hostname: bool = False,
|
||||||
|
):
|
||||||
|
"""Either pass a SwarmHost dict, or explicit address/port.
|
||||||
|
|
||||||
|
``verify_hostname`` stays False by default because the worker's
|
||||||
|
cert SAN is populated from the operator-supplied address list, not
|
||||||
|
from modern TLS hostname-verification semantics. The mTLS client
|
||||||
|
cert + CA pinning are what authenticate the peer.
|
||||||
|
"""
|
||||||
|
if host is not None:
|
||||||
|
self._address = host["address"]
|
||||||
|
self._port = int(host.get("agent_port") or 8765)
|
||||||
|
self._host_uuid = host.get("uuid")
|
||||||
|
self._host_name = host.get("name")
|
||||||
|
else:
|
||||||
|
if address is None or agent_port is None:
|
||||||
|
raise ValueError(
|
||||||
|
"AgentClient requires either a host dict or address+agent_port"
|
||||||
|
)
|
||||||
|
self._address = address
|
||||||
|
self._port = int(agent_port)
|
||||||
|
self._host_uuid = None
|
||||||
|
self._host_name = None
|
||||||
|
|
||||||
|
self._identity = identity or ensure_master_identity()
|
||||||
|
self._verify_hostname = verify_hostname
|
||||||
|
self._client: Optional[httpx.AsyncClient] = None
|
||||||
|
|
||||||
|
# --------------------------------------------------------------- lifecycle
|
||||||
|
|
||||||
|
def _build_client(self, timeout: httpx.Timeout) -> httpx.AsyncClient:
|
||||||
|
# Build the SSL context manually — httpx.create_ssl_context layers on
|
||||||
|
# purpose/ALPN/default-CA logic that doesn't compose with private-CA
|
||||||
|
# mTLS in all combinations. A bare SSLContext is predictable.
|
||||||
|
ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
|
||||||
|
ctx.load_cert_chain(
|
||||||
|
str(self._identity.cert_path), str(self._identity.key_path)
|
||||||
|
)
|
||||||
|
ctx.load_verify_locations(cafile=str(self._identity.ca_cert_path))
|
||||||
|
ctx.verify_mode = ssl.CERT_REQUIRED
|
||||||
|
# Pin by CA + cert chain, not by DNS — workers enroll with arbitrary
|
||||||
|
# SANs (IPs, hostnames) and we don't want to force operators to keep
|
||||||
|
# those in sync with whatever URL the master happens to use.
|
||||||
|
ctx.check_hostname = self._verify_hostname
|
||||||
|
return httpx.AsyncClient(
|
||||||
|
base_url=f"https://{self._address}:{self._port}",
|
||||||
|
verify=ctx,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def __aenter__(self) -> "AgentClient":
|
||||||
|
self._client = self._build_client(_TIMEOUT_CONTROL)
|
||||||
|
return self
|
||||||
|
|
||||||
|
async def __aexit__(self, *exc: Any) -> None:
|
||||||
|
if self._client:
|
||||||
|
await self._client.aclose()
|
||||||
|
self._client = None
|
||||||
|
|
||||||
|
def _require_client(self) -> httpx.AsyncClient:
|
||||||
|
if self._client is None:
|
||||||
|
raise RuntimeError("AgentClient used outside `async with` block")
|
||||||
|
return self._client
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------- RPCs
|
||||||
|
|
||||||
|
async def health(self) -> dict[str, Any]:
|
||||||
|
resp = await self._require_client().get("/health")
|
||||||
|
resp.raise_for_status()
|
||||||
|
return resp.json()
|
||||||
|
|
||||||
|
async def status(self) -> dict[str, Any]:
|
||||||
|
resp = await self._require_client().get("/status")
|
||||||
|
resp.raise_for_status()
|
||||||
|
return resp.json()
|
||||||
|
|
||||||
|
async def deploy(
|
||||||
|
self,
|
||||||
|
config: DecnetConfig,
|
||||||
|
*,
|
||||||
|
dry_run: bool = False,
|
||||||
|
no_cache: bool = False,
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
body = {
|
||||||
|
"config": config.model_dump(mode="json"),
|
||||||
|
"dry_run": dry_run,
|
||||||
|
"no_cache": no_cache,
|
||||||
|
}
|
||||||
|
# Swap in a long-deploy timeout for this call only.
|
||||||
|
old = self._require_client().timeout
|
||||||
|
self._require_client().timeout = _TIMEOUT_DEPLOY
|
||||||
|
try:
|
||||||
|
resp = await self._require_client().post("/deploy", json=body)
|
||||||
|
finally:
|
||||||
|
self._require_client().timeout = old
|
||||||
|
resp.raise_for_status()
|
||||||
|
return resp.json()
|
||||||
|
|
||||||
|
async def teardown(self, decky_id: Optional[str] = None) -> dict[str, Any]:
|
||||||
|
resp = await self._require_client().post(
|
||||||
|
"/teardown", json={"decky_id": decky_id}
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
return resp.json()
|
||||||
|
|
||||||
|
async def self_destruct(self) -> dict[str, Any]:
|
||||||
|
"""Trigger the worker to stop services and wipe its install."""
|
||||||
|
resp = await self._require_client().post("/self-destruct")
|
||||||
|
resp.raise_for_status()
|
||||||
|
return resp.json()
|
||||||
|
|
||||||
|
# -------------------------------------------------------------- diagnostics
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
    """Debug-friendly summary: host name, address, and port."""
    name = self._host_name
    address = self._address
    return f"AgentClient(name={name!r}, address={address!r}, port={self._port})"
|
||||||
293
decnet/swarm/log_forwarder.py
Normal file
293
decnet/swarm/log_forwarder.py
Normal file
@@ -0,0 +1,293 @@
|
|||||||
|
"""Worker-side syslog-over-TLS forwarder (RFC 5425).
|
||||||
|
|
||||||
|
Runs alongside the worker agent. Tails the worker's local RFC 5424 log
|
||||||
|
file (written by the existing docker-collector) and ships each line to
|
||||||
|
the master's listener on TCP 6514 using octet-counted framing over mTLS.
|
||||||
|
Persists the last-forwarded byte offset in a tiny local SQLite so a
|
||||||
|
master crash never causes loss or duplication.
|
||||||
|
|
||||||
|
Design constraints (from the plan, non-negotiable):
|
||||||
|
* transport MUST be TLS — plaintext syslog is never acceptable between
|
||||||
|
hosts; only loopback (decky → worker-local collector) may be plaintext;
|
||||||
|
* mTLS — the listener pins the worker cert against the DECNET CA, so only
|
||||||
|
enrolled workers can push logs;
|
||||||
|
* offset persistence MUST be transactional w.r.t. the send — we only
|
||||||
|
advance the offset after ``writer.drain()`` returns without error.
|
||||||
|
|
||||||
|
The forwarder is intentionally a standalone coroutine, not a worker
|
||||||
|
inside the agent process. That keeps ``decnet agent`` crashes from
|
||||||
|
losing the log tail, and vice versa.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
import sqlite3
|
||||||
|
import ssl
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.swarm import pki
|
||||||
|
|
||||||
|
log = get_logger("swarm.forwarder")
|
||||||
|
|
||||||
|
# RFC 5425 framing: "<octet-count> <syslog-msg>".
|
||||||
|
# The message itself is a standard RFC 5424 line (no trailing newline).
|
||||||
|
_FRAME_SEP = b" "
|
||||||
|
|
||||||
|
_INITIAL_BACKOFF = 1.0
|
||||||
|
_MAX_BACKOFF = 30.0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class ForwarderConfig:
    """Static configuration for one worker-side log forwarder instance."""

    log_path: pathlib.Path  # worker's RFC 5424 .log file
    master_host: str  # address of the master's syslog-over-TLS listener
    master_port: int = 6514  # IANA port for syslog over TLS (RFC 5425)
    agent_dir: pathlib.Path = pki.DEFAULT_AGENT_DIR  # location of the mTLS bundle
    state_db: Optional[pathlib.Path] = None  # default: agent_dir / "forwarder.db"
    # Max unacked bytes to keep in the local buffer when master is down.
    # We bound the lag to avoid unbounded disk growth on catastrophic master
    # outage — older lines are surfaced as a warning and dropped by advancing
    # the offset.
    max_lag_bytes: int = 128 * 1024 * 1024  # 128 MiB
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------ offset storage
|
||||||
|
|
||||||
|
|
||||||
|
class _OffsetStore:
|
||||||
|
"""Single-row SQLite offset tracker. Stdlib only — no ORM, no async."""
|
||||||
|
|
||||||
|
def __init__(self, db_path: pathlib.Path) -> None:
|
||||||
|
db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
self._conn = sqlite3.connect(str(db_path))
|
||||||
|
self._conn.execute(
|
||||||
|
"CREATE TABLE IF NOT EXISTS forwarder_offset ("
|
||||||
|
" key TEXT PRIMARY KEY, offset INTEGER NOT NULL)"
|
||||||
|
)
|
||||||
|
self._conn.commit()
|
||||||
|
|
||||||
|
def get(self, key: str = "default") -> int:
|
||||||
|
row = self._conn.execute(
|
||||||
|
"SELECT offset FROM forwarder_offset WHERE key=?", (key,)
|
||||||
|
).fetchone()
|
||||||
|
return int(row[0]) if row else 0
|
||||||
|
|
||||||
|
def set(self, offset: int, key: str = "default") -> None:
|
||||||
|
self._conn.execute(
|
||||||
|
"INSERT INTO forwarder_offset(key, offset) VALUES(?, ?) "
|
||||||
|
"ON CONFLICT(key) DO UPDATE SET offset=excluded.offset",
|
||||||
|
(key, offset),
|
||||||
|
)
|
||||||
|
self._conn.commit()
|
||||||
|
|
||||||
|
def close(self) -> None:
|
||||||
|
self._conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------- TLS setup
|
||||||
|
|
||||||
|
|
||||||
|
def build_worker_ssl_context(agent_dir: pathlib.Path) -> ssl.SSLContext:
    """Client-side mTLS context for the forwarder.

    Worker presents its agent bundle (same cert used for the control-plane
    HTTPS listener). The CA is the DECNET CA; we pin by CA, not hostname,
    because workers reach masters by operator-supplied address.

    Raises:
        RuntimeError: when no enrollment bundle exists under *agent_dir*.
    """
    # Loading the bundle doubles as an existence check before handing raw
    # file paths to OpenSSL below.
    bundle = pki.load_worker_bundle(agent_dir)
    if bundle is None:
        raise RuntimeError(
            f"no worker bundle at {agent_dir} — enroll from the master first"
        )
    ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    ctx.load_cert_chain(
        certfile=str(agent_dir / "worker.crt"),
        keyfile=str(agent_dir / "worker.key"),
    )
    ctx.load_verify_locations(cafile=str(agent_dir / "ca.crt"))
    ctx.verify_mode = ssl.CERT_REQUIRED
    # Pin by CA only: the master's cert is not required to match the dialed
    # hostname/IP (operator-supplied address).
    ctx.check_hostname = False
    return ctx
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------- frame encoding
|
||||||
|
|
||||||
|
|
||||||
|
def encode_frame(line: str) -> bytes:
    """Encode *line* with RFC 5425 octet-counted framing: ``"<N> <msg>"``.

    ``N`` is the byte length of the UTF-8 payload that follows the single
    space separator. A trailing newline, if present, is stripped first.
    """
    stripped = line.rstrip("\n")
    payload = stripped.encode("utf-8", errors="replace")
    # b" " is the RFC 5425 frame separator between length and payload.
    return b"%d" % len(payload) + b" " + payload
|
||||||
|
|
||||||
|
|
||||||
|
async def read_frame(reader: asyncio.StreamReader) -> Optional[bytes]:
    """Read one RFC 5425 octet-counted frame.

    Returns:
        The frame payload; ``None`` on clean EOF before any prefix byte; or
        ``b""`` when EOF cut the length prefix short (caller should treat
        the connection as closed on the next read).

    Raises:
        ValueError: malformed length prefix (non-digit, too long, or empty).
        asyncio.IncompleteReadError: EOF inside the payload.
    """
    # Read the ASCII length up to the first space, one byte at a time, so a
    # malicious peer can't force us to buffer unbounded bytes before we know
    # it's a valid frame.
    prefix = b""
    while True:
        c = await reader.read(1)
        if not c:
            return None if not prefix else b""
        if c == _FRAME_SEP:
            break
        if len(prefix) >= 10 or not c.isdigit():
            # RFC 5425 caps the length prefix at ~10 digits (< 4 GiB payload).
            raise ValueError(f"invalid octet-count prefix: {prefix!r}")
        prefix += c
    if not prefix:
        # A frame starting with the separator has an empty length field;
        # without this guard int(b"") raised a confusing bare ValueError.
        raise ValueError("empty octet-count prefix")
    n = int(prefix)
    buf = await reader.readexactly(n)
    return buf
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------- main loop
|
||||||
|
|
||||||
|
|
||||||
|
async def _send_batch(
    writer: asyncio.StreamWriter,
    offset: int,
    lines: list[tuple[int, str]],
    store: _OffsetStore,
) -> int:
    """Frame and write every line, drain, then persist the newest offset.

    Each entry of *lines* is ``(offset_after_line, line_text)``. The stored
    offset only advances after ``drain()`` returns without error, so a
    failed send never loses or duplicates data.

    Returns:
        The last persisted offset, or *offset* unchanged when *lines* is
        empty.
    """
    if not lines:
        # Guard: the original indexed ``lines[-1]`` unconditionally, which
        # raised IndexError on an empty batch. ``offset`` is the natural
        # "nothing advanced" result.
        return offset
    for _, line in lines:
        writer.write(encode_frame(line))
    await writer.drain()
    last_offset = lines[-1][0]
    store.set(last_offset)
    return last_offset
|
||||||
|
|
||||||
|
|
||||||
|
async def run_forwarder(
    cfg: ForwarderConfig,
    *,
    poll_interval: float = 0.5,
    stop_event: Optional[asyncio.Event] = None,
) -> None:
    """Main forwarder loop. Run as a dedicated task.

    Stops when ``stop_event`` is set (used by tests and clean shutdown).
    Exceptions trigger exponential backoff but are never fatal — the
    forwarder is expected to outlive transient master/network failures.

    Args:
        cfg: static forwarder configuration (log path, master endpoint, dirs).
        poll_interval: seconds between log-file polls, forwarded to ``_pump``.
        stop_event: cooperative shutdown signal; ``None`` means run forever.
    """
    # Offset state lives next to the agent bundle unless explicitly overridden.
    state_db = cfg.state_db or (cfg.agent_dir / "forwarder.db")
    store = _OffsetStore(state_db)
    offset = store.get()
    backoff = _INITIAL_BACKOFF

    log.info(
        "forwarder start log=%s master=%s:%d offset=%d",
        cfg.log_path, cfg.master_host, cfg.master_port, offset,
    )

    try:
        while stop_event is None or not stop_event.is_set():
            try:
                # Rebuilt on every attempt so a re-enrollment (new cert on
                # disk) is picked up without a process restart.
                ctx = build_worker_ssl_context(cfg.agent_dir)
                reader, writer = await asyncio.open_connection(
                    cfg.master_host, cfg.master_port, ssl=ctx
                )
                log.info("forwarder connected master=%s:%d", cfg.master_host, cfg.master_port)
                backoff = _INITIAL_BACKOFF  # reset after a successful connect
                try:
                    offset = await _pump(cfg, store, writer, offset, poll_interval, stop_event)
                finally:
                    writer.close()
                    try:
                        await writer.wait_closed()
                    except Exception:  # nosec B110 — socket cleanup is best-effort
                        pass
                    # Keep reader alive until here to avoid "reader garbage
                    # collected" warnings on some Python builds.
                    del reader
            except (OSError, ssl.SSLError, ConnectionError) as exc:
                log.warning(
                    "forwarder disconnected: %s — retrying in %.1fs", exc, backoff
                )
                # NOTE(review): the wait_for wrapper looks redundant —
                # _sleep_unless_stopped already bounds itself to ``backoff``;
                # the +1s outer timeout only guards against it overrunning.
                try:
                    await asyncio.wait_for(
                        _sleep_unless_stopped(backoff, stop_event), timeout=backoff + 1
                    )
                except asyncio.TimeoutError:
                    pass
                backoff = min(_MAX_BACKOFF, backoff * 2)
    finally:
        store.close()
        log.info("forwarder stopped offset=%d", offset)
|
||||||
|
|
||||||
|
|
||||||
|
async def _pump(
    cfg: ForwarderConfig,
    store: _OffsetStore,
    writer: asyncio.StreamWriter,
    offset: int,
    poll_interval: float,
    stop_event: Optional[asyncio.Event],
) -> int:
    """Read new lines since ``offset`` and ship them until disconnect.

    Returns the last durably-persisted offset so the caller can resume from
    it after a reconnect. A send failure propagates to the caller (the
    offset is only advanced inside ``_send_batch`` after a clean drain).
    """
    while stop_event is None or not stop_event.is_set():
        if not cfg.log_path.exists():
            # Collector hasn't created the file yet — poll until it appears.
            await _sleep_unless_stopped(poll_interval, stop_event)
            continue

        stat = cfg.log_path.stat()
        if stat.st_size < offset:
            # truncated/rotated — reset.
            log.warning("forwarder log rotated — resetting offset=0")
            offset = 0
            store.set(0)
        if stat.st_size - offset > cfg.max_lag_bytes:
            # Catastrophic lag — skip ahead to cap local disk pressure.
            skip_to = stat.st_size - cfg.max_lag_bytes
            log.warning(
                "forwarder lag %d > cap %d — dropping oldest %d bytes",
                stat.st_size - offset, cfg.max_lag_bytes, skip_to - offset,
            )
            offset = skip_to
            store.set(offset)

        if stat.st_size == offset:
            # Fully caught up — wait for new data.
            await _sleep_unless_stopped(poll_interval, stop_event)
            continue

        # Each entry: (file offset just past the line, line text sans newline).
        batch: list[tuple[int, str]] = []
        # NOTE(review): the file is opened in text mode but ``offset`` is
        # compared against st_size (bytes). Text-mode tell()/seek() values
        # are opaque cookies and only coincide with byte offsets for this
        # particular codec setup — confirm, or consider binary mode.
        with open(cfg.log_path, "r", encoding="utf-8", errors="replace") as f:
            f.seek(offset)
            while True:
                line = f.readline()
                if not line or not line.endswith("\n"):
                    # EOF or a partially-written line — ship it next round.
                    break
                offset_after = f.tell()
                batch.append((offset_after, line.rstrip("\n")))
                if len(batch) >= 500:
                    # Bound per-iteration memory and send latency.
                    break
        if batch:
            offset = await _send_batch(writer, offset, batch, store)
    return offset
|
||||||
|
|
||||||
|
|
||||||
|
async def _sleep_unless_stopped(
|
||||||
|
seconds: float, stop_event: Optional[asyncio.Event]
|
||||||
|
) -> None:
|
||||||
|
if stop_event is None:
|
||||||
|
await asyncio.sleep(seconds)
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
await asyncio.wait_for(stop_event.wait(), timeout=seconds)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# Re-exported for CLI convenience
|
||||||
|
DEFAULT_PORT = 6514
|
||||||
|
|
||||||
|
|
||||||
|
def default_master_host() -> Optional[str]:
    """Master host taken from ``DECNET_SWARM_MASTER_HOST``, or None if unset."""
    return os.getenv("DECNET_SWARM_MASTER_HOST")
|
||||||
194
decnet/swarm/log_listener.py
Normal file
194
decnet/swarm/log_listener.py
Normal file
@@ -0,0 +1,194 @@
|
|||||||
|
"""Master-side syslog-over-TLS listener (RFC 5425).
|
||||||
|
|
||||||
|
Accepts mTLS-authenticated worker connections on TCP 6514, reads
|
||||||
|
octet-counted frames, parses each as an RFC 5424 line, and appends it to
|
||||||
|
the master's local ingest log files. The existing log_ingestion_worker
|
||||||
|
tails those files and inserts records into the master repo — worker
|
||||||
|
provenance is embedded in the parsed record's ``source_worker`` field.
|
||||||
|
|
||||||
|
Design:
|
||||||
|
* TLS is mandatory. No plaintext fallback. A peer without a CA-signed
|
||||||
|
cert is rejected at the TLS handshake; nothing gets past the kernel.
|
||||||
|
* The listener never trusts the syslog HOSTNAME field for provenance —
|
||||||
|
that's attacker-supplied from the decky. The authoritative source is
|
||||||
|
the peer cert's CN, which the CA controlled at enrollment.
|
||||||
|
* Dropped connections are fine — the worker's forwarder holds the
|
||||||
|
offset and resumes from the same byte on reconnect.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import pathlib
|
||||||
|
import ssl
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from cryptography import x509
|
||||||
|
from cryptography.hazmat.primitives import serialization
|
||||||
|
from cryptography.x509.oid import NameOID
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.swarm import pki
|
||||||
|
from decnet.swarm.log_forwarder import read_frame
|
||||||
|
|
||||||
|
log = get_logger("swarm.listener")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class ListenerConfig:
    """Static configuration for the master-side syslog-over-TLS listener."""

    log_path: pathlib.Path  # master's RFC 5424 .log (forensic sink)
    json_path: pathlib.Path  # master's .json (ingester tails this)
    bind_host: str = "0.0.0.0"  # nosec B104 — listener must bind publicly
    bind_port: int = 6514  # IANA port for syslog over TLS (RFC 5425)
    ca_dir: pathlib.Path = pki.DEFAULT_CA_DIR  # root of the DECNET CA tree
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------- TLS context
|
||||||
|
|
||||||
|
|
||||||
|
def build_listener_ssl_context(ca_dir: pathlib.Path) -> ssl.SSLContext:
    """Server-side mTLS context for the syslog listener.

    The master presents its own cert (stored under the 'worker' bundle
    layout in ``<ca_dir>/master``); connecting clients must present a cert
    signed by the DECNET CA or they are rejected at the handshake.

    Raises:
        RuntimeError: when any of the three identity files is missing; the
            message names the exact missing path.
    """
    master_dir = ca_dir / "master"
    ca_cert = master_dir / "ca.crt"
    cert = master_dir / "worker.crt"  # master re-uses the 'worker' bundle layout
    key = master_dir / "worker.key"
    for p in (ca_cert, cert, key):
        if not p.exists():
            # Name the specific missing file — the original message only
            # pointed at the directory, which hid partial installs.
            raise RuntimeError(
                f"master identity missing: {p} — call ensure_master_identity first"
            )
    ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
    ctx.load_cert_chain(certfile=str(cert), keyfile=str(key))
    ctx.load_verify_locations(cafile=str(ca_cert))
    # Mandatory client certs: only CA-signed workers get past the handshake.
    ctx.verify_mode = ssl.CERT_REQUIRED
    return ctx
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------- helpers
|
||||||
|
|
||||||
|
|
||||||
|
def peer_cn(ssl_object: Optional[ssl.SSLObject]) -> str:
    """Extract the CN from the TLS peer certificate (worker provenance).

    Any missing certificate or parse failure yields ``"unknown"`` rather
    than an exception — malformed cert DNs tag the message for later
    inspection instead of crashing the listener.
    """
    if ssl_object is None:
        return "unknown"
    der = ssl_object.getpeercert(binary_form=True)
    if der is None:
        return "unknown"
    try:
        subject = x509.load_der_x509_certificate(der).subject
        cns = subject.get_attributes_for_oid(NameOID.COMMON_NAME)
        return cns[0].value if cns else "unknown"
    except Exception:  # nosec B110 — provenance is best-effort
        return "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def fingerprint_from_ssl(ssl_object: Optional[ssl.SSLObject]) -> Optional[str]:
    """Fingerprint of the TLS peer certificate, or ``None`` if unavailable.

    Delegates hashing to ``pki.fingerprint`` over the PEM encoding; any
    parse failure is swallowed (best-effort, like ``peer_cn``).
    """
    if ssl_object is None:
        return None
    der = ssl_object.getpeercert(binary_form=True)
    if der is None:
        return None
    try:
        parsed = x509.load_der_x509_certificate(der)
        pem = parsed.public_bytes(serialization.Encoding.PEM)
        return pki.fingerprint(pem)
    except Exception:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------- per-connection handler
|
||||||
|
|
||||||
|
|
||||||
|
async def _handle_connection(
    reader: asyncio.StreamReader,
    writer: asyncio.StreamWriter,
    cfg: ListenerConfig,
) -> None:
    """Serve one mTLS worker connection until EOF or a framing error.

    Provenance comes from the peer certificate's CN (CA-controlled at
    enrollment), never from attacker-supplied syslog fields. Every frame is
    appended raw to ``cfg.log_path``; frames that parse as RFC 5424 are also
    written, tagged with ``source_worker``, to ``cfg.json_path``.
    """
    ssl_obj = writer.get_extra_info("ssl_object")
    cn = peer_cn(ssl_obj)
    peer = writer.get_extra_info("peername")
    log.info("listener accepted worker=%s peer=%s", cn, peer)

    # Lazy import to avoid a circular dep if the collector pulls in logger setup.
    from decnet.collector.worker import parse_rfc5424

    cfg.log_path.parent.mkdir(parents=True, exist_ok=True)
    cfg.json_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        with open(cfg.log_path, "a", encoding="utf-8") as lf, open(
            cfg.json_path, "a", encoding="utf-8"
        ) as jf:
            while True:
                try:
                    frame = await read_frame(reader)
                except asyncio.IncompleteReadError:
                    # EOF in the middle of a payload — dropped connection.
                    break
                except ValueError as exc:
                    # Malformed length prefix — stop trusting this stream.
                    log.warning("listener bad frame worker=%s err=%s", cn, exc)
                    break
                if frame is None:
                    # Clean EOF.
                    break
                if not frame:
                    # EOF cut a prefix short (read_frame returns b"") — skip.
                    continue
                line = frame.decode("utf-8", errors="replace")
                # Forensic sink first: keep the raw line even if parsing fails.
                lf.write(line + "\n")
                lf.flush()
                parsed = parse_rfc5424(line)
                if parsed is not None:
                    parsed["source_worker"] = cn  # authoritative provenance (cert CN)
                    jf.write(json.dumps(parsed) + "\n")
                    jf.flush()
                else:
                    log.debug("listener malformed RFC5424 worker=%s snippet=%r", cn, line[:80])
    except Exception as exc:
        log.warning("listener connection error worker=%s err=%s", cn, exc)
    finally:
        writer.close()
        try:
            await writer.wait_closed()
        except Exception:  # nosec B110 — socket cleanup is best-effort
            pass
        log.info("listener closed worker=%s", cn)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------- server
|
||||||
|
|
||||||
|
|
||||||
|
async def run_listener(
    cfg: ListenerConfig,
    *,
    stop_event: Optional[asyncio.Event] = None,
) -> None:
    """Accept worker connections on ``cfg.bind_host:cfg.bind_port`` over mTLS.

    With ``stop_event`` left as ``None`` the server runs forever; otherwise
    it shuts down cleanly once the event fires (used by tests).
    """
    ctx = build_listener_ssl_context(cfg.ca_dir)

    async def _client_cb(
        reader: asyncio.StreamReader, writer: asyncio.StreamWriter
    ) -> None:
        # Thin adapter: asyncio's callback signature has no slot for cfg.
        await _handle_connection(reader, writer, cfg)

    server = await asyncio.start_server(
        _client_cb, host=cfg.bind_host, port=cfg.bind_port, ssl=ctx
    )
    sockets = server.sockets or ()
    log.info(
        "listener bound host=%s port=%d sockets=%d",
        cfg.bind_host, cfg.bind_port, len(sockets),
    )
    async with server:
        if stop_event is None:
            await server.serve_forever()
        else:
            serve_task = asyncio.create_task(server.serve_forever())
            await stop_event.wait()
            server.close()
            serve_task.cancel()
            try:
                await serve_task
            # CancelledError is listed explicitly because it is not an
            # Exception subclass on modern Python.
            except (asyncio.CancelledError, Exception):  # nosec B110
                pass
|
||||||
323
decnet/swarm/pki.py
Normal file
323
decnet/swarm/pki.py
Normal file
@@ -0,0 +1,323 @@
|
|||||||
|
"""DECNET SWARM PKI — self-managed X.509 CA for master↔worker mTLS.
|
||||||
|
|
||||||
|
Used by:
|
||||||
|
* the SWARM controller (master) to issue per-worker server+client certs at
|
||||||
|
enrollment time,
|
||||||
|
* the agent (worker) to present its mTLS identity for both the control-plane
|
||||||
|
HTTPS endpoint and the syslog-over-TLS (RFC 5425) log forwarder,
|
||||||
|
* the master-side syslog-TLS listener to authenticate inbound workers.
|
||||||
|
|
||||||
|
Storage layout (master):
|
||||||
|
|
||||||
|
~/.decnet/ca/
|
||||||
|
ca.key (PEM, 0600 — the CA private key)
|
||||||
|
ca.crt (PEM — self-signed root)
|
||||||
|
workers/<worker-name>/
|
||||||
|
client.crt (issued, signed by CA)
|
||||||
|
|
||||||
|
Worker layout (delivered by /enroll response):
|
||||||
|
|
||||||
|
~/.decnet/agent/
|
||||||
|
ca.crt (master's CA — trust anchor)
|
||||||
|
worker.key (worker's own private key)
|
||||||
|
worker.crt (signed by master CA — used for both TLS
|
||||||
|
server auth *and* syslog client auth)
|
||||||
|
|
||||||
|
The CA is a hard dependency only in swarm mode; unihost installs never
|
||||||
|
touch this module.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import datetime as _dt
|
||||||
|
import hashlib
|
||||||
|
import ipaddress
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from cryptography import x509
|
||||||
|
from cryptography.hazmat.primitives import hashes, serialization
|
||||||
|
from cryptography.hazmat.primitives.asymmetric import rsa
|
||||||
|
from cryptography.x509.oid import NameOID
|
||||||
|
|
||||||
|
DEFAULT_CA_DIR = pathlib.Path(os.path.expanduser("~/.decnet/ca"))
|
||||||
|
DEFAULT_AGENT_DIR = pathlib.Path(os.path.expanduser("~/.decnet/agent"))
|
||||||
|
DEFAULT_SWARMCTL_DIR = pathlib.Path(os.path.expanduser("~/.decnet/swarmctl"))
|
||||||
|
|
||||||
|
CA_KEY_BITS = 4096
|
||||||
|
WORKER_KEY_BITS = 2048
|
||||||
|
CA_VALIDITY_DAYS = 3650 # 10 years — internal CA
|
||||||
|
WORKER_VALIDITY_DAYS = 825 # max permitted by modern TLS clients
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class CABundle:
    """The master's CA identity (key is secret, cert is published)."""

    key_pem: bytes  # PEM-encoded CA private key — never leaves the master
    cert_pem: bytes  # PEM-encoded self-signed root cert — shared with workers
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class IssuedCert:
    """A signed worker certificate + its private key, handed to the worker
    exactly once during enrollment.
    """

    key_pem: bytes  # worker's PEM-encoded private key
    cert_pem: bytes  # PEM cert signed by the DECNET CA
    ca_cert_pem: bytes  # the CA cert itself (worker's trust anchor)
    fingerprint_sha256: str  # hex, lowercase — SHA-256 over the cert's DER
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------- CA ops
|
||||||
|
|
||||||
|
|
||||||
|
def _pem_private(key: rsa.RSAPrivateKey) -> bytes:
    """Serialize *key* as unencrypted PKCS#8 PEM bytes."""
    pem = key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.PKCS8,
        encryption_algorithm=serialization.NoEncryption(),
    )
    return pem
|
||||||
|
|
||||||
|
|
||||||
|
def _pem_cert(cert: x509.Certificate) -> bytes:
    """Serialize *cert* as PEM bytes."""
    return cert.public_bytes(serialization.Encoding.PEM)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_ca(common_name: str = "DECNET SWARM Root CA") -> CABundle:
    """Generate a fresh self-signed CA. Does not touch disk.

    Returns a :class:`CABundle` with the PEM-encoded private key and the
    self-signed root certificate.
    """
    key = rsa.generate_private_key(public_exponent=65537, key_size=CA_KEY_BITS)
    # Self-signed root: subject and issuer are the same name.
    subject = issuer = x509.Name(
        [
            x509.NameAttribute(NameOID.COMMON_NAME, common_name),
            x509.NameAttribute(NameOID.ORGANIZATION_NAME, "DECNET"),
        ]
    )
    now = _dt.datetime.now(_dt.timezone.utc)
    cert = (
        x509.CertificateBuilder()
        .subject_name(subject)
        .issuer_name(issuer)
        .public_key(key.public_key())
        .serial_number(x509.random_serial_number())
        # Back-dated 5 minutes to tolerate clock skew between hosts.
        .not_valid_before(now - _dt.timedelta(minutes=5))
        .not_valid_after(now + _dt.timedelta(days=CA_VALIDITY_DAYS))
        # path_length=0: this CA may sign leaf certs only, never sub-CAs.
        .add_extension(x509.BasicConstraints(ca=True, path_length=0), critical=True)
        .add_extension(
            # Key usage restricted to CA duties: signing certs and CRLs.
            x509.KeyUsage(
                digital_signature=True,
                content_commitment=False,
                key_encipherment=False,
                data_encipherment=False,
                key_agreement=False,
                key_cert_sign=True,
                crl_sign=True,
                encipher_only=False,
                decipher_only=False,
            ),
            critical=True,
        )
        .sign(private_key=key, algorithm=hashes.SHA256())
    )
    return CABundle(key_pem=_pem_private(key), cert_pem=_pem_cert(cert))
|
||||||
|
|
||||||
|
|
||||||
|
def save_ca(bundle: CABundle, ca_dir: pathlib.Path = DEFAULT_CA_DIR) -> None:
    """Persist the CA keypair under *ca_dir* with restrictive permissions.

    0700 on the dir, 0600 on the key — defence against casual reads. The
    key file is created with mode 0600 *before* any bytes are written, so a
    permissive umask can never briefly expose the private key (the original
    wrote first and chmod'ed afterwards).
    """
    ca_dir.mkdir(parents=True, exist_ok=True)
    os.chmod(ca_dir, 0o700)
    key_path = ca_dir / "ca.key"
    cert_path = ca_dir / "ca.crt"
    # O_TRUNC preserves the original overwrite semantics; the 0600 mode is
    # applied atomically at file creation.
    fd = os.open(str(key_path), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "wb") as fh:
        fh.write(bundle.key_pem)
    os.chmod(key_path, 0o600)  # also tighten a pre-existing key file
    cert_path.write_bytes(bundle.cert_pem)
|
||||||
|
|
||||||
|
|
||||||
|
def load_ca(ca_dir: pathlib.Path = DEFAULT_CA_DIR) -> CABundle:
    """Read the persisted CA keypair from *ca_dir*.

    Raises ``FileNotFoundError`` when either PEM file is absent.
    """
    return CABundle(
        key_pem=(ca_dir / "ca.key").read_bytes(),
        cert_pem=(ca_dir / "ca.crt").read_bytes(),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_ca(ca_dir: pathlib.Path = DEFAULT_CA_DIR) -> CABundle:
    """Load the CA if present, otherwise generate and persist a new one."""
    have_key = (ca_dir / "ca.key").exists()
    have_cert = (ca_dir / "ca.crt").exists()
    if not (have_key and have_cert):
        fresh = generate_ca()
        save_ca(fresh, ca_dir)
        return fresh
    return load_ca(ca_dir)
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------- cert issuance
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_san(value: str) -> x509.GeneralName:
    """Interpret a SAN entry: IP literal when parseable, DNS name otherwise."""
    try:
        addr = ipaddress.ip_address(value)
    except ValueError:
        return x509.DNSName(value)
    return x509.IPAddress(addr)
|
||||||
|
|
||||||
|
|
||||||
|
def issue_worker_cert(
    ca: CABundle,
    worker_name: str,
    sans: list[str],
    validity_days: int = WORKER_VALIDITY_DAYS,
) -> IssuedCert:
    """Sign a freshly-generated worker keypair.

    The cert is usable as BOTH a TLS server (agent's HTTPS endpoint) and a
    TLS client (syslog-over-TLS upstream to the master) — extended key usage
    covers both. ``sans`` should include every address/name the master or
    workers will use to reach this worker — typically the worker's IP plus
    its hostname.

    Returns:
        An :class:`IssuedCert` bundling the new key, the signed cert, the
        CA cert, and the cert's SHA-256 fingerprint (hex over DER).
    """
    ca_key = serialization.load_pem_private_key(ca.key_pem, password=None)
    ca_cert = x509.load_pem_x509_certificate(ca.cert_pem)

    worker_key = rsa.generate_private_key(public_exponent=65537, key_size=WORKER_KEY_BITS)
    subject = x509.Name(
        [
            x509.NameAttribute(NameOID.COMMON_NAME, worker_name),
            x509.NameAttribute(NameOID.ORGANIZATION_NAME, "DECNET"),
            x509.NameAttribute(NameOID.ORGANIZATIONAL_UNIT_NAME, "swarm-worker"),
        ]
    )
    now = _dt.datetime.now(_dt.timezone.utc)
    san_entries: list[x509.GeneralName] = [_parse_san(s) for s in sans] if sans else []
    # Always include the worker-name as a DNS SAN so cert pinning by CN-as-DNS
    # works even when the operator forgets to pass an explicit SAN list.
    if not any(
        isinstance(e, x509.DNSName) and e.value == worker_name for e in san_entries
    ):
        san_entries.append(x509.DNSName(worker_name))

    builder = (
        x509.CertificateBuilder()
        .subject_name(subject)
        .issuer_name(ca_cert.subject)
        .public_key(worker_key.public_key())
        .serial_number(x509.random_serial_number())
        # Back-dated 5 minutes to tolerate clock skew between hosts.
        .not_valid_before(now - _dt.timedelta(minutes=5))
        .not_valid_after(now + _dt.timedelta(days=validity_days))
        # Leaf cert — must never be able to sign further certs.
        .add_extension(x509.BasicConstraints(ca=False, path_length=None), critical=True)
        .add_extension(
            x509.KeyUsage(
                digital_signature=True,
                content_commitment=False,
                key_encipherment=True,
                data_encipherment=False,
                key_agreement=False,
                key_cert_sign=False,
                crl_sign=False,
                encipher_only=False,
                decipher_only=False,
            ),
            critical=True,
        )
        .add_extension(
            # Dual-role EKU: same cert serves the agent's HTTPS endpoint
            # (serverAuth) and authenticates the forwarder (clientAuth).
            x509.ExtendedKeyUsage(
                [
                    x509.ObjectIdentifier("1.3.6.1.5.5.7.3.1"),  # serverAuth
                    x509.ObjectIdentifier("1.3.6.1.5.5.7.3.2"),  # clientAuth
                ]
            ),
            critical=True,
        )
        .add_extension(x509.SubjectAlternativeName(san_entries), critical=False)
    )
    cert = builder.sign(private_key=ca_key, algorithm=hashes.SHA256())
    cert_pem = _pem_cert(cert)
    # Fingerprint over the DER encoding (matches load_worker_bundle).
    fp = hashlib.sha256(
        cert.public_bytes(serialization.Encoding.DER)
    ).hexdigest()
    return IssuedCert(
        key_pem=_pem_private(worker_key),
        cert_pem=cert_pem,
        ca_cert_pem=ca.cert_pem,
        fingerprint_sha256=fp,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def write_worker_bundle(
    issued: IssuedCert,
    agent_dir: pathlib.Path = DEFAULT_AGENT_DIR,
) -> None:
    """Persist an issued bundle into the worker's agent directory.

    The directory is forced to 0700. The private key file is created with
    mode 0600 *before* its bytes are written, so a permissive umask can
    never expose the key even briefly (the original wrote first and
    chmod'ed afterwards).
    """
    agent_dir.mkdir(parents=True, exist_ok=True)
    os.chmod(agent_dir, 0o700)
    (agent_dir / "ca.crt").write_bytes(issued.ca_cert_pem)
    (agent_dir / "worker.crt").write_bytes(issued.cert_pem)
    key_path = agent_dir / "worker.key"
    # O_TRUNC preserves the original overwrite semantics; the 0600 mode is
    # applied atomically at file creation.
    fd = os.open(str(key_path), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "wb") as fh:
        fh.write(issued.key_pem)
    os.chmod(key_path, 0o600)  # also tighten a pre-existing key file
|
||||||
|
|
||||||
|
|
||||||
|
def load_worker_bundle(
    agent_dir: pathlib.Path = DEFAULT_AGENT_DIR,
) -> Optional[IssuedCert]:
    """Return the worker's bundle if enrolled; ``None`` otherwise.

    The SHA-256 fingerprint is recomputed from the on-disk cert, so callers
    always see the fingerprint of what is actually installed.
    """
    ca = agent_dir / "ca.crt"
    crt = agent_dir / "worker.crt"
    key = agent_dir / "worker.key"
    # All three files must exist for a usable mTLS identity.
    if not (ca.exists() and crt.exists() and key.exists()):
        return None
    cert_pem = crt.read_bytes()
    cert = x509.load_pem_x509_certificate(cert_pem)
    # Fingerprint over the DER encoding (matches issue_worker_cert).
    fp = hashlib.sha256(
        cert.public_bytes(serialization.Encoding.DER)
    ).hexdigest()
    return IssuedCert(
        key_pem=key.read_bytes(),
        cert_pem=cert_pem,
        ca_cert_pem=ca.read_bytes(),
        fingerprint_sha256=fp,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_swarmctl_cert(
    bind_host: str,
    ca_dir: pathlib.Path = DEFAULT_CA_DIR,
    swarmctl_dir: pathlib.Path = DEFAULT_SWARMCTL_DIR,
    extra_sans: Optional[list[str]] = None,
) -> tuple[pathlib.Path, pathlib.Path, pathlib.Path]:
    """Return (cert_path, key_path, ca_path), auto-issuing if missing.

    Uses the existing DECNET CA (ensuring it exists first) so workers
    whose bundle already includes ``ca.crt`` can verify the swarmctl
    endpoint without additional trust configuration. Self-signed is
    intentionally not the default — a cert signed by the same CA the
    workers already trust is the friction-free path.

    Callers that want BYOC should skip this and pass their own
    cert/key paths directly to uvicorn.
    """
    swarmctl_dir.mkdir(parents=True, exist_ok=True)
    os.chmod(swarmctl_dir, 0o700)
    cert_path = swarmctl_dir / "server.crt"
    key_path = swarmctl_dir / "server.key"
    ca_cert_path = ca_dir / "ca.crt"

    # Fast path: everything already on disk from a previous run.
    if cert_path.exists() and key_path.exists() and ca_cert_path.exists():
        return cert_path, key_path, ca_cert_path

    ca = ensure_ca(ca_dir)
    # dict.fromkeys dedupes while keeping insertion order, so the SAN
    # list (and hence the issued certificate) is deterministic across
    # runs — a plain set() would order the names arbitrarily.
    sans = list(
        dict.fromkeys([bind_host, "127.0.0.1", "localhost", *(extra_sans or [])])
    )
    issued = issue_worker_cert(ca, "swarmctl", sans)
    cert_path.write_bytes(issued.cert_pem)
    # Create the key 0600 from the start (no write-then-chmod window in
    # which it is world-readable); the chmod afterwards repairs a
    # pre-existing file created earlier with a laxer mode.
    fd = os.open(key_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        os.write(fd, issued.key_pem)
    finally:
        os.close(fd)
    os.chmod(key_path, 0o600)
    # ensure_ca already wrote ca.crt under ca_dir, but save_ca is only
    # called on generate — re-mirror it here to guarantee the path exists.
    if not ca_cert_path.exists():
        ca_cert_path.write_bytes(ca.cert_pem)
    return cert_path, key_path, ca_cert_path
|
||||||
|
|
||||||
|
|
||||||
|
def fingerprint(cert_pem: bytes) -> str:
    """Return the hex SHA-256 digest of the certificate's DER encoding."""
    parsed = x509.load_pem_x509_certificate(cert_pem)
    der_bytes = parsed.public_bytes(serialization.Encoding.DER)
    digest = hashlib.sha256(der_bytes)
    return digest.hexdigest()
|
||||||
97
decnet/swarm/tar_tree.py
Normal file
97
decnet/swarm/tar_tree.py
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
"""Build a gzipped tarball of the master's working tree for pushing to workers.
|
||||||
|
|
||||||
|
Always excludes the obvious large / secret / churn paths: ``.venv/``,
|
||||||
|
``__pycache__/``, ``.git/``, ``wiki-checkout/``, ``*.db*``, ``*.log``. The
|
||||||
|
caller can supply additional exclude globs.
|
||||||
|
|
||||||
|
Deliberately does NOT invoke git — the tree is what the operator has on
|
||||||
|
disk (staged + unstaged + untracked). That's the whole point; the scp
|
||||||
|
workflow we're replacing also shipped the live tree.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import fnmatch
|
||||||
|
import io
|
||||||
|
import pathlib
|
||||||
|
import tarfile
|
||||||
|
from typing import Iterable, Optional
|
||||||
|
|
||||||
|
# fnmatch glob patterns always excluded from the pushed tarball.
# _is_excluded also tests each pattern against every leading subpath of
# an entry, so bare directory names (".venv") catch top-level trees and
# the explicit "**/" variants catch nested occurrences.
DEFAULT_EXCLUDES = (
    # Virtualenvs — large and machine-specific.
    ".venv", ".venv/*",
    "**/.venv/*",
    # Bytecode caches.
    "__pycache__", "**/__pycache__", "**/__pycache__/*",
    # Version control and the wiki working copy.
    ".git", ".git/*",
    "wiki-checkout", "wiki-checkout/*",
    # Compiled Python artifacts.
    "*.pyc", "*.pyo",
    # SQLite databases (plus WAL/SHM sidecar files) and logs — churn.
    "*.db", "*.db-wal", "*.db-shm",
    "*.log",
    # Tooling caches and packaging metadata.
    ".pytest_cache", ".pytest_cache/*",
    ".mypy_cache", ".mypy_cache/*",
    ".tox", ".tox/*",
    "*.egg-info", "*.egg-info/*",
    # Master-local state that must never be shipped to workers.
    "decnet-state.json",
    "master.log", "master.json",
    "decnet.db*",
)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_excluded(rel: str, patterns: Iterable[str]) -> bool:
|
||||||
|
parts = pathlib.PurePosixPath(rel).parts
|
||||||
|
for pat in patterns:
|
||||||
|
if fnmatch.fnmatch(rel, pat):
|
||||||
|
return True
|
||||||
|
# Also match the pattern against every leading subpath — this is
|
||||||
|
# what catches nested `.venv/...` without forcing callers to spell
|
||||||
|
# out every `**/` glob.
|
||||||
|
for i in range(1, len(parts) + 1):
|
||||||
|
if fnmatch.fnmatch("/".join(parts[:i]), pat):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def tar_working_tree(
    root: pathlib.Path,
    extra_excludes: Optional[Iterable[str]] = None,
) -> bytes:
    """Return the gzipped tarball bytes of ``root``.

    Entries are added with paths relative to ``root`` (no leading ``/``,
    no ``..``). The updater rejects unsafe paths on the receiving side.
    """
    exclude_globs = [*DEFAULT_EXCLUDES, *(extra_excludes or ())]
    sink = io.BytesIO()

    with tarfile.open(fileobj=sink, mode="w:gz") as archive:
        for entry in sorted(root.rglob("*")):
            rel_name = entry.relative_to(root).as_posix()
            if _is_excluded(rel_name, exclude_globs):
                continue
            # Symlinks inside a repo tree are rare and often break
            # portability; skip them rather than ship dangling links.
            if entry.is_symlink():
                continue
            # Directories are implied by their members' archive paths.
            if entry.is_dir():
                continue
            archive.add(entry, arcname=rel_name, recursive=False)

    return sink.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
def detect_git_sha(root: pathlib.Path) -> str:
    """Best-effort ``HEAD`` sha. Returns ``""`` if not a git repo.

    Resolves a symbolic ``ref: refs/heads/...`` HEAD through the loose
    ref file, falling back to ``.git/packed-refs`` — after ``git gc``
    the loose file disappears and the sha lives only in packed-refs.
    A detached HEAD (raw sha in ``HEAD``) is returned directly.
    """
    head = root / ".git" / "HEAD"
    if not head.is_file():
        return ""
    try:
        ref = head.read_text().strip()
    except OSError:
        return ""
    if not ref.startswith("ref: "):
        # Detached HEAD: the file holds the sha itself.
        return ref
    ref_name = ref[5:]
    ref_path = root / ".git" / ref_name
    if ref_path.is_file():
        try:
            return ref_path.read_text().strip()
        except OSError:
            return ""
    # Loose ref missing — look for the ref in packed-refs. Lines are
    # "<sha> <refname>"; '#' lines are headers, '^' lines are peeled tags.
    packed = root / ".git" / "packed-refs"
    if packed.is_file():
        try:
            lines = packed.read_text().splitlines()
        except OSError:
            return ""
        for line in lines:
            line = line.strip()
            if not line or line.startswith(("#", "^")):
                continue
            sha, _, name = line.partition(" ")
            if name == ref_name:
                return sha
    return ""
|
||||||
124
decnet/swarm/updater_client.py
Normal file
124
decnet/swarm/updater_client.py
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
"""Master-side HTTP client for the worker's self-updater daemon.
|
||||||
|
|
||||||
|
Sibling of ``AgentClient``: same mTLS identity (same DECNET CA, same
|
||||||
|
master client cert) but targets the updater's port (default 8766) and
|
||||||
|
speaks the multipart upload protocol the updater's ``/update`` endpoint
|
||||||
|
expects.
|
||||||
|
|
||||||
|
Kept as its own module — not a subclass of ``AgentClient`` — because the
|
||||||
|
timeouts and failure semantics are genuinely different: pip install +
|
||||||
|
agent probe can take a minute on a slow VM, and ``/update-self`` drops
|
||||||
|
the connection on purpose (the updater re-execs itself mid-response).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import ssl
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.swarm.client import MasterIdentity, ensure_master_identity
|
||||||
|
|
||||||
|
log = get_logger("swarm.updater_client")

# Long read/write budget for /update and /update-self: pip install plus
# the agent probe can take a minute or more on a slow VM.
_TIMEOUT_UPDATE = httpx.Timeout(connect=10.0, read=180.0, write=120.0, pool=5.0)
# Snappier defaults for ordinary control-plane calls (/health, /releases, /rollback).
_TIMEOUT_CONTROL = httpx.Timeout(connect=5.0, read=30.0, write=10.0, pool=5.0)
|
||||||
|
|
||||||
|
|
||||||
|
class UpdaterClient:
    """Async client targeting a worker's ``decnet updater`` daemon.

    Same mTLS identity as ``AgentClient`` (same DECNET CA, same master
    client cert) but aimed at the updater's port. Use as an async
    context manager; RPC methods raise ``RuntimeError`` if called
    outside the ``async with`` block.
    """

    def __init__(
        self,
        host: dict[str, Any] | None = None,
        *,
        address: Optional[str] = None,
        updater_port: int = 8766,
        identity: Optional[MasterIdentity] = None,
    ):
        # Accept either a host record ({"address": ..., "name": ...}) or
        # an explicit address; the host dict wins when both are given.
        if host is not None:
            self._address = host["address"]
            self._host_name = host.get("name")
        else:
            if address is None:
                raise ValueError("UpdaterClient requires host dict or address")
            self._address = address
            self._host_name = None
        self._port = updater_port
        self._identity = identity or ensure_master_identity()
        self._client: Optional[httpx.AsyncClient] = None

    def _build_client(self, timeout: httpx.Timeout) -> httpx.AsyncClient:
        """Build an ``AsyncClient`` doing mutual TLS against the DECNET CA."""
        ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ctx.load_cert_chain(
            str(self._identity.cert_path), str(self._identity.key_path),
        )
        ctx.load_verify_locations(cafile=str(self._identity.ca_cert_path))
        ctx.verify_mode = ssl.CERT_REQUIRED
        # Workers are addressed by raw IP; rely on CA verification rather
        # than hostname matching.
        ctx.check_hostname = False
        return httpx.AsyncClient(
            base_url=f"https://{self._address}:{self._port}",
            verify=ctx,
            timeout=timeout,
        )

    async def __aenter__(self) -> "UpdaterClient":
        self._client = self._build_client(_TIMEOUT_CONTROL)
        return self

    async def __aexit__(self, *exc: Any) -> None:
        if self._client:
            await self._client.aclose()
            self._client = None

    def _require(self) -> httpx.AsyncClient:
        """Return the live client or fail loudly when used un-entered."""
        if self._client is None:
            raise RuntimeError("UpdaterClient used outside `async with` block")
        return self._client

    # --------------------------------------------------------------- RPCs

    async def health(self) -> dict[str, Any]:
        r = await self._require().get("/health")
        r.raise_for_status()
        return r.json()

    async def releases(self) -> dict[str, Any]:
        r = await self._require().get("/releases")
        r.raise_for_status()
        return r.json()

    async def update(self, tarball: bytes, sha: str = "") -> httpx.Response:
        """POST /update. Returns the Response so the caller can distinguish
        200 / 409 / 500 — each means something different.
        """
        # Per-request timeout instead of mutating the shared client's
        # timeout attribute: the old set/restore dance was not safe when
        # multiple coroutines shared one client, and briefly left the
        # long timeout applied to unrelated concurrent requests.
        return await self._require().post(
            "/update",
            files={"tarball": ("tree.tgz", tarball, "application/gzip")},
            data={"sha": sha},
            timeout=_TIMEOUT_UPDATE,
        )

    async def update_self(self, tarball: bytes, sha: str = "") -> httpx.Response:
        """POST /update-self. The updater re-execs itself, so the connection
        usually drops mid-response; that's not an error. Callers should then
        poll /health until the new SHA appears.
        """
        # Same per-request timeout rationale as update().
        return await self._require().post(
            "/update-self",
            files={"tarball": ("tree.tgz", tarball, "application/gzip")},
            data={"sha": sha, "confirm_self": "true"},
            timeout=_TIMEOUT_UPDATE,
        )

    async def rollback(self) -> httpx.Response:
        return await self._require().post("/rollback")
|
||||||
@@ -11,16 +11,16 @@ RUN find /opt /usr /etc /home -name "*.xml" -exec sed -i 's/port="5020"/port="50
|
|||||||
RUN (apt-get update && apt-get install -y --no-install-recommends libcap2-bin 2>/dev/null) || (apk add --no-cache libcap 2>/dev/null) || true
|
RUN (apt-get update && apt-get install -y --no-install-recommends libcap2-bin 2>/dev/null) || (apk add --no-cache libcap 2>/dev/null) || true
|
||||||
RUN find /home/conpot/.local/bin /usr /opt -type f -name 'python*' -exec setcap 'cap_net_bind_service+eip' {} \; 2>/dev/null || true
|
RUN find /home/conpot/.local/bin /usr /opt -type f -name 'python*' -exec setcap 'cap_net_bind_service+eip' {} \; 2>/dev/null || true
|
||||||
|
|
||||||
# Bridge conpot's own logger into DECNET's RFC 5424 syslog pipeline.
|
# Bridge conpot's own logger into syslog-relay's RFC 5424 syslog pipeline.
|
||||||
# entrypoint.py is self-contained (inlines the formatter) because the
|
# entrypoint.py is self-contained (inlines the formatter) because the
|
||||||
# conpot base image runs Python 3.6, which cannot import the shared
|
# conpot base image runs Python 3.6, which cannot import the shared
|
||||||
# decnet_logging.py (that file uses 3.9+ / 3.10+ type syntax).
|
# syslog_bridge.py (that file uses 3.9+ / 3.10+ type syntax).
|
||||||
COPY entrypoint.py /home/conpot/entrypoint.py
|
COPY entrypoint.py /home/conpot/entrypoint.py
|
||||||
RUN chown conpot:conpot /home/conpot/entrypoint.py \
|
RUN chown conpot:conpot /home/conpot/entrypoint.py \
|
||||||
&& chmod +x /home/conpot/entrypoint.py
|
&& chmod +x /home/conpot/entrypoint.py
|
||||||
|
|
||||||
# The upstream image already runs as non-root 'conpot'.
|
# The upstream image already runs as non-root 'conpot'.
|
||||||
# We do NOT switch to a 'decnet' user — doing so breaks pkg_resources
|
# We do NOT switch to a 'logrelay' user — doing so breaks pkg_resources
|
||||||
# because conpot's eggs live under /home/conpot/.local and are only on
|
# because conpot's eggs live under /home/conpot/.local and are only on
|
||||||
# the Python path for that user.
|
# the Python path for that user.
|
||||||
USER conpot
|
USER conpot
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
Entrypoint wrapper for the Conpot ICS/SCADA honeypot.
|
Entrypoint wrapper for the Conpot ICS/SCADA honeypot.
|
||||||
|
|
||||||
Launches conpot as a child process and bridges its log output into the
|
Launches conpot as a child process and bridges its log output into the
|
||||||
DECNET structured syslog pipeline. Each line from conpot stdout/stderr
|
syslog-relay structured syslog pipeline. Each line from conpot stdout/stderr
|
||||||
is classified and emitted as an RFC 5424 syslog line so the host-side
|
is classified and emitted as an RFC 5424 syslog line so the host-side
|
||||||
collector can ingest it alongside every other service.
|
collector can ingest it alongside every other service.
|
||||||
|
|
||||||
@@ -21,7 +21,7 @@ from datetime import datetime, timezone
|
|||||||
# ── RFC 5424 inline formatter (Python 3.6-compatible) ─────────────────────────
|
# ── RFC 5424 inline formatter (Python 3.6-compatible) ─────────────────────────
|
||||||
|
|
||||||
_FACILITY_LOCAL0 = 16
|
_FACILITY_LOCAL0 = 16
|
||||||
_SD_ID = "decnet@55555"
|
_SD_ID = "relay@55555"
|
||||||
_NILVALUE = "-"
|
_NILVALUE = "-"
|
||||||
|
|
||||||
SEVERITY_INFO = 6
|
SEVERITY_INFO = 6
|
||||||
@@ -1,15 +1,15 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
Shared RFC 5424 syslog helper for DECNET service templates.
|
Shared RFC 5424 syslog helper used by service containers.
|
||||||
|
|
||||||
Services call syslog_line() to format an RFC 5424 message, then
|
Services call syslog_line() to format an RFC 5424 message, then
|
||||||
write_syslog_file() to emit it to stdout — Docker captures it, and the
|
write_syslog_file() to emit it to stdout — the container runtime
|
||||||
host-side collector streams it into the log file.
|
captures it, and the host-side collector streams it into the log file.
|
||||||
|
|
||||||
RFC 5424 structure:
|
RFC 5424 structure:
|
||||||
<PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG
|
<PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG
|
||||||
|
|
||||||
Facility: local0 (16), PEN for SD element ID: decnet@55555
|
Facility: local0 (16). SD element ID uses PEN 55555.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
@@ -18,7 +18,7 @@ from typing import Any
|
|||||||
# ─── Constants ────────────────────────────────────────────────────────────────
|
# ─── Constants ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
_FACILITY_LOCAL0 = 16
|
_FACILITY_LOCAL0 = 16
|
||||||
_SD_ID = "decnet@55555"
|
_SD_ID = "relay@55555"
|
||||||
_NILVALUE = "-"
|
_NILVALUE = "-"
|
||||||
|
|
||||||
SEVERITY_EMERG = 0
|
SEVERITY_EMERG = 0
|
||||||
@@ -62,7 +62,7 @@ def syslog_line(
|
|||||||
|
|
||||||
Args:
|
Args:
|
||||||
service: APP-NAME (e.g. "http", "mysql")
|
service: APP-NAME (e.g. "http", "mysql")
|
||||||
hostname: HOSTNAME (decky node name)
|
hostname: HOSTNAME (node name)
|
||||||
event_type: MSGID (e.g. "request", "login_attempt")
|
event_type: MSGID (e.g. "request", "login_attempt")
|
||||||
severity: Syslog severity integer (default: INFO=6)
|
severity: Syslog severity integer (default: INFO=6)
|
||||||
timestamp: UTC datetime; defaults to now
|
timestamp: UTC datetime; defaults to now
|
||||||
@@ -80,10 +80,10 @@ def syslog_line(
|
|||||||
|
|
||||||
|
|
||||||
def write_syslog_file(line: str) -> None:
|
def write_syslog_file(line: str) -> None:
|
||||||
"""Emit a syslog line to stdout for Docker log capture."""
|
"""Emit a syslog line to stdout for container log capture."""
|
||||||
print(line, flush=True)
|
print(line, flush=True)
|
||||||
|
|
||||||
|
|
||||||
def forward_syslog(line: str, log_target: str) -> None:
|
def forward_syslog(line: str, log_target: str) -> None:
|
||||||
"""No-op stub. TCP forwarding is now handled by rsyslog, not by service containers."""
|
"""No-op stub. TCP forwarding is handled by rsyslog, not by service containers."""
|
||||||
pass
|
pass
|
||||||
@@ -7,7 +7,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
git authbind \
|
git authbind \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
RUN useradd -r -s /bin/false -d /opt decnet \
|
RUN useradd -r -s /bin/false -d /opt logrelay \
|
||||||
&& apt-get update && apt-get install -y --no-install-recommends libcap2-bin \
|
&& apt-get update && apt-get install -y --no-install-recommends libcap2-bin \
|
||||||
&& rm -rf /var/lib/apt/lists/* \
|
&& rm -rf /var/lib/apt/lists/* \
|
||||||
&& (find /usr/bin/ -maxdepth 1 -name 'python3*' -type f -exec setcap 'cap_net_bind_service+eip' {} \; 2>/dev/null || true)
|
&& (find /usr/bin/ -maxdepth 1 -name 'python3*' -type f -exec setcap 'cap_net_bind_service+eip' {} \; 2>/dev/null || true)
|
||||||
@@ -18,5 +18,5 @@ RUN chmod +x /entrypoint.sh
|
|||||||
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
|
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
|
||||||
CMD kill -0 1 || exit 1
|
CMD kill -0 1 || exit 1
|
||||||
|
|
||||||
USER decnet
|
USER logrelay
|
||||||
ENTRYPOINT ["/entrypoint.sh"]
|
ENTRYPOINT ["/entrypoint.sh"]
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user