Compare commits
265 Commits
c3c1cd2fa6
...
dev
| Author | SHA1 | Date | |
|---|---|---|---|
| 201d246c07 | |||
| 47cd200e1d | |||
| 096a35b24a | |||
| 8a2876fe86 | |||
| 3e8e4c9e1c | |||
| 64bc6fcb1d | |||
| af9d59d3ee | |||
| 4197441c01 | |||
| 1b70d6db87 | |||
| 038596776a | |||
| 692ac35ee4 | |||
| f064690452 | |||
| dd82cd3f39 | |||
| ff3e376726 | |||
| 47f2ca8d5f | |||
| da3e675f86 | |||
| 2febd921bc | |||
| 12b5c25cd7 | |||
| 5b70a34c94 | |||
| 4abfac1a98 | |||
| 9eca33938d | |||
| 195580c74d | |||
| 262a84ca53 | |||
| d1b7e94325 | |||
| 33d954a61c | |||
| bf01804736 | |||
| 62f7c88b90 | |||
| e411063075 | |||
| 148e51011c | |||
| 3ebd206bca | |||
| f576564f02 | |||
| 00d5799a79 | |||
| 14250cacad | |||
| 9d68bb45c7 | |||
| 07ec4bc269 | |||
| a63301c7a3 | |||
| df18cb44cc | |||
| 91549e6936 | |||
| e8e11b2896 | |||
| 585541016f | |||
| 5dad1bb315 | |||
| 6708f26e6b | |||
| 2bef3edb72 | |||
| d2cf1e8b3a | |||
| 6d7877c679 | |||
| ee9ade4cd5 | |||
| dad29249de | |||
| f91ba9a16e | |||
| 43b92c7bd6 | |||
| a0a241f65d | |||
| 42b5e4cd06 | |||
| 6245786289 | |||
| 5df995fda1 | |||
| 6d7567b6bb | |||
| dbaccde143 | |||
| b883f24ba2 | |||
| 79db999030 | |||
| cb1a1d1270 | |||
| 899ea559d9 | |||
| e67b6d7f73 | |||
| bc5f43c3f7 | |||
| ff4c993617 | |||
| e32fdf9cbf | |||
| 95ae175e1b | |||
| b4df9ea0a1 | |||
| 02f07c7962 | |||
| c6f7de30d2 | |||
| 37b22b76a5 | |||
| 43f140a87a | |||
| 3223bec615 | |||
| 2b1b962849 | |||
| 65fc9ac2b9 | |||
| 1e8b73c361 | |||
| 9b1299458d | |||
| 7894b9e073 | |||
| a266d6b17e | |||
| f5a5fec607 | |||
| 40d3e86e55 | |||
| ebeaf08a49 | |||
| 7765b36c50 | |||
| 8914c27220 | |||
| 4db9c7464c | |||
| 411a797120 | |||
| 3da5a2c4ee | |||
| bfc7af000a | |||
| 1e8ca4cc05 | |||
| a6430cac4c | |||
| 39d2077a3a | |||
| e2d6f857b5 | |||
| 811136e600 | |||
| 63b0a58527 | |||
| cd0057c129 | |||
| 0c77cdab32 | |||
| 8257bcc031 | |||
| d3b90679c5 | |||
| 6657d3e097 | |||
| 293da364a6 | |||
| d5e6ca1949 | |||
| a97696fa23 | |||
| 7864c72948 | |||
| 47a0480994 | |||
| 2bf886e18e | |||
| 8bdc5b98c9 | |||
| aa39be909a | |||
| 41fd496128 | |||
| 39dafaf384 | |||
| b0e00a6cc4 | |||
| 2843aafa1a | |||
| 766eeb3d83 | |||
| f462835373 | |||
| e356829234 | |||
| a5d6860124 | |||
| 8dd4c78b33 | |||
| 69510fb880 | |||
| 09d9f8595e | |||
| bfb3edbd4a | |||
| a773dddd5c | |||
| edc5c59f93 | |||
| 1f758a3669 | |||
| 6c22f9ba59 | |||
| 20fa1f9a63 | |||
| fb69a06ab3 | |||
| 1446f6da94 | |||
| e967aaabfb | |||
| 255c2e5eb7 | |||
| 2dd86fb3bb | |||
| 3106d03135 | |||
| 3cc5ba36e8 | |||
| 6301504c0e | |||
| de4b64d857 | |||
| b5d7bf818f | |||
| 257f780d0f | |||
| a10aee282f | |||
| 11b9e85874 | |||
| 45039bd621 | |||
| 4ea1c2ff4f | |||
| bb8d782e42 | |||
| 342916ca63 | |||
| d3f4bbb62b | |||
| 32340bea0d | |||
| f1e14280c0 | |||
| 931f33fb06 | |||
| 467511e997 | |||
| 3945e72e11 | |||
| bd406090a7 | |||
| e22d057e68 | |||
| cb12e7c475 | |||
| c29ca977fd | |||
| bf4afac70f | |||
| 4b15b7eb35 | |||
| 140d2fbaad | |||
| 064c8760b6 | |||
| 6572c5cbaf | |||
| ba448bae13 | |||
| 1a18377b0a | |||
| 319c1dbb61 | |||
| c1d8102253 | |||
| 49f3002c94 | |||
| 9b59f8672e | |||
| 296979003d | |||
| 89099b903d | |||
| 29578d9d99 | |||
| 70d8ffc607 | |||
| 04db13afae | |||
| d1a88e75bd | |||
| 65ddb0b359 | |||
| b437bc8eec | |||
| a1ca5d699b | |||
| e9d151734d | |||
| 0ab97d0ade | |||
| 60de16be84 | |||
| 82ec7f3117 | |||
| 11d749f13d | |||
| a4798946c1 | |||
| d869eb3d23 | |||
| 89887ec6fd | |||
| 02e73a19d5 | |||
| b3efd646f6 | |||
| 2ec64ef2ef | |||
| e67624452e | |||
| e05b632e56 | |||
| c8f05df4d9 | |||
| 935a9a58d2 | |||
| 63efe6c7ba | |||
| 314e6c6388 | |||
| 12aa98a83c | |||
| 7dbc71d664 | |||
| dae3687089 | |||
| 187194786f | |||
| 9de320421e | |||
| dd4e2aad91 | |||
| 7d10b78d50 | |||
| ddfb232590 | |||
| d7da3a7fc7 | |||
| 947efe7bd1 | |||
| c603531fd2 | |||
| a78126b1ba | |||
| 0ee23b8700 | |||
| 0952a0b71e | |||
| 4683274021 | |||
| ab187f70a1 | |||
| 172a002d41 | |||
| f6cb90ee66 | |||
| 2d65d74069 | |||
| d5eb60cb41 | |||
| 47f2da1d50 | |||
| 53fdeee208 | |||
| a2ba7a7f3c | |||
| 3eab6e8773 | |||
| 5a7ff285cd | |||
| 1d73957832 | |||
| c2eceb147d | |||
| 09d9c0ec74 | |||
| 2dcf47985e | |||
| 5585e4ec58 | |||
| ce2699455b | |||
| df3f04c10e | |||
| 7ff5703250 | |||
| a6c7cfdf66 | |||
| 7ecb126c8e | |||
| f3bb0b31ae | |||
| 8c249f6987 | |||
| 24e0d98425 | |||
| 7756747787 | |||
| e312e072e4 | |||
| 5631d09aa8 | |||
| c2f7622fbb | |||
| 8335c5dc4c | |||
| b71db65149 | |||
| fd62413935 | |||
| ea340065c6 | |||
| a022b4fed6 | |||
| 3dc5b509f6 | |||
| c9be447a38 | |||
| 62db686b42 | |||
| 57d395d6d7 | |||
| ac094965b5 | |||
| 435c004760 | |||
| 89a2132c61 | |||
| 3d01ca2c2a | |||
| 8124424e96 | |||
| a4da9b8f32 | |||
| 448cb9cee0 | |||
| 035499f255 | |||
| 0706919469 | |||
| f2cc585d72 | |||
| 89abb6ecc6 | |||
| 03f5a7826f | |||
| a5eaa3291e | |||
| b2e4706a14 | |||
| 6095d0d2ed | |||
| 04685ba1c4 | |||
| 2ce3f7ee90 | |||
| cb4bac4b42 | |||
| 8d5944f775 | |||
| ea9f7e734b | |||
| fe18575a9c | |||
| 0f63820ee6 | |||
| fdc404760f | |||
| 95190946e0 | |||
| 1692df7360 | |||
| aac39e818e | |||
| ff38d58508 | |||
| f78104e1c8 | |||
| 99be4e64ad |
@@ -1,27 +0,0 @@
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"mcp__plugin_context-mode_context-mode__ctx_batch_execute",
|
||||
"mcp__plugin_context-mode_context-mode__ctx_search",
|
||||
"Bash(grep:*)",
|
||||
"Bash(python -m pytest --tb=short -q)",
|
||||
"Bash(pip install:*)",
|
||||
"Bash(pip show:*)",
|
||||
"Bash(python:*)",
|
||||
"Bash(DECNET_JWT_SECRET=\"test-secret-xyz-1234!\" DECNET_ADMIN_PASSWORD=\"test-pass-xyz-1234!\" python:*)",
|
||||
"Bash(ls /home/anti/Tools/DECNET/*.db* /home/anti/Tools/DECNET/test_*.db*)",
|
||||
"mcp__plugin_context-mode_context-mode__ctx_execute_file",
|
||||
"Bash(nc)",
|
||||
"Bash(nmap:*)",
|
||||
"Bash(ping -c1 -W2 192.168.1.200)",
|
||||
"Bash(xxd)",
|
||||
"Bash(curl -s http://192.168.1.200:2375/version)",
|
||||
"Bash(python3 -m json.tool)",
|
||||
"Bash(curl -s http://192.168.1.200:9200/)",
|
||||
"Bash(docker image:*)",
|
||||
"Read(//home/anti/Tools/cowrie/src/cowrie/data/txtcmds/**)",
|
||||
"Read(//home/anti/Tools/cowrie/src/cowrie/data/txtcmds/bin/**)",
|
||||
"mcp__plugin_context-mode_context-mode__ctx_index"
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -2,7 +2,7 @@ name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [dev, testing]
|
||||
branches: [dev, testing, "temp/merge-*"]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
@@ -19,20 +19,6 @@ jobs:
|
||||
- run: pip install ruff
|
||||
- run: ruff check .
|
||||
|
||||
test:
|
||||
name: Test (pytest)
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.11", "3.12"]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- run: pip install -e .[dev]
|
||||
- run: pytest tests/ -v --tb=short
|
||||
|
||||
bandit:
|
||||
name: SAST (bandit)
|
||||
runs-on: ubuntu-latest
|
||||
@@ -42,7 +28,7 @@ jobs:
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- run: pip install bandit
|
||||
- run: bandit -r decnet/ -ll -x decnet/services/registry.py
|
||||
- run: bandit -r decnet/ -ll -x decnet/services/registry.py -x decnet/templates/
|
||||
|
||||
pip-audit:
|
||||
name: Dependency audit (pip-audit)
|
||||
@@ -54,36 +40,136 @@ jobs:
|
||||
python-version: "3.11"
|
||||
- run: pip install pip-audit
|
||||
- run: pip install -e .[dev]
|
||||
- run: pip-audit --skip-editable
|
||||
- run: pip-audit --skip-editable --ignore-vuln CVE-2025-65896
|
||||
|
||||
open-pr:
|
||||
name: Open PR to main
|
||||
test-standard:
|
||||
name: Test (Standard)
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint, test, bandit, pip-audit]
|
||||
needs: [lint, bandit, pip-audit]
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.11"]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- run: pip install -e .[dev]
|
||||
- run: pytest
|
||||
|
||||
test-live:
|
||||
name: Test (Live)
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-standard]
|
||||
services:
|
||||
mysql:
|
||||
image: mysql:8.0
|
||||
env:
|
||||
MYSQL_ROOT_PASSWORD: root
|
||||
MYSQL_DATABASE: decnet_test
|
||||
ports:
|
||||
- 3307:3306
|
||||
options: >-
|
||||
--health-cmd="mysqladmin ping -h 127.0.0.1"
|
||||
--health-interval=10s
|
||||
--health-timeout=5s
|
||||
--health-retries=5
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.11"]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- run: pip install -e .[dev]
|
||||
- run: pytest -m live
|
||||
env:
|
||||
DECNET_MYSQL_HOST: 127.0.0.1
|
||||
DECNET_MYSQL_PORT: 3307
|
||||
DECNET_MYSQL_USER: root
|
||||
DECNET_MYSQL_PASSWORD: root
|
||||
DECNET_MYSQL_DATABASE: decnet_test
|
||||
|
||||
test-fuzz:
|
||||
name: Test (Fuzz)
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-live]
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.11"]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- run: pip install -e .[dev]
|
||||
- run: pytest -m fuzz
|
||||
env:
|
||||
SCHEMATHESIS_CONFIG: schemathesis.ci.toml
|
||||
|
||||
merge-to-testing:
|
||||
name: Merge dev → testing
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-standard, test-live, test-fuzz]
|
||||
if: github.ref == 'refs/heads/dev'
|
||||
steps:
|
||||
- name: Open PR via Gitea API
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.DECNET_PR_TOKEN }}
|
||||
- name: Configure git
|
||||
run: |
|
||||
echo "--- Checking for existing open PRs ---"
|
||||
LIST_RESPONSE=$(curl -s \
|
||||
-H "Authorization: token ${{ secrets.DECNET_PR_TOKEN }}" \
|
||||
"https://git.resacachile.cl/api/v1/repos/anti/DECNET/pulls?state=open&head=anti:dev&base=main&limit=5")
|
||||
echo "$LIST_RESPONSE"
|
||||
EXISTING=$(echo "$LIST_RESPONSE" | python3 -c "import sys, json; print(len(json.load(sys.stdin)))")
|
||||
echo "Open PRs found: $EXISTING"
|
||||
if [ "$EXISTING" -gt "0" ]; then
|
||||
echo "PR already open, skipping."
|
||||
exit 0
|
||||
fi
|
||||
echo "--- Creating PR ---"
|
||||
CREATE_RESPONSE=$(curl -s -X POST \
|
||||
-H "Authorization: token ${{ secrets.DECNET_PR_TOKEN }}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"title": "Auto PR: dev → main",
|
||||
"head": "dev",
|
||||
"base": "main",
|
||||
"body": "All CI and security checks passed. Review and merge when ready."
|
||||
}' \
|
||||
"https://git.resacachile.cl/api/v1/repos/anti/DECNET/pulls")
|
||||
echo "$CREATE_RESPONSE"
|
||||
git config user.name "DECNET CI"
|
||||
git config user.email "ci@decnet.local"
|
||||
- name: Merge dev into testing
|
||||
run: |
|
||||
git fetch origin testing
|
||||
git checkout testing
|
||||
git merge origin/dev --no-ff -m "ci: auto-merge dev → testing [skip ci]"
|
||||
git push origin testing
|
||||
|
||||
prepare-merge-to-main:
|
||||
name: Prepare Merge to Main
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-standard, test-live, test-fuzz]
|
||||
if: github.ref == 'refs/heads/testing'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.DECNET_PR_TOKEN }}
|
||||
- name: Configure git
|
||||
run: |
|
||||
git config user.name "DECNET CI"
|
||||
git config user.email "ci@decnet.local"
|
||||
- name: Create temp branch and sync with main
|
||||
run: |
|
||||
git fetch origin main
|
||||
git checkout -b temp/merge-testing-to-main
|
||||
echo "--- Switched to temp branch, merging main into it ---"
|
||||
git merge origin/main --no-edit || { echo "CONFLICT: Manual resolution required"; exit 1; }
|
||||
git push origin temp/merge-testing-to-main --force
|
||||
|
||||
finalize-merge-to-main:
|
||||
name: Finalize Merge to Main
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-standard, test-live, test-fuzz]
|
||||
if: startsWith(github.ref, 'refs/heads/temp/merge-')
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.DECNET_PR_TOKEN }}
|
||||
- name: Configure git
|
||||
run: |
|
||||
git config user.name "DECNET CI"
|
||||
git config user.email "ci@decnet.local"
|
||||
- name: Merge RC into main
|
||||
run: |
|
||||
git fetch origin main
|
||||
git checkout main
|
||||
git merge ${{ github.ref }} --no-ff -m "ci: auto-merge testing → main"
|
||||
git push origin main
|
||||
echo "--- Cleaning up temp branch ---"
|
||||
git push origin --delete ${{ github.ref_name }}
|
||||
|
||||
@@ -30,5 +30,28 @@ jobs:
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- run: pip install -e .
|
||||
- run: pip install -e .[dev]
|
||||
- run: pytest tests/ -v --tb=short
|
||||
|
||||
bandit:
|
||||
name: SAST (bandit)
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- run: pip install bandit
|
||||
- run: bandit -r decnet/ -ll -x decnet/services/registry.py
|
||||
|
||||
pip-audit:
|
||||
name: Dependency audit (pip-audit)
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- run: pip install pip-audit
|
||||
- run: pip install -e .[dev]
|
||||
- run: pip-audit --skip-editable
|
||||
|
||||
@@ -22,27 +22,42 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.DECNET_PR_TOKEN }}
|
||||
|
||||
- name: Extract version from pyproject.toml
|
||||
- name: Configure git
|
||||
run: |
|
||||
git config user.name "DECNET CI"
|
||||
git config user.email "ci@decnet.local"
|
||||
|
||||
- name: Bump version and Tag
|
||||
id: version
|
||||
run: |
|
||||
VERSION=$(python3 -c "import tomllib; f=open('pyproject.toml','rb'); d=tomllib.load(f); print(d['project']['version'])")
|
||||
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||
# Calculate next version (v0.x)
|
||||
LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0")
|
||||
NEXT_VER=$(python3 -c "
|
||||
tag = '$LATEST_TAG'.lstrip('v')
|
||||
parts = tag.split('.')
|
||||
major = int(parts[0]) if parts[0] else 0
|
||||
minor = int(parts[1]) if len(parts) > 1 else 0
|
||||
print(f'{major}.{minor + 1}.0')
|
||||
")
|
||||
|
||||
- name: Create tag if not exists
|
||||
id: tag
|
||||
run: |
|
||||
VERSION=${{ steps.version.outputs.version }}
|
||||
if git rev-parse "v$VERSION" >/dev/null 2>&1; then
|
||||
echo "Tag v$VERSION already exists, skipping."
|
||||
echo "created=false" >> $GITHUB_OUTPUT
|
||||
else
|
||||
git config user.name "gitea-actions"
|
||||
git config user.email "actions@git.resacachile.cl"
|
||||
git tag -a "v$VERSION" -m "Release v$VERSION"
|
||||
git push origin "v$VERSION"
|
||||
echo "Next version: $NEXT_VER (calculated from $LATEST_TAG)"
|
||||
|
||||
# Update pyproject.toml
|
||||
sed -i "s/^version = \".*\"/version = \"$NEXT_VER\"/" pyproject.toml
|
||||
|
||||
git add pyproject.toml
|
||||
git commit -m "chore: auto-release v$NEXT_VER [skip ci]" || echo "No changes to commit"
|
||||
CHANGELOG=$(git log ${LATEST_TAG}..HEAD --oneline --no-decorate --no-merges)
|
||||
git tag -a "v$NEXT_VER" -m "Auto-release v$NEXT_VER
|
||||
|
||||
Changes since $LATEST_TAG:
|
||||
$CHANGELOG"
|
||||
git push origin main --follow-tags
|
||||
|
||||
echo "version=$NEXT_VER" >> $GITHUB_OUTPUT
|
||||
echo "created=true" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
docker:
|
||||
name: Build, scan & push ${{ matrix.service }}
|
||||
@@ -52,7 +67,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
service:
|
||||
- cowrie
|
||||
- conpot
|
||||
- docker_api
|
||||
- elasticsearch
|
||||
- ftp
|
||||
@@ -69,11 +84,12 @@ jobs:
|
||||
- postgres
|
||||
- rdp
|
||||
- redis
|
||||
- real_ssh
|
||||
- sip
|
||||
- smb
|
||||
- smtp
|
||||
- snmp
|
||||
- ssh
|
||||
- telnet
|
||||
- tftp
|
||||
- vnc
|
||||
steps:
|
||||
@@ -99,13 +115,13 @@ jobs:
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
- name: Install Trivy
|
||||
run: |
|
||||
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin
|
||||
|
||||
- name: Scan with Trivy
|
||||
uses: aquasecurity/trivy-action@master
|
||||
with:
|
||||
image-ref: decnet-${{ matrix.service }}:scan
|
||||
exit-code: "1"
|
||||
severity: CRITICAL
|
||||
ignore-unfixed: true
|
||||
run: |
|
||||
trivy image --exit-code 1 --severity CRITICAL --ignore-unfixed decnet-${{ matrix.service }}:scan
|
||||
|
||||
- name: Push image
|
||||
if: success()
|
||||
|
||||
14
.gitignore
vendored
14
.gitignore
vendored
@@ -1,5 +1,7 @@
|
||||
.venv/
|
||||
.claude/
|
||||
logs/
|
||||
.claude/*
|
||||
CLAUDE.md
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
@@ -9,7 +11,6 @@ build/
|
||||
decnet-compose.yml
|
||||
decnet-state.json
|
||||
*.ini
|
||||
.env
|
||||
decnet.log*
|
||||
*.loggy
|
||||
*.nmap
|
||||
@@ -17,6 +18,13 @@ linterfails.log
|
||||
webmail
|
||||
windows1
|
||||
*.db
|
||||
*.db-shm
|
||||
*.db-wal
|
||||
decnet.*.log
|
||||
decnet.json
|
||||
.env
|
||||
.env*
|
||||
.env.local
|
||||
.coverage
|
||||
.hypothesis/
|
||||
profiles/*
|
||||
tests/test_decnet.db*
|
||||
|
||||
37
BUGS.md
37
BUGS.md
@@ -1,37 +0,0 @@
|
||||
# BUGS
|
||||
|
||||
Active bugs detected during development. Do not fix until noted otherwise.
|
||||
|
||||
---
|
||||
|
||||
## BUG-001 — Split-brain model imports across router files (Gemini SQLModel migration)
|
||||
|
||||
**Detected:** 2026-04-09
|
||||
**Status:** Open — do not fix, migration in progress
|
||||
|
||||
**Symptom:** `from decnet.web.api import app` fails with `ModuleNotFoundError: No module named 'decnet.web.models'`
|
||||
|
||||
**Root cause:** Gemini's SQLModel migration is partially complete. Models were moved to `decnet/web/db/models.py`, but three router files were not updated and still import from the old `decnet.web.models` path:
|
||||
|
||||
| File | Stale import |
|
||||
|------|--------------|
|
||||
| `decnet/web/router/auth/api_login.py:12` | `from decnet.web.models import LoginRequest, Token` |
|
||||
| `decnet/web/router/auth/api_change_pass.py:7` | `from decnet.web.models import ChangePasswordRequest` |
|
||||
| `decnet/web/router/stats/api_get_stats.py:6` | `from decnet.web.models import StatsResponse` |
|
||||
|
||||
**Fix:** Update those three files to import from `decnet.web.db.models` (consistent with the other router files already migrated).
|
||||
|
||||
**Impact:** All `tests/api/` tests fail to collect. Web server cannot start.
|
||||
|
||||
---
|
||||
|
||||
## BUG-002 — `decnet/web/db/sqlite/repository.py` depends on `sqlalchemy` directly
|
||||
|
||||
**Detected:** 2026-04-09
|
||||
**Status:** Resolved (dependency installed via `pip install -e ".[dev]"`)
|
||||
|
||||
**Symptom:** `ModuleNotFoundError: No module named 'sqlalchemy'` before `sqlmodel` was installed.
|
||||
|
||||
**Root cause:** `sqlmodel>=0.0.16` was added to `pyproject.toml` but `pip install -e .` had not been re-run in the dev environment.
|
||||
|
||||
**Fix:** Run `pip install -e ".[dev]"`. Already applied.
|
||||
57
CLAUDE.md
57
CLAUDE.md
@@ -1,57 +0,0 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
## Commands
|
||||
|
||||
```bash
|
||||
# Install (dev)
|
||||
pip install -e .
|
||||
|
||||
# List registered service plugins
|
||||
decnet services
|
||||
|
||||
# Dry-run (generates compose, no containers)
|
||||
decnet deploy --mode unihost --deckies 3 --randomize-services --dry-run
|
||||
|
||||
# Full deploy (requires root for MACVLAN)
|
||||
sudo decnet deploy --mode unihost --deckies 5 --interface eth0 --randomize-services
|
||||
sudo decnet deploy --mode unihost --deckies 3 --services ssh,smb --log-target 192.168.1.5:5140
|
||||
|
||||
# Status / teardown
|
||||
decnet status
|
||||
sudo decnet teardown --all
|
||||
sudo decnet teardown --id decky-01
|
||||
```
|
||||
|
||||
## Project Overview
|
||||
|
||||
DECNET is a honeypot/deception network framework. It deploys fake machines (called **deckies**) with realistic services (RDP, SMB, SSH, FTP, etc.) to lure and profile attackers. All attacker interactions are aggregated to an isolated logging network (ELK stack / SIEM).
|
||||
|
||||
## Deployment Models
|
||||
|
||||
**UNIHOST** — one real host spins up _n_ deckies via a container orchestrator. Simpler, single-machine deployment.
|
||||
|
||||
**SWARM (MULTIHOST)** — _n_ real hosts each running deckies. Orchestrated via Ansible/sshpass or similar tooling.
|
||||
|
||||
## Core Technology Choices
|
||||
|
||||
- **Containers**: Docker Compose is the starting point but other orchestration frameworks should be evaluated if they serve the project better. `debian:bookworm-slim` is the default base image; mixing in Ubuntu, CentOS, or other distros is encouraged to make the decoy network look heterogeneous.
|
||||
- **Networking**: Deckies need to appear as real machines on the LAN (own MACs/IPs). MACVLAN and IPVLAN are candidates; the right driver depends on the host environment. WSL has known limitations — bare metal or a VM is preferred for testing.
|
||||
- **Log pipeline**: Logstash → ELK stack → SIEM (isolated network, not reachable from decoy network)
|
||||
|
||||
## Architecture Constraints
|
||||
|
||||
- The decoy network must be reachable from the outside (attacker-facing).
|
||||
- The logging/aggregation network must be isolated from the decoy network.
|
||||
- A publicly accessible real server acts as the bridge between the two networks.
|
||||
- Deckies should differ in exposed services and OS fingerprints to appear as a heterogeneous network.
|
||||
|
||||
## Development and testing
|
||||
|
||||
- For every new feature, pytests must me made.
|
||||
- Pytest is the main testing framework in use.
|
||||
- NEVER pass broken code to the user.
|
||||
- Broken means: not running, not passing 100% tests, etc.
|
||||
- After tests pass with 100%, always git commit your changes.
|
||||
- NEVER add "Co-Authored-By" or any Claude attribution lines to git commit messages.
|
||||
103
GEMINI.md
103
GEMINI.md
@@ -1,103 +0,0 @@
|
||||
# DECNET (Deception Network) Project Context
|
||||
|
||||
DECNET is a high-fidelity honeypot framework designed to deploy heterogeneous fleets of fake machines (called **deckies**) that appear as real hosts on a local network.
|
||||
|
||||
## Project Overview
|
||||
|
||||
- **Core Purpose:** To lure, profile, and log attacker interactions within a controlled, deceptive environment.
|
||||
- **Key Technology:** Linux-native container networking (MACVLAN/IPvlan) combined with Docker to give each decoy its own MAC address, IP, and realistic TCP/IP stack behavior.
|
||||
- **Main Components:**
|
||||
- **Deckies:** Group of containers sharing a network namespace (one base container + multiple service containers).
|
||||
- **Archetypes:** Pre-defined machine profiles (e.g., `windows-workstation`, `linux-server`) that bundle services and OS fingerprints.
|
||||
- **Services:** Modular honeypot plugins (SSH, SMB, RDP, etc.) built as `BaseService` subclasses.
|
||||
- **OS Fingerprinting:** Sysctl-based TCP/IP stack tuning to spoof OS detection (nmap).
|
||||
- **Logging Pipeline:** RFC 5424 syslog forwarding to an isolated SIEM/ELK stack.
|
||||
|
||||
## Technical Stack
|
||||
|
||||
- **Language:** Python 3.11+
|
||||
- **CLI Framework:** [Typer](https://typer.tiangolo.com/)
|
||||
- **Data Validation:** [Pydantic v2](https://docs.pydantic.dev/)
|
||||
- **Orchestration:** Docker Engine 24+ (via Docker SDK for Python)
|
||||
- **Networking:** MACVLAN (default) or IPvlan L2 (for WiFi/restricted environments).
|
||||
- **Testing:** Pytest (100% pass requirement).
|
||||
- **Formatting/Linting:** Ruff, Bandit (SAST), pip-audit.
|
||||
|
||||
## Architecture
|
||||
|
||||
```text
|
||||
Host NIC (eth0)
|
||||
└── MACVLAN Bridge
|
||||
├── Decky-01 (192.168.1.10) -> [Base] + [SSH] + [HTTP]
|
||||
├── Decky-02 (192.168.1.11) -> [Base] + [SMB] + [RDP]
|
||||
└── ...
|
||||
```
|
||||
|
||||
- **Base Container:** Owns the IP/MAC, sets `sysctls` for OS spoofing, and runs `sleep infinity`.
|
||||
- **Service Containers:** Use `network_mode: service:<base>` to share the identity and networking of the base container.
|
||||
- **Isolation:** Decoy traffic is strictly separated from the logging network.
|
||||
|
||||
## Key Commands
|
||||
|
||||
### Development & Maintenance
|
||||
- **Install (Dev):**
|
||||
- `rm .venv -rf`
|
||||
- `python3 -m venv .venv`
|
||||
- `source .venv/bin/activate`
|
||||
- `pip install -e .`
|
||||
- **Run Tests:** `pytest` (Run before any commit)
|
||||
- **Linting:** `ruff check .`
|
||||
- **Security Scan:** `bandit -r decnet/`
|
||||
- **Web Git:** git.resacachile.cl (Gitea)
|
||||
|
||||
### CLI Usage
|
||||
- **List Services:** `decnet services`
|
||||
- **List Archetypes:** `decnet archetypes`
|
||||
- **Dry Run (Compose Gen):** `decnet deploy --deckies 3 --randomize-services --dry-run`
|
||||
- **Deploy (Full):** `sudo .venv/bin/decnet deploy --interface eth0 --deckies 5 --randomize-services`
|
||||
- **Status:** `decnet status`
|
||||
- **Teardown:** `sudo .venv/bin/decnet teardown --all`
|
||||
|
||||
## Development Conventions
|
||||
|
||||
- **Code Style:**
|
||||
- Strict adherence to Ruff/PEP8.
|
||||
- **Always use typed variables**. If any non-types variables are found, they must be corrected.
|
||||
- The correct way is `x: int = 1`, never `x : int = 1`.
|
||||
- If assignment is present, always use a space between the type and the equal sign `x: int = 1`.
|
||||
- **Never** use lowercase L (l), uppercase o (O) or uppercase i (i) in single-character names.
|
||||
- **Internal vars are to be declared with an underscore** (_internal_variable_name).
|
||||
- **Internal to internal vars are to be declared with double underscore** (__internal_variable_name).
|
||||
- Always use snake_case for code.
|
||||
- Always use PascalCase for classes and generics.
|
||||
- **Testing:** New features MUST include a `pytest` case. 100% test pass rate is mandatory before merging.
|
||||
- **Plugin System:**
|
||||
- New services go in `decnet/services/<name>.py`.
|
||||
- Subclass `decnet.services.base.BaseService`.
|
||||
- The registry uses auto-discovery; no manual registration required.
|
||||
- **Configuration:**
|
||||
- Use Pydantic models in `decnet/config.py` for any new settings.
|
||||
- INI file parsing is handled in `decnet/ini_loader.py`.
|
||||
- **State Management:**
|
||||
- Runtime state is persisted in `decnet-state.json`.
|
||||
- Do not modify this file manually.
|
||||
- **General Development Guidelines**:
|
||||
- **Never** commit broken code, or before running `pytest`s or `bandit` at the project level.
|
||||
- **No matter how small** the changes, they must be committed.
|
||||
- **If new features are addedd** new tests must be added, too.
|
||||
- **Never present broken code to the user**. Test, validate, then present.
|
||||
- **Extensive testing** for every function must be created.
|
||||
- **Always develop in the `dev` branch, never in `main`.**
|
||||
- **Test in the `testing` branch.**
|
||||
|
||||
## Directory Structure
|
||||
|
||||
- `decnet/`: Main source code.
|
||||
- `services/`: Honeypot service implementations.
|
||||
- `logging/`: Syslog formatting and forwarding logic.
|
||||
- `correlation/`: (In Progress) Logic for grouping attacker events.
|
||||
- `templates/`: Dockerfiles and entrypoint scripts for services.
|
||||
- `tests/`: Pytest suite.
|
||||
- `pyproject.toml`: Dependency and entry point definitions.
|
||||
- `CLAUDE.md`: Claude-specific environment guidance.
|
||||
- `DEVELOPMENT.md`: Roadmap and TODOs.
|
||||
110
README.md
110
README.md
@@ -508,6 +508,10 @@ DECNET_WEB_HOST=0.0.0.0
|
||||
DECNET_WEB_PORT=8080
|
||||
DECNET_ADMIN_USER=admin
|
||||
DECNET_ADMIN_PASSWORD=admin
|
||||
|
||||
# Database pool tuning (applies to both SQLite and MySQL)
|
||||
DECNET_DB_POOL_SIZE=20 # base pool connections (default: 20)
|
||||
DECNET_DB_MAX_OVERFLOW=40 # extra connections under burst (default: 40)
|
||||
```
|
||||
|
||||
Copy `.env.example` to `.env.local` and modify it to suit your environment.
|
||||
@@ -676,6 +680,112 @@ The test suite covers:
|
||||
|
||||
Every new feature requires passing tests before merging.
|
||||
|
||||
### Stress Testing
|
||||
|
||||
A [Locust](https://locust.io)-based stress test suite lives in `tests/stress/`. It hammers every API endpoint with realistic traffic patterns to find throughput ceilings and latency degradation.
|
||||
|
||||
```bash
|
||||
# Run via pytest (starts its own server)
|
||||
pytest -m stress tests/stress/ -v -x -n0 -s
|
||||
|
||||
# Crank it up
|
||||
STRESS_USERS=2000 STRESS_SPAWN_RATE=200 STRESS_DURATION=120 pytest -m stress tests/stress/ -v -x -n0 -s
|
||||
|
||||
# Standalone Locust web UI against a running server
|
||||
locust -f tests/stress/locustfile.py --host http://localhost:8000
|
||||
```
|
||||
|
||||
| Env var | Default | Description |
|
||||
|---|---|---|
|
||||
| `STRESS_USERS` | `500` | Total simulated users |
|
||||
| `STRESS_SPAWN_RATE` | `50` | Users spawned per second |
|
||||
| `STRESS_DURATION` | `60` | Test duration in seconds |
|
||||
| `STRESS_WORKERS` | CPU count (max 4) | Uvicorn workers for the test server |
|
||||
| `STRESS_MIN_RPS` | `500` | Minimum RPS to pass baseline test |
|
||||
| `STRESS_MAX_P99_MS` | `200` | Maximum p99 latency (ms) to pass |
|
||||
| `STRESS_SPIKE_USERS` | `1000` | Users for thundering herd test |
|
||||
| `STRESS_SUSTAINED_USERS` | `200` | Users for sustained load test |
|
||||
|
||||
#### Measured baseline
|
||||
|
||||
Reference numbers from recent Locust runs against a MySQL backend
|
||||
(asyncmy driver). All runs hold zero failures throughout.
|
||||
|
||||
**Single worker** (unless noted):
|
||||
|
||||
| Metric | 500u, tracing on | 1500u, tracing on | 1500u, tracing **off** | 1500u, tracing off, **pinned to 1 core** | 1500u, tracing off, **12 workers** |
|
||||
|---|---|---|---|---|---|
|
||||
| Requests served | 396,672 | 232,648 | 277,214 | 3,532 | 308,024 |
|
||||
| Failures | 0 | 0 | 0 | 0 | 0 |
|
||||
| Throughput (current RPS) | ~960 | ~880 | ~990 | ~46 | ~1,585 |
|
||||
| Average latency | 465 ms | 1,774 ms | 1,489 ms | 21.7 s | 930 ms |
|
||||
| Median (p50) | 100 ms | 690 ms | 340 ms | 270 ms | 700 ms |
|
||||
| p95 | 1.9 s | 6.5 s | 5.7 s | 115 s | 2.7 s |
|
||||
| p99 | 2.9 s | 9.5 s | 8.4 s | 122 s | 4.2 s |
|
||||
| Max observed | 8.3 s | 24.4 s | 20.9 s | 124.5 s | 16.5 s |
|
||||
|
||||
Ramp is 15 users/s for the 500u column, 40 users/s otherwise.
|
||||
|
||||
Takeaways:
|
||||
|
||||
- **Tracing off**: at 1500 users, flipping `DECNET_TRACING=false`
|
||||
halves p50 (690 → 340 ms) and pushes RPS from ~880 past the
|
||||
500-user figure on a single worker.
|
||||
- **12 workers**: RPS scales ~1.6× over a single worker (~990 →
|
||||
~1585). Sublinear because the workload is DB-bound — MySQL and the
|
||||
connection pool become the new ceiling, not Python. p99 drops from
|
||||
8.4 s to 4.2 s.
|
||||
- **Connection math**: `DECNET_DB_POOL_SIZE=20` × `DECNET_DB_MAX_OVERFLOW=40`
|
||||
× 12 workers = 720 connections at peak. MySQL's default
|
||||
`max_connections=151` needs bumping (we used 2000) before running
|
||||
multi-worker load.
|
||||
- **Single-core pinning**: ~46 RPS with p95 near two minutes. Interesting
|
||||
as a "physics floor" datapoint — not a production config.
|
||||
|
||||
Top endpoints by volume: `/api/v1/attackers`, `/api/v1/deckies`,
|
||||
`/api/v1/bounty`, `/api/v1/logs/histogram`, `/api/v1/config`,
|
||||
`/api/v1/health`, `/api/v1/auth/login`, `/api/v1/logs`.
|
||||
|
||||
Notes on tuning:
|
||||
|
||||
- **Python 3.14 is currently a no-go for the API server.** Under heavy
|
||||
concurrent async load the reworked 3.14 GC segfaults inside
|
||||
`mark_all_reachable` (observed in `_PyGC_Collect` during pending-GC
|
||||
on 3.14.3). Stick to Python 3.11–3.13 until upstream stabilises.
|
||||
- Router-level TTL caches on hot count/stats endpoints (`/stats`,
|
||||
`/logs` count, `/attackers` count, `/bounty`, `/logs/histogram`,
|
||||
`/deckies`, `/config`) collapse concurrent duplicate work onto a
|
||||
single DB hit per window — essential to reach this RPS on one worker.
|
||||
- Turning off request tracing (`DECNET_TRACING=false`) is the next
|
||||
free headroom: tracing was still on during the run above.
|
||||
- On SQLite, `DECNET_DB_POOL_PRE_PING=false` skips the per-checkout
|
||||
`SELECT 1`. On MySQL, keep it `true` — network disconnects are real.
|
||||
|
||||
#### System tuning: open file limit
|
||||
|
||||
Under heavy load (500+ concurrent users), the server will exhaust the default Linux open file limit (`ulimit -n`), causing `OSError: [Errno 24] Too many open files`. Most distros default to **1024**, which is far too low for stress testing or production use.
|
||||
|
||||
**Before running stress tests:**
|
||||
|
||||
```bash
|
||||
# Check current limit
|
||||
ulimit -n
|
||||
|
||||
# Bump for this shell session
|
||||
ulimit -n 65536
|
||||
```
|
||||
|
||||
**Permanent fix** — add to `/etc/security/limits.conf`:
|
||||
|
||||
```
|
||||
* soft nofile 65536
|
||||
* hard nofile 65536
|
||||
```
|
||||
|
||||
Or for systemd-managed services, add `LimitNOFILE=65536` to the unit file.
|
||||
|
||||
> This applies to production deployments too — any server handling hundreds of concurrent connections needs a raised file descriptor limit.
|
||||
|
||||
# AI Disclosure
|
||||
|
||||
This project has been made with lots, and I mean lots of help from AIs. While most of the design was made by me, most of the coding was done by AI models.
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
# arche-test.ini
|
||||
# OS fingerprint smoke-test fleet.
|
||||
#
|
||||
# One group per OS family, each spinning up 2 deckies.
|
||||
# Deploy with:
|
||||
# sudo .venv/bin/decnet deploy --config arche-test.ini --dry-run
|
||||
# sudo .venv/bin/decnet deploy --config arche-test.ini --interface eth0
|
||||
#
|
||||
# After deploy, verify with:
|
||||
# sudo nmap -O --osscan-guess <ip>
|
||||
# sudo p0f -i <iface> -p
|
||||
# ---- Linux (TTL 64, timestamps on, ECN offer) ----
|
||||
[os-linux]
|
||||
nmap_os=linux
|
||||
services=ssh,http
|
||||
amount=2
|
||||
|
||||
# ---- Windows (TTL 128, timestamps off, no ECN) ----
|
||||
[os-windows]
|
||||
nmap_os=windows
|
||||
services=smb,rdp
|
||||
amount=2
|
||||
|
||||
# ---- BSD (TTL 64, timestamps on, no ECN) ----
|
||||
[os-bsd]
|
||||
nmap_os=bsd
|
||||
services=ssh,http
|
||||
amount=2
|
||||
|
||||
# ---- Embedded (TTL 255, timestamps off, no SACK, no window scaling) ----
|
||||
[os-embedded]
|
||||
nmap_os=embedded
|
||||
services=snmp
|
||||
amount=2
|
||||
|
||||
# ---- Cisco (TTL 255, timestamps off, no SACK, ip_no_pmtu_disc on) ----
|
||||
[os-cisco]
|
||||
nmap_os=cisco
|
||||
services=snmp
|
||||
amount=2
|
||||
64
decnet.ini.example
Normal file
64
decnet.ini.example
Normal file
@@ -0,0 +1,64 @@
|
||||
; /etc/decnet/decnet.ini — DECNET host configuration
|
||||
;
|
||||
; Copy to /etc/decnet/decnet.ini and edit. Values here seed os.environ at
|
||||
; CLI startup via setdefault() — real env vars still win, so you can
|
||||
; override any value on the shell without editing this file.
|
||||
;
|
||||
; A missing file is fine; every daemon has sensible defaults. The main
|
||||
; reason to use this file is to skip typing the same flags on every
|
||||
; `decnet` invocation and to pin a host's role via `mode`.
|
||||
|
||||
[decnet]
|
||||
; mode = agent | master
|
||||
; agent — worker host (runs `decnet agent`, `decnet forwarder`, `decnet updater`).
|
||||
; Master-only commands (api, swarmctl, swarm, deploy, teardown, ...)
|
||||
; are hidden from `decnet --help` and refuse to run.
|
||||
; master — central server (runs `decnet api`, `decnet web`, `decnet swarmctl`,
|
||||
; `decnet listener`). All commands visible.
|
||||
mode = agent
|
||||
|
||||
; disallow-master = true (default when mode=agent)
|
||||
; Set to false for hybrid dev hosts that legitimately run both roles.
|
||||
disallow-master = true
|
||||
|
||||
; log-directory — root for DECNET's per-component logs. Systemd units set
|
||||
; DECNET_SYSTEM_LOGS=<log-directory>/decnet.<component>.log so agent, forwarder,
|
||||
; and engine each get their own file. The forwarder tails decnet.log.
|
||||
log-directory = /var/log/decnet
|
||||
|
||||
|
||||
; ─── Agent-only settings (read when mode=agent) ───────────────────────────
|
||||
[agent]
|
||||
; Where the master's syslog-TLS listener lives. DECNET_SWARM_MASTER_HOST.
|
||||
master-host = 192.168.1.50
|
||||
; Master listener port (RFC 5425 default 6514). DECNET_SWARM_SYSLOG_PORT.
|
||||
swarm-syslog-port = 6514
|
||||
; Bind address/port for this worker's agent API (mTLS).
|
||||
agent-port = 8765
|
||||
; Cert bundle dir — must contain ca.crt, worker.crt, worker.key from enroll.
|
||||
; DECNET_AGENT_DIR — honored by the forwarder child as well.
|
||||
agent-dir = /home/anti/.decnet/agent
|
||||
; Updater cert bundle (required for `decnet updater`).
|
||||
updater-dir = /home/anti/.decnet/updater
|
||||
|
||||
|
||||
; ─── Master-only settings (read when mode=master) ─────────────────────────
|
||||
[master]
|
||||
; Main API (REST for the React dashboard). DECNET_API_HOST / _PORT.
|
||||
api-host = 0.0.0.0
|
||||
api-port = 8000
|
||||
; React dev-server dashboard (`decnet web`). DECNET_WEB_HOST / _PORT.
|
||||
web-host = 0.0.0.0
|
||||
web-port = 8080
|
||||
; Swarm controller (master-internal). DECNET_SWARMCTL_HOST isn't exposed
|
||||
; under that name today — this block is the forward-compatible spelling.
|
||||
; swarmctl-host = 127.0.0.1
|
||||
; swarmctl-port = 8770
|
||||
; Syslog-over-TLS listener bind address and port. DECNET_LISTENER_HOST and
|
||||
; DECNET_SWARM_SYSLOG_PORT. The listener is auto-spawned by `decnet swarmctl`.
|
||||
listener-host = 0.0.0.0
|
||||
swarm-syslog-port = 6514
|
||||
; Master CA dir (for enroll / swarm cert issuance).
|
||||
; ca-dir = /home/anti/.decnet/ca
|
||||
; JWT secret for the web API. MUST be set; 32+ bytes. Keep out of git.
|
||||
; jwt-secret = REPLACE_ME_WITH_A_32_BYTE_SECRET
|
||||
@@ -0,0 +1,12 @@
|
||||
"""DECNET — honeypot deception-network framework.
|
||||
|
||||
This __init__ runs once, on the first `import decnet.*`. It seeds
|
||||
os.environ from /etc/decnet/decnet.ini (if present) so that later
|
||||
module-level reads in decnet.env pick up the INI values as if they had
|
||||
been exported by the shell. Real env vars always win via setdefault().
|
||||
|
||||
Kept minimal on purpose — any heavier work belongs in a submodule.
|
||||
"""
|
||||
from decnet.config_ini import load_ini_config as _load_ini_config
|
||||
|
||||
_load_ini_config()
|
||||
|
||||
7
decnet/agent/__init__.py
Normal file
7
decnet/agent/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""DECNET worker agent — runs on every SWARM worker host.
|
||||
|
||||
Exposes an mTLS-protected FastAPI service the master's SWARM controller
|
||||
calls to deploy, mutate, and tear down deckies locally. The agent reuses
|
||||
the existing `decnet.engine.deployer` code path unchanged, so a worker runs
|
||||
deckies the same way `decnet deploy --mode unihost` does today.
|
||||
"""
|
||||
144
decnet/agent/app.py
Normal file
144
decnet/agent/app.py
Normal file
@@ -0,0 +1,144 @@
|
||||
"""Worker-side FastAPI app.
|
||||
|
||||
Protected by mTLS at the ASGI/uvicorn transport layer: uvicorn is started
|
||||
with ``--ssl-ca-certs`` + ``--ssl-cert-reqs 2`` (CERT_REQUIRED), so any
|
||||
client that cannot prove a cert signed by the DECNET CA is rejected before
|
||||
reaching a handler. Once past the TLS handshake, all peers are trusted
|
||||
equally (the only entity holding a CA-signed cert is the master
|
||||
controller).
|
||||
|
||||
Endpoints mirror the existing unihost CLI verbs:
|
||||
|
||||
* ``POST /deploy`` — body: serialized ``DecnetConfig``
|
||||
* ``POST /teardown`` — body: optional ``{"decky_id": "..."}``
|
||||
* ``POST /mutate`` — body: ``{"decky_id": "...", "services": [...]}``
|
||||
* ``GET /status`` — deployment snapshot
|
||||
* ``GET /health``  — liveness probe. mTLS is still required (it is
  enforced at the transport layer); the master pings it with its cert.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from decnet.agent import executor as _exec
|
||||
from decnet.agent import heartbeat as _heartbeat
|
||||
from decnet.config import DecnetConfig
|
||||
from decnet.logging import get_logger
|
||||
|
||||
log = get_logger("agent.app")
|
||||
|
||||
|
||||
@asynccontextmanager
async def _lifespan(app: FastAPI):
    """FastAPI lifespan: run the master-heartbeat task for the app's lifetime."""
    # Best-effort: if identity/bundle plumbing isn't configured (e.g. dev
    # runs or non-enrolled hosts), heartbeat.start() is a silent no-op.
    _heartbeat.start()
    try:
        yield
    finally:
        # Always cancel/await the heartbeat task, even on shutdown errors.
        await _heartbeat.stop()
|
||||
|
||||
|
||||
# Worker-facing FastAPI app. Interactive docs and the OpenAPI schema are
# disabled on purpose: the agent is an internal mTLS-only service, and every
# byte of exposed surface matters on a honeypot worker.
app = FastAPI(
    title="DECNET SWARM Agent",
    version="0.1.0",
    docs_url=None,  # no interactive docs on worker — narrow attack surface
    redoc_url=None,
    openapi_url=None,
    lifespan=_lifespan,
    responses={
        400: {"description": "Malformed request body"},
        500: {"description": "Executor error"},
    },
)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ schemas
|
||||
|
||||
class DeployRequest(BaseModel):
    """Body for ``POST /deploy``."""
    # Full config to materialise; built by the master, re-localized worker-side.
    config: DecnetConfig = Field(..., description="Full DecnetConfig to materialise on this worker")
    # These mirror the unihost CLI flags of the same names.
    dry_run: bool = False
    no_cache: bool = False
|
||||
|
||||
|
||||
class TeardownRequest(BaseModel):
    """Body for ``POST /teardown``."""
    # None means: tear down every decky on this worker.
    decky_id: Optional[str] = None
|
||||
|
||||
|
||||
class MutateRequest(BaseModel):
    """Body for ``POST /mutate`` (endpoint currently answers 501)."""
    decky_id: str
    # Replacement service list for the decky.
    services: list[str]
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ routes
|
||||
|
||||
@app.get("/health")
async def health() -> dict[str, str]:
    """Liveness probe.

    mTLS is enforced at the transport layer like every other route, so
    merely reaching this handler already proves a valid client cert.
    """
    return dict(status="ok")
|
||||
|
||||
|
||||
@app.get("/status")
async def status() -> dict:
    """Return the executor's current deployment snapshot."""
    snapshot = await _exec.status()
    return snapshot
|
||||
|
||||
|
||||
@app.post(
    "/deploy",
    responses={500: {"description": "Deployer raised an exception materialising the config"}},
)
async def deploy(req: DeployRequest) -> dict:
    """Materialise the supplied DecnetConfig on this worker.

    Executor failures surface as a 500 carrying the exception text so the
    master can log the worker-side cause.
    """
    try:
        await _exec.deploy(req.config, dry_run=req.dry_run, no_cache=req.no_cache)
    except Exception as exc:
        log.exception("agent.deploy failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    deployed_count = len(req.config.deckies)
    return {"status": "deployed", "deckies": deployed_count}
|
||||
|
||||
|
||||
@app.post(
    "/teardown",
    responses={500: {"description": "Teardown raised an exception"}},
)
async def teardown(req: TeardownRequest) -> dict:
    """Tear down one decky (``req.decky_id``) or all of them (``None``)."""
    try:
        await _exec.teardown(req.decky_id)
    except Exception as exc:
        log.exception("agent.teardown failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return dict(status="torn_down", decky_id=req.decky_id)
|
||||
|
||||
|
||||
@app.post(
    "/self-destruct",
    responses={500: {"description": "Reaper could not be scheduled"}},
)
async def self_destruct() -> dict:
    """Stop all DECNET services on this worker and delete the install
    footprint. Called by the master during decommission. Logs under
    /var/log/decnet* are preserved. Fire-and-forget — the response (HTTP
    200) is returned before the detached reaper starts deleting files;
    the executor schedules the reaper with a short delay."""
    try:
        await _exec.self_destruct()
    except Exception as exc:
        log.exception("agent.self_destruct failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return {"status": "self_destruct_scheduled"}
|
||||
|
||||
|
||||
@app.post(
    "/mutate",
    responses={501: {"description": "Worker-side mutate not yet implemented"}},
)
async def mutate(req: MutateRequest) -> dict:
    """Not implemented on the worker (v1) — always answers 501.

    The master currently performs mutation by re-sending a full /deploy
    with the updated DecnetConfig, which avoids duplicating mutation logic
    on the worker. When worker-side mutate lands, this 501 becomes a real
    redeploy-of-a-single-decky path.
    """
    raise HTTPException(
        status_code=501,
        detail="Per-decky mutate is performed via /deploy with updated services",
    )
|
||||
223
decnet/agent/executor.py
Normal file
223
decnet/agent/executor.py
Normal file
@@ -0,0 +1,223 @@
|
||||
"""Thin adapter between the agent's HTTP endpoints and the existing
|
||||
``decnet.engine.deployer`` code path.
|
||||
|
||||
Kept deliberately small: the agent does not re-implement deployment logic,
|
||||
it only translates a master RPC into the same function calls the unihost
|
||||
CLI already uses. Everything runs in a worker thread (the deployer is
|
||||
blocking) so the FastAPI event loop stays responsive.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from ipaddress import IPv4Network
|
||||
from typing import Any
|
||||
|
||||
from decnet.engine import deployer as _deployer
|
||||
from decnet.config import DecnetConfig, load_state, clear_state
|
||||
from decnet.logging import get_logger
|
||||
from decnet.network import (
|
||||
allocate_ips,
|
||||
detect_interface,
|
||||
detect_subnet,
|
||||
get_host_ip,
|
||||
)
|
||||
|
||||
log = get_logger("agent.executor")
|
||||
|
||||
|
||||
def _relocalize(config: DecnetConfig) -> DecnetConfig:
    """Rewrite a master-built config to the worker's local network reality.

    The master fills ``interface``/``subnet``/``gateway`` from its own box
    before dispatching; on a worker whose NIC name differs (master on
    ``wlp6s0``, worker on ``enp0s3``) the deployer would blow up. We
    therefore always re-detect locally, and when the worker's subnet
    differs from the master's we also re-allocate decky IPs out of the
    local subnet so they are actually reachable.
    """
    iface = detect_interface()
    subnet, gateway = detect_subnet(iface)
    host_ip = get_host_ip(iface)

    updates: dict[str, Any] = {
        "interface": iface,
        "subnet": subnet,
        "gateway": gateway,
    }

    local_net = IPv4Network(subnet, strict=False)
    master_net = IPv4Network(config.subnet, strict=False) if config.subnet else None
    if master_net is None or master_net != local_net:
        log.info(
            "agent.deploy subnet mismatch master=%s local=%s — re-allocating decky IPs",
            config.subnet, subnet,
        )
        fresh_ips = allocate_ips(
            subnet=subnet,
            gateway=gateway,
            host_ip=host_ip,
            count=len(config.deckies),
        )
        updates["deckies"] = [
            decky.model_copy(update={"ip": addr})
            for decky, addr in zip(config.deckies, fresh_ips)
        ]

    return config.model_copy(update=updates)
|
||||
|
||||
|
||||
async def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False) -> None:
    """Run the blocking deployer off-loop. The deployer itself calls
    save_state() internally once the compose file is materialised.

    Args:
        config: Master-built DecnetConfig (re-localized first in swarm mode).
        dry_run: Generate artifacts without starting containers.
        no_cache: Force image rebuilds, bypassing Docker layer cache.
    """
    log.info(
        "agent.deploy mode=%s deckies=%d interface=%s (incoming)",
        config.mode, len(config.deckies), config.interface,
    )
    if config.mode == "swarm":
        # Master-supplied interface/subnet/gateway describe the MASTER's box;
        # rewrite them to this worker's network before deploying.
        config = _relocalize(config)
        log.info(
            "agent.deploy relocalized interface=%s subnet=%s gateway=%s",
            config.interface, config.subnet, config.gateway,
        )
    # The deployer is blocking — run it in a worker thread so the FastAPI
    # event loop stays responsive. The trailing False is a positional flag
    # on _deployer.deploy — TODO confirm its meaning against the signature.
    await asyncio.to_thread(_deployer.deploy, config, dry_run, no_cache, False)
|
||||
|
||||
|
||||
async def teardown(decky_id: str | None = None) -> None:
    """Tear down one decky (by id) or, when decky_id is None, everything.

    Runs the blocking deployer off-loop; a full teardown additionally
    clears the persisted deployment state.
    """
    log.info("agent.teardown decky_id=%s", decky_id)
    await asyncio.to_thread(_deployer.teardown, decky_id)
    if decky_id is None:
        await asyncio.to_thread(clear_state)
|
||||
|
||||
|
||||
def _decky_runtime_states(config: DecnetConfig) -> dict[str, dict[str, Any]]:
    """Map decky_name → {"running": bool, "services": {svc: container_state}}.

    The master uses this after a partial-failure deploy to see which
    deckies actually came up, instead of tainting the whole shard.
    Best-effort: any docker error yields an empty map, never an exception.
    """
    try:
        import docker  # local import — agent-only path
        containers = docker.from_env().containers.list(all=True, ignore_removed=True)
        live = {c.name: c.status for c in containers}
    except Exception:  # pragma: no cover — defensive
        log.exception("_decky_runtime_states: docker query failed")
        return {}

    result: dict[str, dict[str, Any]] = {}
    for decky in config.deckies:
        states: dict[str, Any] = {}
        for svc in decky.services:
            # Container naming convention: <decky>-<service>, underscores dashed.
            container_name = f"{decky.name}-{svc.replace('_', '-')}"
            states[svc] = live.get(container_name, "absent")
        healthy = bool(states) and all(s == "running" for s in states.values())
        result[decky.name] = {"running": healthy, "services": states}
    return result
|
||||
|
||||
|
||||
_REAPER_SCRIPT = r"""#!/bin/bash
|
||||
# DECNET agent self-destruct reaper.
|
||||
# Runs detached from the agent process so it survives the agent's death.
|
||||
# Waits briefly for the HTTP response to drain, then stops services,
|
||||
# wipes install paths, and preserves logs.
|
||||
set +e
|
||||
|
||||
sleep 3
|
||||
|
||||
# Stop decky containers started by the local deployer (best-effort).
|
||||
if command -v docker >/dev/null 2>&1; then
|
||||
docker ps -q --filter "label=com.docker.compose.project=decnet" | xargs -r docker stop
|
||||
docker ps -aq --filter "label=com.docker.compose.project=decnet" | xargs -r docker rm -f
|
||||
docker network rm decnet_lan 2>/dev/null
|
||||
fi
|
||||
|
||||
# Stop+disable every systemd unit the installer may have dropped.
|
||||
for unit in decnet-agent decnet-engine decnet-collector decnet-forwarder decnet-prober decnet-sniffer decnet-updater; do
|
||||
systemctl stop "$unit" 2>/dev/null
|
||||
systemctl disable "$unit" 2>/dev/null
|
||||
done
|
||||
|
||||
# Nuke install paths. Logs under /var/log/decnet* are intentionally
|
||||
# preserved — the operator typically wants them for forensic review.
|
||||
rm -rf /opt/decnet* /var/lib/decnet/* /usr/local/bin/decnet* /etc/decnet
|
||||
rm -f /etc/systemd/system/decnet-*.service /etc/systemd/system/decnet-*.timer
|
||||
|
||||
systemctl daemon-reload 2>/dev/null
|
||||
rm -f "$0"
|
||||
"""
|
||||
|
||||
|
||||
async def self_destruct() -> None:
    """Tear down deckies, then spawn a detached reaper that wipes the
    install footprint. Returns immediately so the HTTP response can drain
    before the reaper starts deleting files out from under the agent.

    Steps:
      1. Best-effort deployer teardown + state clear — cleans up
         macvlan/ipvlan routes properly; the reaper only force-stops.
      2. Write the reaper script to /tmp (survives rm -rf /opt/decnet*).
      3. Spawn it detached — via systemd-run when available so it escapes
         this service's cgroup, else a plain session-detached Popen.
    """
    import os
    import shutil
    import subprocess  # nosec B404
    import tempfile

    # Best-effort teardown first — the reaper also runs docker stop, but
    # going through the deployer gives the host-macvlan/ipvlan helper a
    # chance to clean up routes cleanly.
    try:
        await asyncio.to_thread(_deployer.teardown, None)
        await asyncio.to_thread(clear_state)
    except Exception:
        log.exception("self_destruct: pre-reap teardown failed — reaper will force-stop containers")

    # Reaper lives under /tmp so it survives rm -rf /opt/decnet*.
    fd, path = tempfile.mkstemp(prefix="decnet-reaper-", suffix=".sh", dir="/tmp")  # nosec B108 — reaper must outlive /opt/decnet removal
    try:
        os.write(fd, _REAPER_SCRIPT.encode())
    finally:
        os.close(fd)
    os.chmod(path, 0o700)  # nosec B103 — root-owned reaper, needs exec

    # The reaper MUST run outside decnet-agent.service's cgroup — otherwise
    # `systemctl stop decnet-agent` SIGTERMs the whole cgroup (reaper
    # included) before rm -rf completes. `start_new_session=True` gets us a
    # fresh POSIX session but does NOT escape the systemd cgroup, so when
    # systemd-run is available we launch the reaper as a transient unit
    # (detached from this service's cgroup; --collect garbage-collects the
    # unit when it exits), falling back to a bare session-detached Popen on
    # non-systemd hosts / containers.
    systemd_run = shutil.which("systemd-run")
    if systemd_run:
        argv = [
            systemd_run,
            "--collect",
            "--unit", f"decnet-reaper-{os.getpid()}",
            "--description", "DECNET agent self-destruct reaper",
            "/bin/bash", path,
        ]
    else:
        argv = ["/bin/bash", path]

    subprocess.Popen(  # nosec B603
        argv,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        close_fds=True,
        start_new_session=True,  # detach from our session on both paths
    )
    log.warning(
        "self_destruct: reaper spawned path=%s via=%s — agent will die in ~3s",
        path, "systemd-run" if systemd_run else "popen",
    )
|
||||
|
||||
|
||||
async def status() -> dict[str, Any]:
    """Deployment snapshot served to /status and embedded in heartbeats."""
    state = await asyncio.to_thread(load_state)
    if state is None:
        return {"deployed": False, "deckies": []}

    config, compose_path = state
    runtime = await asyncio.to_thread(_decky_runtime_states, config)
    snapshot: dict[str, Any] = {
        "deployed": True,
        "mode": config.mode,
        "compose_path": str(compose_path),
        "deckies": [decky.model_dump() for decky in config.deckies],
        "runtime": runtime,
    }
    return snapshot
|
||||
134
decnet/agent/heartbeat.py
Normal file
134
decnet/agent/heartbeat.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""Agent → master liveness heartbeat loop.
|
||||
|
||||
Every ``INTERVAL_S`` seconds the worker posts ``executor.status()`` to
|
||||
``POST <master>/swarm/heartbeat`` over mTLS. The master pins the
|
||||
presented client cert's SHA-256 against the ``SwarmHost`` row for the
|
||||
claimed ``host_uuid``; a match refreshes ``last_heartbeat`` + each
|
||||
``DeckyShard``'s snapshot + runtime state.
|
||||
|
||||
Identity comes from ``/etc/decnet/decnet.ini`` (seeded by the enroll
|
||||
bundle) — specifically ``DECNET_HOST_UUID`` and ``DECNET_MASTER_HOST``.
|
||||
The worker's existing ``~/.decnet/agent/`` bundle (or
|
||||
``/etc/decnet/agent/``) provides the mTLS client cert.
|
||||
|
||||
Started/stopped via the agent FastAPI app's lifespan. If identity
|
||||
plumbing is missing (pre-enrollment dev runs) the loop logs at DEBUG and
|
||||
declines to start — callers don't have to guard it.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import pathlib
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from decnet.agent import executor as _exec
|
||||
from decnet.logging import get_logger
|
||||
from decnet.swarm import pki
|
||||
from decnet.swarm.log_forwarder import build_worker_ssl_context
|
||||
|
||||
log = get_logger("agent.heartbeat")
|
||||
|
||||
INTERVAL_S = 30.0
|
||||
_TIMEOUT = httpx.Timeout(connect=5.0, read=10.0, write=5.0, pool=5.0)
|
||||
|
||||
_task: Optional[asyncio.Task] = None
|
||||
|
||||
|
||||
def _resolve_agent_dir() -> pathlib.Path:
    """Resolve the cert-bundle directory, matching the agent server's order:
    DECNET_AGENT_DIR env, else /etc/decnet/agent (production install),
    else ~/.decnet/agent (dev)."""
    import os

    override = os.environ.get("DECNET_AGENT_DIR")
    if override:
        return pathlib.Path(override)
    production = pathlib.Path("/etc/decnet/agent")
    return production if production.exists() else pki.DEFAULT_AGENT_DIR
|
||||
|
||||
|
||||
async def _tick(client: httpx.AsyncClient, url: str, host_uuid: str, agent_version: str) -> None:
    """POST one status snapshot to the master; log loudly on anything but 204."""
    payload = {
        "host_uuid": host_uuid,
        "agent_version": agent_version,
        "status": await _exec.status(),
    }
    resp = await client.post(url, json=payload)
    if resp.status_code == 204:
        return
    # 403 / 404 are terminal-ish — we still keep looping because an
    # operator may re-enrol the host mid-session, but we log loudly so
    # prod ops can spot cert-pinning drift.
    log.warning(
        "heartbeat rejected status=%d body=%s",
        resp.status_code, resp.text[:200],
    )
|
||||
|
||||
|
||||
async def _loop(url: str, host_uuid: str, agent_version: str, ssl_ctx) -> None:
    """Run heartbeat ticks forever, INTERVAL_S apart, until cancelled.

    Cancellation propagates (shutdown path); every other exception is
    logged and the loop keeps going — heartbeats are best-effort.
    """
    log.info("heartbeat loop starting url=%s host_uuid=%s interval=%ss",
             url, host_uuid, INTERVAL_S)
    # One client (one mTLS connection pool) reused for the whole loop.
    async with httpx.AsyncClient(verify=ssl_ctx, timeout=_TIMEOUT) as client:
        while True:
            try:
                await _tick(client, url, host_uuid, agent_version)
            except asyncio.CancelledError:
                raise
            except Exception:
                log.exception("heartbeat tick failed — will retry in %ss", INTERVAL_S)
            await asyncio.sleep(INTERVAL_S)
|
||||
|
||||
|
||||
def start() -> Optional[asyncio.Task]:
    """Kick off the background heartbeat task.

    No-op when identity is unconfigured (dev mode) or the worker SSL
    context cannot be built — callers never need to guard this.
    Idempotent: returns the existing task while one is still running.
    """
    global _task
    from decnet.env import (
        DECNET_HOST_UUID,
        DECNET_MASTER_HOST,
        DECNET_SWARMCTL_PORT,
    )

    if _task is not None and not _task.done():
        return _task
    if not (DECNET_HOST_UUID and DECNET_MASTER_HOST):
        log.debug("heartbeat not starting — DECNET_HOST_UUID or DECNET_MASTER_HOST unset")
        return None

    agent_dir = _resolve_agent_dir()
    try:
        ssl_ctx = build_worker_ssl_context(agent_dir)
    except Exception:
        log.exception("heartbeat not starting — worker SSL context unavailable at %s", agent_dir)
        return None

    try:
        from decnet import __version__ as agent_version
    except Exception:
        agent_version = "unknown"

    endpoint = f"https://{DECNET_MASTER_HOST}:{DECNET_SWARMCTL_PORT}/swarm/heartbeat"
    _task = asyncio.create_task(
        _loop(endpoint, DECNET_HOST_UUID, agent_version, ssl_ctx),
        name="agent-heartbeat",
    )
    return _task
|
||||
|
||||
|
||||
async def stop() -> None:
    """Cancel the heartbeat task and wait for it to wind down.

    CancelledError is the expected outcome of our own cancel() and is
    swallowed; any other exception means the loop died for a real reason,
    so it is logged rather than silently discarded.
    """
    global _task
    if _task is None:
        return
    _task.cancel()
    try:
        await _task
    except asyncio.CancelledError:
        pass  # normal: we just cancelled it
    except Exception:
        log.exception("heartbeat task raised during shutdown")
    _task = None
|
||||
70
decnet/agent/server.py
Normal file
70
decnet/agent/server.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""Worker-agent uvicorn launcher.
|
||||
|
||||
Starts ``decnet.agent.app:app`` over HTTPS with mTLS enforcement. The
|
||||
worker must already have a bundle in ``~/.decnet/agent/`` (delivered by
|
||||
``decnet swarm enroll`` from the master); if it does not, we refuse to
|
||||
start — unauthenticated agents are not a supported mode.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import pathlib
|
||||
import signal
|
||||
import subprocess # nosec B404
|
||||
import sys
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.swarm import pki
|
||||
|
||||
log = get_logger("agent.server")
|
||||
|
||||
|
||||
def run(host: str, port: int, agent_dir: pathlib.Path = pki.DEFAULT_AGENT_DIR) -> int:
    """Launch uvicorn for decnet.agent.app:app over mTLS; return its exit code.

    Refuses to start (exit code 2) when no enroll bundle is present —
    unauthenticated agents are not a supported mode.
    """
    bundle = pki.load_worker_bundle(agent_dir)
    if bundle is None:
        print(
            f"[agent] No cert bundle at {agent_dir}. "
            f"Run `decnet swarm enroll` from the master first.",
            file=sys.stderr,
        )
        return 2

    # Bundle layout is fixed by `decnet swarm enroll`.
    keyfile = agent_dir / "worker.key"
    certfile = agent_dir / "worker.crt"
    cafile = agent_dir / "ca.crt"

    cmd = [
        sys.executable,
        "-m",
        "uvicorn",
        "decnet.agent.app:app",
        "--host",
        host,
        "--port",
        str(port),
        "--ssl-keyfile",
        str(keyfile),
        "--ssl-certfile",
        str(certfile),
        "--ssl-ca-certs",
        str(cafile),
        # 2 == ssl.CERT_REQUIRED — clients MUST present a CA-signed cert.
        "--ssl-cert-reqs",
        "2",
    ]
    log.info("agent starting host=%s port=%d bundle=%s", host, port, agent_dir)
    # Own process group for clean Ctrl+C / SIGTERM propagation to uvicorn
    # workers (same pattern as `decnet api`).
    proc = subprocess.Popen(cmd, start_new_session=True)  # nosec B603
    try:
        return proc.wait()
    except KeyboardInterrupt:
        try:
            # start_new_session made proc its own process-group leader, so
            # killpg(proc.pid, ...) reaches uvicorn and all its workers.
            os.killpg(proc.pid, signal.SIGTERM)
            try:
                return proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                # Graceful stop timed out — force-kill the whole group.
                os.killpg(proc.pid, signal.SIGKILL)
                return proc.wait()
        except ProcessLookupError:
            # Group already gone — treat as a clean exit.
            return 0
|
||||
476
decnet/cli.py
476
decnet/cli.py
@@ -1,476 +0,0 @@
|
||||
"""
|
||||
DECNET CLI — entry point for all commands.
|
||||
|
||||
Usage:
|
||||
decnet deploy --mode unihost --deckies 5 --randomize-services
|
||||
decnet status
|
||||
decnet teardown [--all | --id decky-01]
|
||||
decnet services
|
||||
"""
|
||||
|
||||
import signal
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
from decnet.env import (
|
||||
DECNET_API_HOST,
|
||||
DECNET_API_PORT,
|
||||
DECNET_INGEST_LOG_FILE,
|
||||
DECNET_WEB_HOST,
|
||||
DECNET_WEB_PORT,
|
||||
)
|
||||
from decnet.archetypes import Archetype, all_archetypes, get_archetype
|
||||
from decnet.config import (
|
||||
DeckyConfig,
|
||||
DecnetConfig,
|
||||
random_hostname,
|
||||
)
|
||||
from decnet.distros import all_distros, get_distro
|
||||
from decnet.fleet import all_service_names, build_deckies, build_deckies_from_ini
|
||||
from decnet.ini_loader import IniConfig, load_ini
|
||||
from decnet.network import detect_interface, detect_subnet, allocate_ips, get_host_ip
|
||||
from decnet.services.registry import all_services
|
||||
|
||||
app = typer.Typer(
|
||||
name="decnet",
|
||||
help="Deploy a deception network of honeypot deckies on your LAN.",
|
||||
no_args_is_help=True,
|
||||
)
|
||||
console = Console()
|
||||
|
||||
|
||||
def _kill_api() -> None:
    """Find and kill any running DECNET API (uvicorn) or mutator processes."""
    import os
    import psutil

    stopped_any = False
    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            cmdline = proc.info['cmdline']
            if not cmdline:
                continue
            pid = proc.info['pid']
            # argv element match (not substring): uvicorn serving our app.
            if "uvicorn" in cmdline and "decnet.web.api:app" in cmdline:
                console.print(f"[yellow]Stopping DECNET API (PID {pid})...[/]")
                os.kill(pid, signal.SIGTERM)
                stopped_any = True
            elif "decnet.cli" in cmdline and "mutate" in cmdline and "--watch" in cmdline:
                console.print(f"[yellow]Stopping DECNET Mutator Watcher (PID {pid})...[/]")
                os.kill(pid, signal.SIGTERM)
                stopped_any = True
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue

    if stopped_any:
        console.print("[green]Background processes stopped.[/]")
|
||||
|
||||
|
||||
@app.command()
def api(
    port: int = typer.Option(DECNET_API_PORT, "--port", help="Port for the backend API"),
    host: str = typer.Option(DECNET_API_HOST, "--host", help="Host IP for the backend API"),
    log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Path to the DECNET log file to monitor"),
) -> None:
    """Run the DECNET API and Web Dashboard in standalone mode."""
    import os
    import subprocess  # nosec B404
    import sys

    console.print(f"[green]Starting DECNET API on {host}:{port}...[/]")
    # Child inherits our environment plus the chosen ingest log path.
    child_env = os.environ.copy()
    child_env["DECNET_INGEST_LOG_FILE"] = str(log_file)
    cmd = [sys.executable, "-m", "uvicorn", "decnet.web.api:app", "--host", host, "--port", str(port)]
    try:
        subprocess.run(cmd, env=child_env)  # nosec B603 B404
    except KeyboardInterrupt:
        pass
    except (FileNotFoundError, subprocess.SubprocessError):
        console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
|
||||
|
||||
|
||||
@app.command()
def deploy(
    mode: str = typer.Option("unihost", "--mode", "-m", help="Deployment mode: unihost | swarm"),
    deckies: Optional[int] = typer.Option(None, "--deckies", "-n", help="Number of deckies to deploy (required without --config)", min=1),
    interface: Optional[str] = typer.Option(None, "--interface", "-i", help="Host NIC (auto-detected if omitted)"),
    subnet: Optional[str] = typer.Option(None, "--subnet", help="LAN subnet CIDR (auto-detected if omitted)"),
    ip_start: Optional[str] = typer.Option(None, "--ip-start", help="First decky IP (auto if omitted)"),
    services: Optional[str] = typer.Option(None, "--services", help="Comma-separated services, e.g. ssh,smb,rdp"),
    randomize_services: bool = typer.Option(False, "--randomize-services", help="Assign random services to each decky"),
    distro: Optional[str] = typer.Option(None, "--distro", help="Comma-separated distro slugs, e.g. debian,ubuntu22,rocky9"),
    randomize_distros: bool = typer.Option(False, "--randomize-distros", help="Assign a random distro to each decky"),
    log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Host path for the collector to write RFC 5424 logs (e.g. /var/log/decnet/decnet.log)"),
    archetype_name: Optional[str] = typer.Option(None, "--archetype", "-a", help="Machine archetype slug (e.g. linux-server, windows-workstation)"),
    mutate_interval: Optional[int] = typer.Option(30, "--mutate-interval", help="Automatically rotate services every N minutes"),
    dry_run: bool = typer.Option(False, "--dry-run", help="Generate compose file without starting containers"),
    no_cache: bool = typer.Option(False, "--no-cache", help="Force rebuild all images, ignoring Docker layer cache"),
    parallel: bool = typer.Option(False, "--parallel", help="Build all images concurrently (enables BuildKit, separates build from up)"),
    ipvlan: bool = typer.Option(False, "--ipvlan", help="Use IPvlan L2 instead of MACVLAN (required on WiFi interfaces)"),
    config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to INI config file"),
    api: bool = typer.Option(False, "--api", help="Start the FastAPI backend to ingest and serve logs"),
    api_port: int = typer.Option(8000, "--api-port", help="Port for the backend API"),
) -> None:
    """Deploy deckies to the LAN.

    Builds the decky fleet either from an INI file (--config) or from CLI
    flags, assembles a DecnetConfig, hands it to the engine, then optionally
    spawns three detached background processes: the mutator watcher, the
    log collector, and the FastAPI backend.
    """
    import os

    if mode not in ("unihost", "swarm"):
        console.print("[red]--mode must be 'unihost' or 'swarm'[/]")
        raise typer.Exit(1)

    # ------------------------------------------------------------------ #
    # Config-file path                                                   #
    # ------------------------------------------------------------------ #
    if config_file:
        try:
            ini = load_ini(config_file)
        except FileNotFoundError as e:
            console.print(f"[red]{e}[/]")
            raise typer.Exit(1) from e

        # CLI flags win over the INI; auto-detection is the last resort.
        iface = interface or ini.interface or detect_interface()
        subnet_cidr = subnet or ini.subnet
        effective_gateway = ini.gateway
        if subnet_cidr is None:
            subnet_cidr, effective_gateway = detect_subnet(iface)
        elif effective_gateway is None:
            _, effective_gateway = detect_subnet(iface)

        host_ip = get_host_ip(iface)
        console.print(f"[dim]Config:[/] {config_file} [dim]Interface:[/] {iface} "
                      f"[dim]Subnet:[/] {subnet_cidr} [dim]Gateway:[/] {effective_gateway} "
                      f"[dim]Host IP:[/] {host_ip}")

        # User-defined services from the INI must be registered before the
        # fleet builder resolves service names.
        if ini.custom_services:
            from decnet.custom_service import CustomService
            from decnet.services.registry import register_custom_service
            for cs in ini.custom_services:
                register_custom_service(
                    CustomService(
                        name=cs.name,
                        image=cs.image,
                        exec_cmd=cs.exec_cmd,
                        ports=cs.ports,
                    )
                )

        effective_log_file = log_file
        try:
            decky_configs = build_deckies_from_ini(
                ini, subnet_cidr, effective_gateway, host_ip, randomize_services, cli_mutate_interval=mutate_interval
            )
        except ValueError as e:
            console.print(f"[red]{e}[/]")
            raise typer.Exit(1) from e
    # ------------------------------------------------------------------ #
    # Classic CLI path                                                   #
    # ------------------------------------------------------------------ #
    else:
        if deckies is None:
            console.print("[red]--deckies is required when --config is not used.[/]")
            raise typer.Exit(1)

        services_list = [s.strip() for s in services.split(",")] if services else None
        if services_list:
            # Validate requested services against the plugin registry up front.
            known = set(all_service_names())
            unknown = [s for s in services_list if s not in known]
            if unknown:
                console.print(f"[red]Unknown service(s): {unknown}. Available: {all_service_names()}[/]")
                raise typer.Exit(1)

        arch: Archetype | None = None
        if archetype_name:
            try:
                arch = get_archetype(archetype_name)
            except ValueError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1) from e

        # At least one service-selection strategy is required.
        if not services_list and not randomize_services and not arch:
            console.print("[red]Specify --services, --archetype, or --randomize-services.[/]")
            raise typer.Exit(1)

        iface = interface or detect_interface()
        if subnet is None:
            subnet_cidr, effective_gateway = detect_subnet(iface)
        else:
            subnet_cidr = subnet
            _, effective_gateway = detect_subnet(iface)

        host_ip = get_host_ip(iface)
        console.print(f"[dim]Interface:[/] {iface} [dim]Subnet:[/] {subnet_cidr} "
                      f"[dim]Gateway:[/] {effective_gateway} [dim]Host IP:[/] {host_ip}")

        distros_list = [d.strip() for d in distro.split(",")] if distro else None
        if distros_list:
            try:
                # get_distro raises ValueError on unknown slugs; we only
                # validate here, the fleet builder resolves them again.
                for slug in distros_list:
                    get_distro(slug)
            except ValueError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1) from e

        ips = allocate_ips(subnet_cidr, effective_gateway, host_ip, deckies, ip_start)
        decky_configs = build_deckies(
            deckies, ips, services_list, randomize_services,
            distros_explicit=distros_list, randomize_distros=randomize_distros,
            archetype=arch, mutate_interval=mutate_interval,
        )
        effective_log_file = log_file

    # The API needs a log file to ingest; default one if none was given.
    if api and not effective_log_file:
        effective_log_file = os.path.join(os.getcwd(), "decnet.log")
        console.print(f"[cyan]API mode enabled: defaulting log-file to {effective_log_file}[/]")

    config = DecnetConfig(
        mode=mode,
        interface=iface,
        subnet=subnet_cidr,
        gateway=effective_gateway,
        deckies=decky_configs,
        log_file=effective_log_file,
        ipvlan=ipvlan,
        mutate_interval=mutate_interval,
    )

    from decnet.engine import deploy as _deploy
    _deploy(config, dry_run=dry_run, no_cache=no_cache, parallel=parallel)

    # -------- background process 1: mutator watcher ------------------- #
    if mutate_interval is not None and not dry_run:
        import subprocess  # nosec B404
        import sys
        console.print(f"[green]Starting DECNET Mutator watcher in the background (interval: {mutate_interval}m)...[/]")
        try:
            subprocess.Popen(  # nosec B603
                [sys.executable, "-m", "decnet.cli", "mutate", "--watch"],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.STDOUT,
                start_new_session=True,
            )
        except (FileNotFoundError, subprocess.SubprocessError):
            console.print("[red]Failed to start mutator watcher.[/]")

    # -------- background process 2: log collector --------------------- #
    # Skipped in --api mode: the API ingests logs itself.
    if effective_log_file and not dry_run and not api:
        import subprocess  # noqa: F811 # nosec B404
        import sys
        from pathlib import Path as _Path
        _collector_err = _Path(effective_log_file).with_suffix(".collector.log")
        console.print(f"[bold cyan]Starting log collector[/] → {effective_log_file}")
        try:
            # Close the parent's copy of the handle once Popen has duplicated
            # it into the child — the bare open() here previously leaked it.
            with open(_collector_err, "a") as _out:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "collect", "--log-file", str(effective_log_file)],
                    stdin=subprocess.DEVNULL,
                    stdout=_out,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
        except (FileNotFoundError, subprocess.SubprocessError, OSError):
            console.print("[red]Failed to start log collector.[/]")

    # -------- background process 3: FastAPI backend ------------------- #
    if api and not dry_run:
        import subprocess  # nosec B404
        import sys
        console.print(f"[green]Starting DECNET API on port {api_port}...[/]")
        _env: dict[str, str] = os.environ.copy()
        _env["DECNET_INGEST_LOG_FILE"] = str(effective_log_file or "")
        try:
            subprocess.Popen(  # nosec B603
                [sys.executable, "-m", "uvicorn", "decnet.web.api:app", "--host", DECNET_API_HOST, "--port", str(api_port)],
                env=_env,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.STDOUT
            )
            console.print(f"[dim]API running at http://{DECNET_API_HOST}:{api_port}[/]")
        except (FileNotFoundError, subprocess.SubprocessError):
            console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
|
||||
|
||||
|
||||
@app.command()
def collect(
    log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write RFC 5424 syslog lines and .json records"),
) -> None:
    """Stream Docker logs from all running decky service containers to a log file."""
    import asyncio

    from decnet.collector import log_collector_worker

    console.print(f"[bold cyan]Collector starting[/] → {log_file}")
    # The worker runs until interrupted; asyncio.run owns the event loop.
    worker = log_collector_worker(log_file)
    asyncio.run(worker)
|
||||
|
||||
|
||||
@app.command()
def mutate(
    watch: bool = typer.Option(False, "--watch", "-w", help="Run continuously and mutate deckies according to their interval"),
    decky_name: Optional[str] = typer.Option(None, "--decky", "-d", help="Force mutate a specific decky immediately"),
    force_all: bool = typer.Option(False, "--all", help="Force mutate all deckies immediately"),
) -> None:
    """Manually trigger or continuously watch for decky mutation."""
    from decnet.mutator import mutate_all, mutate_decky, run_watch_loop

    # Watch mode takes over the process and never falls through.
    if watch:
        run_watch_loop()
        return

    # One-shot mode: a named decky beats --all; otherwise mutate whatever
    # is due (force only when --all was given).
    if decky_name:
        mutate_decky(decky_name)
        return
    mutate_all(force=force_all)
|
||||
|
||||
|
||||
@app.command()
def status() -> None:
    """Show running deckies and their status."""
    # Thin shim: all the work lives in the engine module.
    from decnet.engine import status as _engine_status

    _engine_status()
|
||||
|
||||
|
||||
@app.command()
def teardown(
    all_: bool = typer.Option(False, "--all", help="Tear down all deckies and remove network"),
    id_: Optional[str] = typer.Option(None, "--id", help="Tear down a specific decky by name"),
) -> None:
    """Stop and remove deckies."""
    # Refuse to run without an explicit target selection.
    if not (all_ or id_):
        console.print("[red]Specify --all or --id <name>.[/]")
        raise typer.Exit(1)

    from decnet.engine import teardown as _engine_teardown

    _engine_teardown(decky_id=id_)

    if all_:
        # A full teardown also stops the background API/mutator processes.
        _kill_api()
|
||||
|
||||
|
||||
@app.command(name="services")
def list_services() -> None:
    """List all registered honeypot service plugins."""
    registry = all_services()

    table = Table(title="Available Services", show_lines=True)
    table.add_column("Name", style="bold cyan")
    table.add_column("Ports")
    table.add_column("Image")

    # One row per plugin, sorted by name for a stable listing.
    for svc_name in sorted(registry):
        svc = registry[svc_name]
        port_list = ", ".join(str(p) for p in svc.ports)
        table.add_row(svc_name, port_list, svc.default_image)

    console.print(table)
|
||||
|
||||
|
||||
@app.command(name="distros")
def list_distros() -> None:
    """List all available OS distro profiles for deckies."""
    table = Table(title="Available Distro Profiles", show_lines=True)
    table.add_column("Slug", style="bold cyan")
    table.add_column("Display Name")
    table.add_column("Docker Image", style="dim")

    # Stable, slug-sorted listing of every registered profile.
    profiles = all_distros()
    for slug in sorted(profiles):
        prof = profiles[slug]
        table.add_row(slug, prof.display_name, prof.image)

    console.print(table)
|
||||
|
||||
|
||||
@app.command(name="correlate")
def correlate(
    log_file: Optional[str] = typer.Option(None, "--log-file", "-f", help="Path to DECNET syslog file to analyse"),
    min_deckies: int = typer.Option(2, "--min-deckies", "-m", help="Minimum number of distinct deckies an IP must touch to be reported"),
    output: str = typer.Option("table", "--output", "-o", help="Output format: table | json | syslog"),
    emit_syslog: bool = typer.Option(False, "--emit-syslog", help="Also print traversal events as RFC 5424 lines (for SIEM piping)"),
) -> None:
    """Analyse logs for cross-decky traversals and print the attacker movement graph."""
    import sys
    import json as _json
    from pathlib import Path

    from decnet.correlation.engine import CorrelationEngine

    engine = CorrelationEngine()

    # Input source: an explicit --log-file wins; otherwise fall back to
    # piped stdin; an interactive terminal with no file is an error.
    if log_file:
        path = Path(log_file)
        if not path.exists():
            console.print(f"[red]Log file not found: {log_file}[/]")
            raise typer.Exit(1)
        engine.ingest_file(path)
    elif not sys.stdin.isatty():
        # stdin is a pipe/redirect — feed it to the engine line by line.
        for line in sys.stdin:
            engine.ingest(line)
    else:
        console.print("[red]Provide --log-file or pipe log data via stdin.[/]")
        raise typer.Exit(1)

    traversals = engine.traversals(min_deckies)

    # Output dispatch: machine formats (json/syslog) print only the data;
    # the default table format adds a human-readable summary line.
    if output == "json":
        console.print_json(_json.dumps(engine.report_json(min_deckies), indent=2))
    elif output == "syslog":
        for line in engine.traversal_syslog_lines(min_deckies):
            typer.echo(line)
    else:
        if not traversals:
            console.print(
                f"[yellow]No traversals detected "
                f"(min_deckies={min_deckies}, events_indexed={engine.events_indexed}).[/]"
            )
        else:
            console.print(engine.report_table(min_deckies))
        console.print(
            f"[dim]Parsed {engine.lines_parsed} lines · "
            f"indexed {engine.events_indexed} events · "
            f"{len(engine.all_attackers())} unique IPs · "
            f"[bold]{len(traversals)}[/] traversal(s)[/]"
        )

    # --emit-syslog appends raw RFC 5424 lines regardless of the chosen format.
    if emit_syslog:
        for line in engine.traversal_syslog_lines(min_deckies):
            typer.echo(line)
|
||||
|
||||
|
||||
@app.command(name="archetypes")
def list_archetypes() -> None:
    """List all machine archetype profiles."""
    table = Table(title="Machine Archetypes", show_lines=True)
    table.add_column("Slug", style="bold cyan")
    table.add_column("Display Name")
    table.add_column("Default Services", style="green")
    table.add_column("Description", style="dim")

    # One row per archetype, sorted by slug for a stable listing.
    for slug, profile in sorted(all_archetypes().items()):
        svc_list = ", ".join(profile.services)
        table.add_row(slug, profile.display_name, svc_list, profile.description)

    console.print(table)
|
||||
|
||||
|
||||
@app.command(name="web")
def serve_web(
    web_port: int = typer.Option(DECNET_WEB_PORT, "--web-port", help="Port to serve the DECNET Web Dashboard"),
    host: str = typer.Option(DECNET_WEB_HOST, "--host", help="Host IP to serve the Web Dashboard"),
) -> None:
    """Serve the DECNET Web Dashboard frontend.

    Serves the pre-built SPA bundle from decnet_web/dist with an
    index.html fallback for client-side routes. Blocks until Ctrl-C.
    """
    import http.server
    import socketserver
    from pathlib import Path

    dist_dir = Path(__file__).parent.parent / "decnet_web" / "dist"

    if not dist_dir.exists():
        console.print(f"[red]Frontend build not found at {dist_dir}. Make sure you run 'npm run build' inside 'decnet_web'.[/]")
        raise typer.Exit(1)

    class SPAHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
        """Static handler with SPA fallback: paths with no file on disk serve index.html."""

        def __init__(self, *args, **kwargs):
            # Serve straight from the build dir instead of os.chdir(),
            # which mutated the working directory of the whole process.
            super().__init__(*args, directory=str(dist_dir), **kwargs)

        def do_GET(self):
            requested = Path(self.translate_path(self.path))
            if not requested.exists() or requested.is_dir():
                # Client-side routes have no backing file — let the SPA
                # router resolve them from index.html.
                self.path = "/index.html"
            return super().do_GET()

    class _DashboardServer(socketserver.TCPServer):
        # Allow immediate restarts without "Address already in use"
        # while the old socket lingers in TIME_WAIT.
        allow_reuse_address = True

    with _DashboardServer((host, web_port), SPAHTTPRequestHandler) as httpd:
        console.print(f"[green]Serving DECNET Web Dashboard on http://{host}:{web_port}[/]")
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            console.print("\n[dim]Shutting down dashboard server.[/]")
|
||||
|
||||
# Allow direct execution of this module (e.g. `python -m decnet.cli`).
if __name__ == '__main__':  # pragma: no cover
    app()
|
||||
80
decnet/cli/__init__.py
Normal file
80
decnet/cli/__init__.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""
DECNET CLI — entry point for all commands.

Usage:
    decnet deploy --mode unihost --deckies 5 --randomize-services
    decnet status
    decnet teardown [--all | --id decky-01]
    decnet services

Layout: each command module exports ``register(app)`` which attaches its
commands to the passed Typer app. ``__init__.py`` builds the root app,
calls every module's ``register`` in order, then runs the master-only
gate. The gate must fire LAST so it sees the fully-populated dispatch
table before filtering.
"""

from __future__ import annotations

import typer

from . import (
    agent,
    api,
    db,
    deploy,
    forwarder,
    inventory,
    lifecycle,
    listener,
    profiler,
    sniffer,
    swarm,
    swarmctl,
    updater,
    web,
    workers,
)
from .gating import _gate_commands_by_mode
from .utils import console as console, log as log

# Root Typer app; every command module below attaches itself to this.
app = typer.Typer(
    name="decnet",
    help="Deploy a deception network of honeypot deckies on your LAN.",
    no_args_is_help=True,
)

# Order matches the old flat layout so `decnet --help` reads the same.
for _mod in (
    api, swarmctl, agent, updater, listener, forwarder,
    swarm,
    deploy, lifecycle, workers, inventory,
    web, profiler, sniffer, db,
):
    _mod.register(app)

# Must run after ALL register() calls — see the module docstring: the gate
# filters the fully-populated command table.
_gate_commands_by_mode(app)

# Backwards-compat re-exports. Tests and third-party tooling import these
# directly from ``decnet.cli``; the refactor must keep them resolvable.
# (Deliberately placed after registration, hence the E402 suppressions.)
from .db import _db_reset_mysql_async  # noqa: E402,F401
from .gating import (  # noqa: E402,F401
    MASTER_ONLY_COMMANDS,
    MASTER_ONLY_GROUPS,
    _agent_mode_active,
    _require_master_mode,
)
from .utils import (  # noqa: E402,F401
    _daemonize,
    _http_request,
    _is_running,
    _kill_all_services,
    _pid_dir,
    _service_registry,
    _spawn_detached,
    _swarmctl_base_url,
)


if __name__ == "__main__":  # pragma: no cover
    app()
|
||||
64
decnet/cli/agent.py
Normal file
64
decnet/cli/agent.py
Normal file
@@ -0,0 +1,64 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import pathlib as _pathlib
|
||||
import sys as _sys
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``agent`` command to the root Typer app."""
    @app.command()
    def agent(
        port: int = typer.Option(8765, "--port", help="Port for the worker agent"),
        host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the worker agent"),  # nosec B104
        agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker cert bundle dir (default: ~/.decnet/agent, expanded under the running user's HOME — set this when running as sudo/root)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        no_forwarder: bool = typer.Option(False, "--no-forwarder", help="Do not auto-spawn the log forwarder alongside the agent"),
    ) -> None:
        """Run the DECNET SWARM worker agent (requires a cert bundle in ~/.decnet/agent/).

        By default, `decnet agent` auto-spawns `decnet forwarder` as a fully-
        detached sibling process so worker logs start flowing to the master
        without a second manual invocation. The forwarder survives agent
        restarts and crashes — if it dies on its own, restart it manually
        with `decnet forwarder --daemon …`. Pass --no-forwarder to skip.
        """
        from decnet.agent import server as _agent_server
        from decnet.env import DECNET_SWARM_MASTER_HOST, DECNET_INGEST_LOG_FILE
        from decnet.swarm import pki as _pki

        # --agent-dir overrides the PKI module's default bundle location.
        resolved_dir = _pathlib.Path(agent_dir) if agent_dir else _pki.DEFAULT_AGENT_DIR

        # Daemonize BEFORE spawning the forwarder so the forwarder is
        # parented to the detached process, not the foreground shell.
        if daemon:
            log.info("agent daemonizing host=%s port=%d", host, port)
            _utils._daemonize()

        # Auto-spawn the forwarder only when a master host is configured.
        if not no_forwarder and DECNET_SWARM_MASTER_HOST:
            fw_argv = [
                _sys.executable, "-m", "decnet", "forwarder",
                "--master-host", DECNET_SWARM_MASTER_HOST,
                # Syslog port is env-overridable; 6514 is the default.
                "--master-port", str(int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514"))),
                "--agent-dir", str(resolved_dir),
                "--log-file", str(DECNET_INGEST_LOG_FILE),
                "--daemon",
            ]
            try:
                pid = _utils._spawn_detached(fw_argv, _utils._pid_dir() / "forwarder.pid")
                log.info("agent auto-spawned forwarder pid=%d master=%s", pid, DECNET_SWARM_MASTER_HOST)
                console.print(f"[dim]Auto-spawned forwarder (pid {pid}) → {DECNET_SWARM_MASTER_HOST}.[/]")
            except Exception as e:  # noqa: BLE001
                # Best-effort: a failed forwarder spawn must not block the agent.
                log.warning("agent could not auto-spawn forwarder: %s", e)
                console.print(f"[yellow]forwarder auto-spawn skipped: {e}[/]")
        elif not no_forwarder:
            log.info("agent skipping forwarder auto-spawn (DECNET_SWARM_MASTER_HOST unset)")

        log.info("agent command invoked host=%s port=%d dir=%s", host, port, resolved_dir)
        console.print(f"[green]Starting DECNET worker agent on {host}:{port} (mTLS)...[/]")
        # Blocks until the agent server exits; propagate its exit code.
        rc = _agent_server.run(host, port, agent_dir=resolved_dir)
        if rc != 0:
            raise typer.Exit(rc)
|
||||
53
decnet/cli/api.py
Normal file
53
decnet/cli/api.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import signal
|
||||
import subprocess # nosec B404
|
||||
import sys
|
||||
|
||||
import typer
|
||||
|
||||
from decnet.env import DECNET_API_HOST, DECNET_API_PORT, DECNET_INGEST_LOG_FILE
|
||||
|
||||
from . import utils as _utils
|
||||
from .gating import _require_master_mode
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``api`` command to the root Typer app."""
    @app.command()
    def api(
        port: int = typer.Option(DECNET_API_PORT, "--port", help="Port for the backend API"),
        host: str = typer.Option(DECNET_API_HOST, "--host", help="Host IP for the backend API"),
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Path to the DECNET log file to monitor"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        workers: int = typer.Option(1, "--workers", "-w", min=1, help="Number of uvicorn worker processes"),
    ) -> None:
        """Run the DECNET API and Web Dashboard in standalone mode."""
        # Agent-mode hosts must not expose the API; this raises/exits if not master.
        _require_master_mode("api")
        if daemon:
            log.info("API daemonizing host=%s port=%d workers=%d", host, port, workers)
            _utils._daemonize()

        log.info("API command invoked host=%s port=%d workers=%d", host, port, workers)
        console.print(f"[green]Starting DECNET API on {host}:{port} (workers={workers})...[/]")
        # The uvicorn child discovers the log file via its environment.
        _env: dict[str, str] = os.environ.copy()
        _env["DECNET_INGEST_LOG_FILE"] = str(log_file)
        _cmd = [sys.executable, "-m", "uvicorn", "decnet.web.api:app",
                "--host", host, "--port", str(port), "--workers", str(workers)]
        try:
            # start_new_session=True makes the child the leader of a new
            # process group, so killpg(proc.pid, ...) signals uvicorn AND
            # all of its worker processes at once.
            proc = subprocess.Popen(_cmd, env=_env, start_new_session=True)  # nosec B603 B404
            try:
                proc.wait()
            except KeyboardInterrupt:
                # Graceful shutdown: SIGTERM the group, escalate to SIGKILL
                # if it has not exited within 10 seconds.
                try:
                    os.killpg(proc.pid, signal.SIGTERM)
                    try:
                        proc.wait(timeout=10)
                    except subprocess.TimeoutExpired:
                        os.killpg(proc.pid, signal.SIGKILL)
                        proc.wait()
                except ProcessLookupError:
                    # Process group already gone — nothing left to reap.
                    pass
        except (FileNotFoundError, subprocess.SubprocessError):
            console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
|
||||
130
decnet/cli/db.py
Normal file
130
decnet/cli/db.py
Normal file
@@ -0,0 +1,130 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from rich.table import Table
|
||||
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
# Every DECNET-owned table that `decnet db-reset` inspects and wipes.
_DB_RESET_TABLES: tuple[str, ...] = (
    # Order matters for DROP TABLE: child FKs first.
    # - attacker_behavior FK-references attackers.
    # - decky_shards FK-references swarm_hosts.
    "attacker_behavior",
    "attackers",
    "logs",
    "bounty",
    "state",
    "users",
    "decky_shards",
    "swarm_hosts",
)
|
||||
|
||||
|
||||
async def _db_reset_mysql_async(dsn: str, mode: str, confirm: bool) -> None:
    """Inspect + (optionally) wipe a MySQL database. Pulled out of the CLI
    wrapper so tests can drive it without spawning a Typer runner.

    Args:
        dsn: SQLAlchemy async MySQL URL.
        mode: "truncate" wipes rows; anything else DROPs each table.
        confirm: False = dry-run (print counts only); True = execute.
    """
    from urllib.parse import urlparse
    from sqlalchemy import text
    from sqlalchemy.ext.asyncio import create_async_engine

    db_name = urlparse(dsn).path.lstrip("/") or "(default)"
    engine = create_async_engine(dsn)
    try:
        # Phase 1 — inspection: row count per table; -1 marks a missing table.
        rows: dict[str, int] = {}
        async with engine.connect() as conn:
            for tbl in _DB_RESET_TABLES:
                try:
                    # Table names come from the fixed tuple above, never user input.
                    result = await conn.execute(text(f"SELECT COUNT(*) FROM `{tbl}`"))  # nosec B608
                    rows[tbl] = result.scalar() or 0
                except Exception:  # noqa: BLE001 — ProgrammingError for missing table varies by driver
                    rows[tbl] = -1

        summary = Table(title=f"DECNET MySQL reset — database `{db_name}` (mode={mode})")
        summary.add_column("Table", style="cyan")
        summary.add_column("Rows", justify="right")
        for tbl, count in rows.items():
            summary.add_row(tbl, "[dim]missing[/]" if count < 0 else f"{count:,}")
        console.print(summary)

        if not confirm:
            console.print(
                "[yellow]Dry-run only. Re-run with [bold]--i-know-what-im-doing[/] "
                "to actually execute.[/]"
            )
            return

        # Phase 2 — destructive pass in one transaction. FK checks are
        # disabled so TRUNCATE/DROP cannot abort mid-way on constraints.
        async with engine.begin() as conn:
            await conn.execute(text("SET FOREIGN_KEY_CHECKS = 0"))
            for tbl in _DB_RESET_TABLES:
                if rows.get(tbl, -1) < 0:
                    # Table was missing during inspection — skip it.
                    continue
                if mode == "truncate":
                    await conn.execute(text(f"TRUNCATE TABLE `{tbl}`"))
                    console.print(f"[green]✓ TRUNCATE {tbl}[/]")
                else:
                    await conn.execute(text(f"DROP TABLE `{tbl}`"))
                    console.print(f"[green]✓ DROP TABLE {tbl}[/]")
            await conn.execute(text("SET FOREIGN_KEY_CHECKS = 1"))

        console.print(f"[bold green]Done. Database `{db_name}` reset ({mode}).[/]")
    finally:
        # Always release the pool, even on failure.
        await engine.dispose()
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``db-reset`` command to the root Typer app."""
    @app.command(name="db-reset")
    def db_reset(
        i_know: bool = typer.Option(
            False,
            "--i-know-what-im-doing",
            help="Required to actually execute. Without it, the command runs in dry-run mode.",
        ),
        mode: str = typer.Option(
            "truncate",
            "--mode",
            help="truncate (wipe rows, keep schema) | drop-tables (DROP TABLE for each DECNET table)",
        ),
        url: Optional[str] = typer.Option(
            None,
            "--url",
            help="Override DECNET_DB_URL for this invocation (e.g. when cleanup needs admin creds).",
        ),
    ) -> None:
        """Wipe the MySQL database used by the DECNET dashboard.

        Destructive. Runs dry by default — pass --i-know-what-im-doing to commit.
        Only supported against MySQL; refuses to operate on SQLite.
        """
        import asyncio
        import os

        # Usage errors exit with code 2; runtime failures exit with 1.
        if mode not in ("truncate", "drop-tables"):
            console.print(f"[red]Invalid --mode '{mode}'. Expected: truncate | drop-tables.[/]")
            raise typer.Exit(2)

        db_type = os.environ.get("DECNET_DB_TYPE", "sqlite").lower()
        if db_type != "mysql":
            console.print(
                f"[red]db-reset is MySQL-only (DECNET_DB_TYPE='{db_type}'). "
                f"For SQLite, just delete the decnet.db file.[/]"
            )
            raise typer.Exit(2)

        # DSN resolution: --url flag > DECNET_DB_URL env > built from parts.
        dsn = url or os.environ.get("DECNET_DB_URL")
        if not dsn:
            from decnet.web.db.mysql.database import build_mysql_url
            try:
                dsn = build_mysql_url()
            except ValueError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(2) from e

        log.info("db-reset invoked mode=%s confirm=%s", mode, i_know)
        try:
            asyncio.run(_db_reset_mysql_async(dsn, mode=mode, confirm=i_know))
        except Exception as e:  # noqa: BLE001
            console.print(f"[red]db-reset failed: {e}[/]")
            raise typer.Exit(1) from e
|
||||
307
decnet/cli/deploy.py
Normal file
307
decnet/cli/deploy.py
Normal file
@@ -0,0 +1,307 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from rich.table import Table
|
||||
|
||||
from decnet.archetypes import Archetype, get_archetype
|
||||
from decnet.config import DecnetConfig
|
||||
from decnet.distros import get_distro
|
||||
from decnet.env import DECNET_API_HOST, DECNET_INGEST_LOG_FILE
|
||||
from decnet.fleet import all_service_names, build_deckies, build_deckies_from_ini
|
||||
from decnet.ini_loader import load_ini
|
||||
from decnet.network import detect_interface, detect_subnet, allocate_ips, get_host_ip
|
||||
|
||||
from . import utils as _utils
|
||||
from .gating import _require_master_mode
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def _deploy_swarm(config: "DecnetConfig", *, dry_run: bool, no_cache: bool) -> None:
    """Shard deckies round-robin across enrolled workers and POST to swarmctl.

    Raises typer.Exit(1) when no workers are available or when any worker
    reports a failed deploy, so scripts/CI see a non-zero exit code.
    """
    base = _utils._swarmctl_base_url(None)
    # Both 'enrolled' and 'active' hosts are eligible deploy targets.
    resp = _utils._http_request("GET", base + "/swarm/hosts?host_status=enrolled")
    enrolled = resp.json()
    resp2 = _utils._http_request("GET", base + "/swarm/hosts?host_status=active")
    active = resp2.json()
    workers = [*enrolled, *active]
    if not workers:
        console.print("[red]No enrolled workers — run `decnet swarm enroll ...` first.[/]")
        raise typer.Exit(1)

    # Round-robin sharding: decky i is pinned to worker i % len(workers)
    # via its host_uuid. model_copy keeps the configs immutable.
    assigned: list = []
    for idx, d in enumerate(config.deckies):
        target = workers[idx % len(workers)]
        assigned.append(d.model_copy(update={"host_uuid": target["uuid"]}))
    config = config.model_copy(update={"deckies": assigned})

    body = {"config": config.model_dump(mode="json"), "dry_run": dry_run, "no_cache": no_cache}
    console.print(f"[cyan]Dispatching {len(config.deckies)} deckies across {len(workers)} worker(s)...[/]")
    # Long timeout: the master builds and starts containers on every worker.
    resp3 = _utils._http_request("POST", base + "/swarm/deploy", json_body=body, timeout=900.0)
    results = resp3.json().get("results", [])

    # Per-worker result table; any single failure fails the whole command.
    table = Table(title="SWARM deploy results")
    for col in ("worker", "host_uuid", "ok", "detail"):
        table.add_column(col)
    any_failed = False
    for r in results:
        ok = bool(r.get("ok"))
        if not ok:
            any_failed = True
        detail = r.get("detail")
        if isinstance(detail, dict):
            # Successful deploys return a dict payload; show its status field.
            detail = detail.get("status") or "ok"
        table.add_row(
            str(r.get("host_name") or ""),
            str(r.get("host_uuid") or ""),
            "[green]yes[/]" if ok else "[red]no[/]",
            str(detail)[:80],  # truncate long error payloads to keep the table readable
        )
    console.print(table)
    if any_failed:
        raise typer.Exit(1)
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the `deploy` command to the given Typer app."""
    @app.command()
    def deploy(
        mode: str = typer.Option("unihost", "--mode", "-m", help="Deployment mode: unihost | swarm"),
        deckies: Optional[int] = typer.Option(None, "--deckies", "-n", help="Number of deckies to deploy (required without --config)", min=1),
        interface: Optional[str] = typer.Option(None, "--interface", "-i", help="Host NIC (auto-detected if omitted)"),
        subnet: Optional[str] = typer.Option(None, "--subnet", help="LAN subnet CIDR (auto-detected if omitted)"),
        ip_start: Optional[str] = typer.Option(None, "--ip-start", help="First decky IP (auto if omitted)"),
        services: Optional[str] = typer.Option(None, "--services", help="Comma-separated services, e.g. ssh,smb,rdp"),
        randomize_services: bool = typer.Option(False, "--randomize-services", help="Assign random services to each decky"),
        distro: Optional[str] = typer.Option(None, "--distro", help="Comma-separated distro slugs, e.g. debian,ubuntu22,rocky9"),
        randomize_distros: bool = typer.Option(False, "--randomize-distros", help="Assign a random distro to each decky"),
        log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Host path for the collector to write RFC 5424 logs (e.g. /var/log/decnet/decnet.log)"),
        archetype_name: Optional[str] = typer.Option(None, "--archetype", "-a", help="Machine archetype slug (e.g. linux-server, windows-workstation)"),
        mutate_interval: Optional[int] = typer.Option(30, "--mutate-interval", help="Automatically rotate services every N minutes"),
        dry_run: bool = typer.Option(False, "--dry-run", help="Generate compose file without starting containers"),
        no_cache: bool = typer.Option(False, "--no-cache", help="Force rebuild all images, ignoring Docker layer cache"),
        parallel: bool = typer.Option(False, "--parallel", help="Build all images concurrently (enables BuildKit, separates build from up)"),
        ipvlan: bool = typer.Option(False, "--ipvlan", help="Use IPvlan L2 instead of MACVLAN (required on WiFi interfaces)"),
        config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to INI config file"),
        api: bool = typer.Option(False, "--api", help="Start the FastAPI backend to ingest and serve logs"),
        api_port: int = typer.Option(8000, "--api-port", help="Port for the backend API"),
        daemon: bool = typer.Option(False, "--daemon", help="Detach to background as a daemon process"),
    ) -> None:
        """Deploy deckies to the LAN.

        Two configuration paths exist:
        with --config, an INI file drives decky construction (CLI
        --interface/--subnet still override INI values); without it,
        --deckies is mandatory plus at least one of --services,
        --archetype or --randomize-services.

        In swarm mode the built config is handed to _deploy_swarm and the
        command returns. In unihost mode the engine deploys locally and,
        unless --dry-run, detached child processes are spawned for the
        mutator, log collector, API, prober, profiler and sniffer.
        """
        import os
        import subprocess  # nosec B404
        import sys
        from pathlib import Path as _Path

        # Role gate first: agents must not run master-only commands even if
        # deploy() is invoked directly (bypassing Typer dispatch).
        _require_master_mode("deploy")
        if daemon:
            log.info("deploy daemonizing mode=%s deckies=%s", mode, deckies)
            _utils._daemonize()

        log.info("deploy command invoked mode=%s deckies=%s dry_run=%s", mode, deckies, dry_run)
        if mode not in ("unihost", "swarm"):
            console.print("[red]--mode must be 'unihost' or 'swarm'[/]")
            raise typer.Exit(1)

        # --- Path 1: INI-driven configuration -------------------------------
        if config_file:
            try:
                ini = load_ini(config_file)
            except FileNotFoundError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1)

            # CLI flag wins over INI value; fall back to auto-detection.
            iface = interface or ini.interface or detect_interface()
            subnet_cidr = subnet or ini.subnet
            effective_gateway = ini.gateway
            if subnet_cidr is None:
                subnet_cidr, effective_gateway = detect_subnet(iface)
            elif effective_gateway is None:
                # Subnet known but gateway missing: detect only the gateway.
                _, effective_gateway = detect_subnet(iface)

            host_ip = get_host_ip(iface)
            console.print(f"[dim]Config:[/] {config_file} [dim]Interface:[/] {iface} "
                          f"[dim]Subnet:[/] {subnet_cidr} [dim]Gateway:[/] {effective_gateway} "
                          f"[dim]Host IP:[/] {host_ip}")

            # INI-declared custom services are registered before deckies are
            # built so build_deckies_from_ini can resolve them by name.
            if ini.custom_services:
                from decnet.custom_service import CustomService
                from decnet.services.registry import register_custom_service
                for cs in ini.custom_services:
                    register_custom_service(
                        CustomService(
                            name=cs.name,
                            image=cs.image,
                            exec_cmd=cs.exec_cmd,
                            ports=cs.ports,
                        )
                    )

            effective_log_file = log_file
            try:
                decky_configs = build_deckies_from_ini(
                    ini, subnet_cidr, effective_gateway, host_ip, randomize_services, cli_mutate_interval=mutate_interval
                )
            except ValueError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1)
        # --- Path 2: pure-CLI configuration ---------------------------------
        else:
            if deckies is None:
                console.print("[red]--deckies is required when --config is not used.[/]")
                raise typer.Exit(1)

            services_list = [s.strip() for s in services.split(",")] if services else None
            if services_list:
                # Validate every requested service against the registry.
                known = set(all_service_names())
                unknown = [s for s in services_list if s not in known]
                if unknown:
                    console.print(f"[red]Unknown service(s): {unknown}. Available: {all_service_names()}[/]")
                    raise typer.Exit(1)

            arch: Archetype | None = None
            if archetype_name:
                try:
                    arch = get_archetype(archetype_name)
                except ValueError as e:
                    console.print(f"[red]{e}[/]")
                    raise typer.Exit(1)

            # At least one service-selection mechanism is required.
            if not services_list and not randomize_services and not arch:
                console.print("[red]Specify --services, --archetype, or --randomize-services.[/]")
                raise typer.Exit(1)

            iface = interface or detect_interface()
            if subnet is None:
                subnet_cidr, effective_gateway = detect_subnet(iface)
            else:
                subnet_cidr = subnet
                _, effective_gateway = detect_subnet(iface)

            host_ip = get_host_ip(iface)
            console.print(f"[dim]Interface:[/] {iface} [dim]Subnet:[/] {subnet_cidr} "
                          f"[dim]Gateway:[/] {effective_gateway} [dim]Host IP:[/] {host_ip}")

            distros_list = [d.strip() for d in distro.split(",")] if distro else None
            if distros_list:
                # get_distro raises ValueError for unknown slugs.
                try:
                    for slug in distros_list:
                        get_distro(slug)
                except ValueError as e:
                    console.print(f"[red]{e}[/]")
                    raise typer.Exit(1)

            ips = allocate_ips(subnet_cidr, effective_gateway, host_ip, deckies, ip_start)
            decky_configs = build_deckies(
                deckies, ips, services_list, randomize_services,
                distros_explicit=distros_list, randomize_distros=randomize_distros,
                archetype=arch, mutate_interval=mutate_interval,
            )
            effective_log_file = log_file

        # The API needs a log file to ingest; default one into the CWD.
        if api and not effective_log_file:
            effective_log_file = os.path.join(os.getcwd(), "decnet.log")
            console.print(f"[cyan]API mode enabled: defaulting log-file to {effective_log_file}[/]")

        config = DecnetConfig(
            mode=mode,
            interface=iface,
            subnet=subnet_cidr,
            gateway=effective_gateway,
            deckies=decky_configs,
            log_file=effective_log_file,
            ipvlan=ipvlan,
            mutate_interval=mutate_interval,
        )

        log.debug("deploy: config built deckies=%d interface=%s subnet=%s", len(config.deckies), config.interface, config.subnet)

        # Swarm mode: dispatch remotely; no local engine deploy, no local
        # sidecar processes.
        if mode == "swarm":
            _deploy_swarm(config, dry_run=dry_run, no_cache=no_cache)
            if dry_run:
                log.info("deploy: swarm dry-run complete, no workers dispatched")
            else:
                log.info("deploy: swarm deployment complete deckies=%d", len(config.deckies))
            return

        from decnet.engine import deploy as _deploy
        _deploy(config, dry_run=dry_run, no_cache=no_cache, parallel=parallel)
        if dry_run:
            log.info("deploy: dry-run complete, no containers started")
        else:
            log.info("deploy: deployment complete deckies=%d", len(config.deckies))

        # NOTE: --mutate-interval defaults to 30, so the mutator watcher is
        # started on every non-dry-run deploy unless the flag is cleared.
        if mutate_interval is not None and not dry_run:
            console.print(f"[green]Starting DECNET Mutator watcher in the background (interval: {mutate_interval}m)...[/]")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "mutate", "--watch"],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start mutator watcher.[/]")

        # Collector child; skipped under --api (the API ingests directly).
        if effective_log_file and not dry_run and not api:
            _collector_err = _Path(effective_log_file).with_suffix(".collector.log")
            console.print(f"[bold cyan]Starting log collector[/] → {effective_log_file}")
            # NOTE(review): this file handle is inherited by the child and
            # never closed in this process — presumably intentional for the
            # child's lifetime; confirm before "fixing".
            subprocess.Popen(  # nosec B603
                [sys.executable, "-m", "decnet.cli", "collect", "--log-file", str(effective_log_file)],
                stdin=subprocess.DEVNULL,
                stdout=open(_collector_err, "a"),
                stderr=subprocess.STDOUT,
                start_new_session=True,
            )

        if api and not dry_run:
            console.print(f"[green]Starting DECNET API on port {api_port}...[/]")
            # The ingest path is passed to uvicorn via the environment.
            _env: dict[str, str] = os.environ.copy()
            _env["DECNET_INGEST_LOG_FILE"] = str(effective_log_file or "")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "uvicorn", "decnet.web.api:app", "--host", DECNET_API_HOST, "--port", str(api_port)],
                    env=_env,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT
                )
                console.print(f"[dim]API running at http://{DECNET_API_HOST}:{api_port}[/]")
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")

        # Prober: discovers attackers from the log stream.
        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-PROBER[/] (auto-discovers attackers from log stream)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "probe", "--daemon", "--log-file", str(effective_log_file)],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-PROBER.[/]")

        # Profiler: builds attacker profiles (no --log-file flag; it reads
        # from the master DB per gating-module docs).
        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-PROFILER[/] (builds attacker profiles from log stream)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "profiler", "--daemon"],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-PROFILER.[/]")

        # Sniffer: passive packet capture into the same log file.
        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-SNIFFER[/] (passive network capture)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "sniffer", "--daemon", "--log-file", str(effective_log_file)],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-SNIFFER.[/]")
|
||||
74
decnet/cli/forwarder.py
Normal file
74
decnet/cli/forwarder.py
Normal file
@@ -0,0 +1,74 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import pathlib
|
||||
import signal
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from decnet.env import DECNET_INGEST_LOG_FILE
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the `forwarder` command to the given Typer app."""
    @app.command()
    def forwarder(
        master_host: Optional[str] = typer.Option(None, "--master-host", help="Master listener hostname/IP (default: $DECNET_SWARM_MASTER_HOST)"),
        master_port: int = typer.Option(6514, "--master-port", help="Master listener TCP port (RFC 5425 default 6514)"),
        log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Local RFC 5424 file to tail and forward"),
        agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker cert bundle dir (default: ~/.decnet/agent)"),
        state_db: Optional[str] = typer.Option(None, "--state-db", help="Forwarder offset SQLite path (default: <agent_dir>/forwarder.db)"),
        poll_interval: float = typer.Option(0.5, "--poll-interval", help="Seconds between log file stat checks"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the worker-side syslog-over-TLS forwarder (RFC 5425, mTLS to master:6514).

        Validates the master address, the local worker cert bundle, and the
        log file path (exit code 2 on any missing precondition), then runs
        the asyncio forwarder loop until SIGTERM/SIGINT or Ctrl-C.
        """
        from decnet.env import DECNET_SWARM_MASTER_HOST
        from decnet.swarm import pki
        from decnet.swarm.log_forwarder import ForwarderConfig, run_forwarder

        # CLI flag beats the environment default.
        resolved_host = master_host or DECNET_SWARM_MASTER_HOST
        if not resolved_host:
            console.print("[red]--master-host is required (or set DECNET_SWARM_MASTER_HOST).[/]")
            raise typer.Exit(2)

        resolved_agent_dir = pathlib.Path(agent_dir) if agent_dir else pki.DEFAULT_AGENT_DIR
        # worker.crt is the sentinel for a complete enrollment bundle.
        if not (resolved_agent_dir / "worker.crt").exists():
            console.print(f"[red]No worker cert bundle at {resolved_agent_dir} — enroll from the master first.[/]")
            raise typer.Exit(2)

        if not log_file:
            console.print("[red]--log-file is required.[/]")
            raise typer.Exit(2)

        cfg = ForwarderConfig(
            log_path=pathlib.Path(log_file),
            master_host=resolved_host,
            master_port=master_port,
            agent_dir=resolved_agent_dir,
            state_db=pathlib.Path(state_db) if state_db else None,
        )

        if daemon:
            log.info("forwarder daemonizing master=%s:%d log=%s", resolved_host, master_port, log_file)
            _utils._daemonize()

        log.info("forwarder command invoked master=%s:%d log=%s", resolved_host, master_port, log_file)
        console.print(f"[green]Starting DECNET forwarder → {resolved_host}:{master_port} (mTLS)...[/]")

        async def _main() -> None:
            # Graceful shutdown: SIGTERM/SIGINT set the stop event which
            # run_forwarder observes.
            stop = asyncio.Event()
            loop = asyncio.get_running_loop()
            for sig in (signal.SIGTERM, signal.SIGINT):
                try:
                    loop.add_signal_handler(sig, stop.set)
                except (NotImplementedError, RuntimeError):  # pragma: no cover
                    # add_signal_handler is unavailable on some platforms
                    # (e.g. Windows event loops); fall back to KeyboardInterrupt.
                    pass
            await run_forwarder(cfg, poll_interval=poll_interval, stop_event=stop)

        try:
            asyncio.run(_main())
        except KeyboardInterrupt:
            pass
|
||||
71
decnet/cli/gating.py
Normal file
71
decnet/cli/gating.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""Role-based CLI gating.
|
||||
|
||||
MAINTAINERS: when you add a new Typer command (or add_typer group) that is
|
||||
master-only, register its name in MASTER_ONLY_COMMANDS / MASTER_ONLY_GROUPS
|
||||
below. The gate is the only thing that:
|
||||
(a) hides the command from `decnet --help` on worker hosts, and
|
||||
(b) prevents a misconfigured worker from invoking master-side logic.
|
||||
Forgetting to register a new command is a role-boundary bug. Grep for
|
||||
MASTER_ONLY when touching command registration.
|
||||
|
||||
Worker-legitimate commands (NOT in these sets): agent, updater, forwarder,
|
||||
status, collect, probe, sniffer. Agents run deckies locally and should be
|
||||
able to inspect them + run the per-host microservices (collector streams
|
||||
container logs, prober characterizes attackers hitting this host, sniffer
|
||||
captures traffic). Mutator and Profiler stay master-only: the mutator
|
||||
orchestrates respawns across the swarm; the profiler rebuilds attacker
|
||||
profiles against the master DB (no per-host DB exists).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
import typer
|
||||
|
||||
from .utils import console
|
||||
|
||||
MASTER_ONLY_COMMANDS: frozenset[str] = frozenset({
|
||||
"api", "swarmctl", "deploy", "redeploy", "teardown",
|
||||
"mutate", "listener", "profiler",
|
||||
"services", "distros", "correlate", "archetypes", "web",
|
||||
"db-reset",
|
||||
})
|
||||
MASTER_ONLY_GROUPS: frozenset[str] = frozenset({"swarm"})
|
||||
|
||||
|
||||
def _agent_mode_active() -> bool:
|
||||
"""True when the host is configured as an agent AND master commands are
|
||||
disallowed (the default for agents). Workers overriding this explicitly
|
||||
set DECNET_DISALLOW_MASTER=false to opt into hybrid use."""
|
||||
mode = os.environ.get("DECNET_MODE", "master").lower()
|
||||
disallow = os.environ.get("DECNET_DISALLOW_MASTER", "true").lower() == "true"
|
||||
return mode == "agent" and disallow
|
||||
|
||||
|
||||
def _require_master_mode(command_name: str) -> None:
    """Defence-in-depth guard called at the top of every master-only command.

    The registration-time gate in _gate_commands_by_mode() already removes
    master-only commands from Typer's dispatch table; this runtime check
    additionally protects against direct function imports (e.g. from tests
    or third-party tools) that bypass Typer entirely.

    Raises:
        typer.Exit: code 1 when the host is running in gated agent mode.
    """
    if not _agent_mode_active():
        return
    console.print(
        f"[red]`decnet {command_name}` is a master-only command; this host "
        f"is configured as an agent (DECNET_MODE=agent).[/]"
    )
    raise typer.Exit(1)
|
||||
|
||||
|
||||
def _gate_commands_by_mode(_app: typer.Typer) -> None:
    """Strip master-only commands and groups from the app on gated agent hosts.

    No-op on master hosts; on agents, rewrites Typer's registration lists so
    master-only entries never appear in --help or dispatch.
    """
    if not _agent_mode_active():
        return
    kept_commands = []
    for cmd in _app.registered_commands:
        # A command's visible name falls back to its callback's function name.
        label = cmd.name or cmd.callback.__name__
        if label not in MASTER_ONLY_COMMANDS:
            kept_commands.append(cmd)
    _app.registered_commands = kept_commands
    kept_groups = [grp for grp in _app.registered_groups if grp.name not in MASTER_ONLY_GROUPS]
    _app.registered_groups = kept_groups
|
||||
52
decnet/cli/inventory.py
Normal file
52
decnet/cli/inventory.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
from rich.table import Table
|
||||
|
||||
from decnet.archetypes import all_archetypes
|
||||
from decnet.distros import all_distros
|
||||
from decnet.services.registry import all_services
|
||||
|
||||
from .utils import console
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the read-only inventory commands (services/distros/archetypes)."""
    @app.command(name="services")
    def list_services() -> None:
        """List all registered honeypot service plugins."""
        svcs = all_services()
        table = Table(title="Available Services", show_lines=True)
        table.add_column("Name", style="bold cyan")
        table.add_column("Ports")
        table.add_column("Image")
        # Sorted by service name for stable output.
        for name, svc in sorted(svcs.items()):
            table.add_row(name, ", ".join(str(p) for p in svc.ports), svc.default_image)
        console.print(table)

    @app.command(name="distros")
    def list_distros() -> None:
        """List all available OS distro profiles for deckies."""
        table = Table(title="Available Distro Profiles", show_lines=True)
        table.add_column("Slug", style="bold cyan")
        table.add_column("Display Name")
        table.add_column("Docker Image", style="dim")
        # Sorted by slug for stable output.
        for slug, profile in sorted(all_distros().items()):
            table.add_row(slug, profile.display_name, profile.image)
        console.print(table)

    @app.command(name="archetypes")
    def list_archetypes() -> None:
        """List all machine archetype profiles."""
        table = Table(title="Machine Archetypes", show_lines=True)
        table.add_column("Slug", style="bold cyan")
        table.add_column("Display Name")
        table.add_column("Default Services", style="green")
        table.add_column("Description", style="dim")
        # Sorted by slug for stable output.
        for slug, arch in sorted(all_archetypes().items()):
            table.add_row(
                slug,
                arch.display_name,
                ", ".join(arch.services),
                arch.description,
            )
        console.print(table)
|
||||
97
decnet/cli/lifecycle.py
Normal file
97
decnet/cli/lifecycle.py
Normal file
@@ -0,0 +1,97 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess # nosec B404
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from rich.table import Table
|
||||
|
||||
from decnet.env import DECNET_INGEST_LOG_FILE
|
||||
|
||||
from . import utils as _utils
|
||||
from .gating import _agent_mode_active, _require_master_mode
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the lifecycle commands (redeploy/status/teardown)."""
    @app.command()
    def redeploy(
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to the DECNET log file"),
    ) -> None:
        """Check running DECNET services and relaunch any that are down."""
        log.info("redeploy: checking services")
        # Registry entries are (name, match_fn, launch_args) triples.
        registry = _utils._service_registry(str(log_file))

        table = Table(title="DECNET Services", show_lines=True)
        table.add_column("Service", style="bold cyan")
        table.add_column("Status")
        table.add_column("PID", style="dim")
        table.add_column("Action")

        relaunched = 0
        for name, match_fn, launch_args in registry:
            pid = _utils._is_running(match_fn)
            if pid is not None:
                table.add_row(name, "[green]UP[/]", str(pid), "—")
            else:
                # Service is down: relaunch it detached from this process.
                try:
                    subprocess.Popen(  # nosec B603
                        launch_args,
                        stdin=subprocess.DEVNULL,
                        stdout=subprocess.DEVNULL,
                        stderr=subprocess.STDOUT,
                        start_new_session=True,
                    )
                    table.add_row(name, "[red]DOWN[/]", "—", "[green]relaunched[/]")
                    relaunched += 1
                except (FileNotFoundError, subprocess.SubprocessError) as exc:
                    table.add_row(name, "[red]DOWN[/]", "—", f"[red]failed: {exc}[/]")

        console.print(table)
        if relaunched:
            console.print(f"[green]{relaunched} service(s) relaunched.[/]")
        else:
            console.print("[green]All services running.[/]")

    @app.command()
    def status() -> None:
        """Show running deckies and their status."""
        log.info("status command invoked")
        from decnet.engine import status as _status
        _status()

        registry = _utils._service_registry(str(DECNET_INGEST_LOG_FILE))
        # Agents don't run the master-only microservices; hide them so the
        # table doesn't report them as perpetually DOWN.
        if _agent_mode_active():
            registry = [r for r in registry if r[0] not in {"Mutator", "Profiler", "API"}]
        svc_table = Table(title="DECNET Services", show_lines=True)
        svc_table.add_column("Service", style="bold cyan")
        svc_table.add_column("Status")
        svc_table.add_column("PID", style="dim")

        for name, match_fn, _launch_args in registry:
            pid = _utils._is_running(match_fn)
            if pid is not None:
                svc_table.add_row(name, "[green]UP[/]", str(pid))
            else:
                svc_table.add_row(name, "[red]DOWN[/]", "—")

        console.print(svc_table)

    @app.command()
    def teardown(
        all_: bool = typer.Option(False, "--all", help="Tear down all deckies and remove network"),
        id_: Optional[str] = typer.Option(None, "--id", help="Tear down a specific decky by name"),
    ) -> None:
        """Stop and remove deckies."""
        _require_master_mode("teardown")
        if not all_ and not id_:
            console.print("[red]Specify --all or --id <name>.[/]")
            raise typer.Exit(1)

        log.info("teardown command invoked all=%s id=%s", all_, id_)
        from decnet.engine import teardown as _teardown
        # NOTE(review): with --all, id_ is None — presumably the engine
        # treats decky_id=None as "tear down everything"; confirm.
        _teardown(decky_id=id_)
        log.info("teardown complete all=%s id=%s", all_, id_)

        # Full teardown also stops the sidecar microservices.
        if all_:
            _utils._kill_all_services()
|
||||
57
decnet/cli/listener.py
Normal file
57
decnet/cli/listener.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import pathlib
|
||||
import signal
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the `listener` command to the given Typer app."""
    @app.command()
    def listener(
        bind_host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the master syslog-TLS listener"),  # nosec B104
        bind_port: int = typer.Option(6514, "--port", help="Listener TCP port (RFC 5425 default 6514)"),
        log_path: Optional[str] = typer.Option(None, "--log-path", help="RFC 5424 forensic sink (default: ./master.log)"),
        json_path: Optional[str] = typer.Option(None, "--json-path", help="Parsed-JSON ingest sink (default: ./master.json)"),
        ca_dir: Optional[str] = typer.Option(None, "--ca-dir", help="DECNET CA dir (default: ~/.decnet/ca)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the master-side syslog-over-TLS listener (RFC 5425, mTLS).

        Resolves CA dir and sink paths (relative defaults in the CWD), then
        runs the asyncio listener loop until SIGTERM/SIGINT or Ctrl-C.
        """
        from decnet.swarm import pki
        from decnet.swarm.log_listener import ListenerConfig, run_listener

        resolved_ca_dir = pathlib.Path(ca_dir) if ca_dir else pki.DEFAULT_CA_DIR
        # Default sinks land in the current working directory.
        resolved_log = pathlib.Path(log_path) if log_path else pathlib.Path("master.log")
        resolved_json = pathlib.Path(json_path) if json_path else pathlib.Path("master.json")

        cfg = ListenerConfig(
            log_path=resolved_log, json_path=resolved_json,
            bind_host=bind_host, bind_port=bind_port, ca_dir=resolved_ca_dir,
        )

        if daemon:
            log.info("listener daemonizing host=%s port=%d", bind_host, bind_port)
            _utils._daemonize()

        log.info("listener command invoked host=%s port=%d", bind_host, bind_port)
        console.print(f"[green]Starting DECNET log listener on {bind_host}:{bind_port} (mTLS)...[/]")

        async def _main() -> None:
            # Graceful shutdown: SIGTERM/SIGINT set the stop event which
            # run_listener observes.
            stop = asyncio.Event()
            loop = asyncio.get_running_loop()
            for sig in (signal.SIGTERM, signal.SIGINT):
                try:
                    loop.add_signal_handler(sig, stop.set)
                except (NotImplementedError, RuntimeError):  # pragma: no cover
                    # Signal handlers unavailable on some platforms; rely on
                    # KeyboardInterrupt below instead.
                    pass
            await run_listener(cfg, stop_event=stop)

        try:
            asyncio.run(_main())
        except KeyboardInterrupt:
            pass
|
||||
34
decnet/cli/profiler.py
Normal file
34
decnet/cli/profiler.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the `profiler` command to the given Typer app."""
    @app.command(name="profiler")
    def profiler_cmd(
        interval: int = typer.Option(30, "--interval", "-i", help="Seconds between profile rebuild cycles"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the attacker profiler as a standalone microservice.

        Initializes the shared repository, then loops forever rebuilding
        attacker profiles every `interval` seconds until Ctrl-C.
        """
        import asyncio
        from decnet.profiler import attacker_profile_worker
        from decnet.web.dependencies import repo

        if daemon:
            log.info("profiler daemonizing interval=%d", interval)
            _utils._daemonize()

        log.info("profiler starting interval=%d", interval)
        console.print(f"[bold cyan]Profiler starting[/] (interval: {interval}s)")

        async def _run() -> None:
            # The repo must be initialized before the worker can use it.
            await repo.initialize()
            await attacker_profile_worker(repo, interval=interval)

        try:
            asyncio.run(_run())
        except KeyboardInterrupt:
            console.print("\n[yellow]Profiler stopped.[/]")
|
||||
31
decnet/cli/sniffer.py
Normal file
31
decnet/cli/sniffer.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from decnet.env import DECNET_INGEST_LOG_FILE
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the `sniffer` command to the given Typer app."""
    @app.command(name="sniffer")
    def sniffer_cmd(
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write captured syslog + JSON records"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the network sniffer as a standalone microservice.

        Blocks in sniffer_worker writing captured records to `log_file`
        until Ctrl-C.
        """
        import asyncio
        from decnet.sniffer import sniffer_worker

        if daemon:
            log.info("sniffer daemonizing log_file=%s", log_file)
            _utils._daemonize()

        log.info("sniffer starting log_file=%s", log_file)
        console.print(f"[bold cyan]Sniffer starting[/] → {log_file}")

        try:
            asyncio.run(sniffer_worker(log_file))
        except KeyboardInterrupt:
            console.print("\n[yellow]Sniffer stopped.[/]")
|
||||
346
decnet/cli/swarm.py
Normal file
346
decnet/cli/swarm.py
Normal file
@@ -0,0 +1,346 @@
|
||||
"""`decnet swarm ...` — master-side operator commands (HTTP to local swarmctl)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from rich.table import Table
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
|
||||
swarm_app = typer.Typer(
|
||||
name="swarm",
|
||||
help="Manage swarm workers (enroll, list, decommission). Requires `decnet swarmctl` running.",
|
||||
no_args_is_help=True,
|
||||
)
|
||||
app.add_typer(swarm_app, name="swarm")
|
||||
|
||||
@swarm_app.command("enroll")
|
||||
def swarm_enroll(
|
||||
name: str = typer.Option(..., "--name", help="Short hostname for the worker (also the cert CN)"),
|
||||
address: str = typer.Option(..., "--address", help="IP or DNS the master uses to reach the worker"),
|
||||
agent_port: int = typer.Option(8765, "--agent-port", help="Worker agent TCP port"),
|
||||
sans: Optional[str] = typer.Option(None, "--sans", help="Comma-separated extra SANs for the worker cert"),
|
||||
notes: Optional[str] = typer.Option(None, "--notes", help="Free-form operator notes"),
|
||||
out_dir: Optional[str] = typer.Option(None, "--out-dir", help="Write the bundle (ca.crt/worker.crt/worker.key) to this dir for scp"),
|
||||
updater: bool = typer.Option(False, "--updater", help="Also issue an updater-identity cert (CN=updater@<name>) for the remote self-updater"),
|
||||
url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL (default: 127.0.0.1:8770)"),
|
||||
) -> None:
|
||||
"""Issue a mTLS bundle for a new worker and register it in the swarm."""
|
||||
import pathlib as _pathlib
|
||||
|
||||
body: dict = {"name": name, "address": address, "agent_port": agent_port}
|
||||
if sans:
|
||||
body["sans"] = [s.strip() for s in sans.split(",") if s.strip()]
|
||||
if notes:
|
||||
body["notes"] = notes
|
||||
if updater:
|
||||
body["issue_updater_bundle"] = True
|
||||
|
||||
resp = _utils._http_request("POST", _utils._swarmctl_base_url(url) + "/swarm/enroll", json_body=body)
|
||||
data = resp.json()
|
||||
|
||||
console.print(f"[green]Enrolled worker:[/] {data['name']} "
|
||||
f"[dim]uuid=[/]{data['host_uuid']} "
|
||||
f"[dim]fingerprint=[/]{data['fingerprint']}")
|
||||
if data.get("updater"):
|
||||
console.print(f"[green] + updater identity[/] "
|
||||
f"[dim]fingerprint=[/]{data['updater']['fingerprint']}")
|
||||
|
||||
if out_dir:
|
||||
target = _pathlib.Path(out_dir).expanduser()
|
||||
target.mkdir(parents=True, exist_ok=True)
|
||||
(target / "ca.crt").write_text(data["ca_cert_pem"])
|
||||
(target / "worker.crt").write_text(data["worker_cert_pem"])
|
||||
(target / "worker.key").write_text(data["worker_key_pem"])
|
||||
for leaf in ("worker.key",):
|
||||
try:
|
||||
(target / leaf).chmod(0o600)
|
||||
except OSError:
|
||||
pass
|
||||
console.print(f"[cyan]Agent bundle written to[/] {target}")
|
||||
|
||||
if data.get("updater"):
|
||||
upd_target = target.parent / f"{target.name}-updater"
|
||||
upd_target.mkdir(parents=True, exist_ok=True)
|
||||
(upd_target / "ca.crt").write_text(data["ca_cert_pem"])
|
||||
(upd_target / "updater.crt").write_text(data["updater"]["updater_cert_pem"])
|
||||
(upd_target / "updater.key").write_text(data["updater"]["updater_key_pem"])
|
||||
try:
|
||||
(upd_target / "updater.key").chmod(0o600)
|
||||
except OSError:
|
||||
pass
|
||||
console.print(f"[cyan]Updater bundle written to[/] {upd_target}")
|
||||
console.print("[dim]Ship the agent dir to ~/.decnet/agent/ and the updater dir to ~/.decnet/updater/ on the worker.[/]")
|
||||
else:
|
||||
console.print("[dim]Ship this directory to the worker at ~/.decnet/agent/ (or wherever `decnet agent --agent-dir` points).[/]")
|
||||
else:
|
||||
console.print("[yellow]No --out-dir given — bundle PEMs are in the JSON response; persist them before leaving this shell.[/]")
|
||||
|
||||
@swarm_app.command("list")
def swarm_list(
    host_status: Optional[str] = typer.Option(None, "--status", help="Filter by status (enrolled|active|unreachable|decommissioned)"),
    url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
) -> None:
    """List enrolled workers."""
    query = f"?host_status={host_status}" if host_status else ""
    hosts = _utils._http_request(
        "GET", _utils._swarmctl_base_url(url) + "/swarm/hosts" + query
    ).json()

    if not hosts:
        console.print("[dim]No workers enrolled.[/]")
        return

    # One row per worker; missing fields render as blanks or em-dashes.
    table = Table(title="DECNET swarm workers")
    for heading in ("name", "address", "port", "status", "last heartbeat", "enrolled"):
        table.add_column(heading)
    for worker in hosts:
        table.add_row(
            worker.get("name") or "",
            worker.get("address") or "",
            str(worker.get("agent_port") or ""),
            worker.get("status") or "",
            str(worker.get("last_heartbeat") or "—"),
            str(worker.get("enrolled_at") or "—"),
        )
    console.print(table)
|
||||
|
||||
@swarm_app.command("check")
def swarm_check(
    url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
    json_out: bool = typer.Option(False, "--json", help="Emit JSON instead of a table"),
) -> None:
    """Actively probe every enrolled worker and refresh status + last_heartbeat."""
    payload = _utils._http_request(
        "POST", _utils._swarmctl_base_url(url) + "/swarm/check", timeout=60.0
    ).json()
    results = payload.get("results", [])

    if json_out:
        console.print_json(data=payload)
        return

    if not results:
        console.print("[dim]No workers enrolled.[/]")
        return

    table = Table(title="DECNET swarm check")
    for heading in ("name", "address", "reachable", "detail"):
        table.add_column(heading)

    for entry in results:
        mark = "[green]yes[/]" if entry.get("reachable") else "[red]no[/]"
        detail = entry.get("detail")
        # Dict details prefer a "status" key, otherwise fall back to k=v pairs;
        # anything else is stringified; absent details show an em-dash.
        if isinstance(detail, dict):
            detail_str = detail.get("status") or ", ".join(f"{k}={v}" for k, v in detail.items())
        elif detail is None:
            detail_str = "—"
        else:
            detail_str = str(detail)
        table.add_row(
            entry.get("name") or "",
            entry.get("address") or "",
            mark,
            detail_str,
        )
    console.print(table)
|
||||
|
||||
@swarm_app.command("update")
def swarm_update(
    host: Optional[str] = typer.Option(None, "--host", help="Target worker (name or UUID). Omit with --all."),
    all_hosts: bool = typer.Option(False, "--all", help="Push to every enrolled worker."),
    include_self: bool = typer.Option(False, "--include-self", help="Also push to each updater's /update-self after a successful agent update."),
    root: Optional[str] = typer.Option(None, "--root", help="Source tree to tar (default: CWD)."),
    exclude: list[str] = typer.Option([], "--exclude", help="Additional exclude glob. Repeatable."),
    updater_port: int = typer.Option(8766, "--updater-port", help="Port the workers' updater listens on."),
    dry_run: bool = typer.Option(False, "--dry-run", help="Build the tarball and print stats; no network."),
    url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL."),
) -> None:
    """Push the current working tree to workers' self-updaters (with auto-rollback on failure).

    Exits 2 on bad flag combinations, 1 when any target errors or rolls
    back, 0 when every push succeeds (or on --dry-run / no targets).
    """
    # Deferred imports keep CLI startup light; only this command needs them.
    import asyncio
    import pathlib as _pathlib

    from decnet.swarm.tar_tree import tar_working_tree, detect_git_sha
    from decnet.swarm.updater_client import UpdaterClient

    # Exactly one of --host / --all must be given.
    if not (host or all_hosts):
        console.print("[red]Supply --host <name> or --all.[/]")
        raise typer.Exit(2)
    if host and all_hosts:
        console.print("[red]--host and --all are mutually exclusive.[/]")
        raise typer.Exit(2)

    # Resolve the target set from the controller's host registry.
    base = _utils._swarmctl_base_url(url)
    resp = _utils._http_request("GET", base + "/swarm/hosts")
    rows = resp.json()
    if host:
        # --host accepts either the worker's name or its UUID.
        targets = [r for r in rows if r.get("name") == host or r.get("uuid") == host]
        if not targets:
            console.print(f"[red]No enrolled worker matching '{host}'.[/]")
            raise typer.Exit(1)
    else:
        # --all skips workers that were already decommissioned.
        targets = [r for r in rows if r.get("status") != "decommissioned"]
        if not targets:
            console.print("[dim]No targets.[/]")
            return

    # Tar the source tree once; the same bytes are pushed to every target.
    tree_root = _pathlib.Path(root) if root else _pathlib.Path.cwd()
    sha = detect_git_sha(tree_root)
    console.print(f"[dim]Tarring[/] {tree_root} [dim]sha={sha or '(not a git repo)'}[/]")
    tarball = tar_working_tree(tree_root, extra_excludes=exclude)
    console.print(f"[dim]Tarball size:[/] {len(tarball):,} bytes")

    if dry_run:
        console.print("[yellow]--dry-run: not pushing.[/]")
        for t in targets:
            console.print(f" would push to [cyan]{t.get('name')}[/] at {t.get('address')}:{updater_port}")
        return

    async def _push_one(h: dict) -> dict:
        # Push the tarball to one worker's updater; on success (200) and
        # --include-self, also push the updater's own code. Exceptions are
        # captured into the result dict so one bad host can't abort the batch.
        name = h.get("name") or h.get("uuid")
        out: dict = {"name": name, "address": h.get("address"), "agent": None, "self": None}
        try:
            async with UpdaterClient(h, updater_port=updater_port) as u:
                r = await u.update(tarball, sha=sha)
                out["agent"] = {"status": r.status_code, "body": r.json() if r.content else {}}
                if r.status_code == 200 and include_self:
                    rs = await u.update_self(tarball, sha=sha)
                    out["self"] = {"status": rs.status_code, "body": rs.json() if rs.content else {}}
        except Exception as exc:  # noqa: BLE001
            out["error"] = f"{type(exc).__name__}: {exc}"
        return out

    async def _push_all() -> list[dict]:
        # All targets are updated concurrently.
        return await asyncio.gather(*(_push_one(t) for t in targets))

    results = asyncio.run(_push_all())

    # Render a per-host summary; any_failure drives the process exit code.
    table = Table(title="DECNET swarm update")
    for col in ("host", "address", "agent", "self", "detail"):
        table.add_column(col)
    any_failure = False
    for r in results:
        agent = r.get("agent") or {}
        selff = r.get("self") or {}
        err = r.get("error")
        if err:
            any_failure = True
            table.add_row(r["name"], r.get("address") or "", "[red]error[/]", "—", err)
            continue
        a_status = agent.get("status")
        if a_status == 200:
            agent_cell = "[green]updated[/]"
        elif a_status == 409:
            # NOTE(review): 409 appears to be the updater's "applied but rolled
            # back" signal — confirm against the updater server's API.
            agent_cell = "[yellow]rolled-back[/]"
            any_failure = True
        else:
            agent_cell = f"[red]{a_status}[/]"
            any_failure = True
        if not include_self:
            self_cell = "—"
        elif selff.get("status") == 200 or selff.get("status") is None:
            # selff is empty (status None) when the agent update did not reach
            # 200, so the self-update was never attempted → "skipped".
            self_cell = "[green]ok[/]" if selff else "[dim]skipped[/]"
        else:
            self_cell = f"[red]{selff.get('status')}[/]"
        detail = ""
        body = agent.get("body") or {}
        if isinstance(body, dict):
            # NOTE(review): assumes "release"/"detail" values are dicts when
            # present; a plain-string "detail" would raise here — verify.
            detail = body.get("release", {}).get("sha") or body.get("detail", {}).get("error") or ""
        table.add_row(r["name"], r.get("address") or "", agent_cell, self_cell, str(detail)[:80])
    console.print(table)

    # Non-zero exit when any host errored or rolled back, for scripting.
    if any_failure:
        raise typer.Exit(1)
|
||||
|
||||
@swarm_app.command("deckies")
def swarm_deckies(
    host: Optional[str] = typer.Option(None, "--host", help="Filter by worker name or UUID"),
    state: Optional[str] = typer.Option(None, "--state", help="Filter by shard state (pending|running|failed|torn_down)"),
    url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
    json_out: bool = typer.Option(False, "--json", help="Emit JSON instead of a table"),
) -> None:
    """List deployed deckies across the swarm with their owning worker host."""
    base = _utils._swarmctl_base_url(url)

    # A --host filter must first be resolved to the worker's UUID.
    host_uuid: Optional[str] = None
    if host:
        hosts = _utils._http_request("GET", base + "/swarm/hosts").json()
        match = next((h for h in hosts if h.get("uuid") == host or h.get("name") == host), None)
        if match is None:
            console.print(f"[red]No enrolled worker matching '{host}'.[/]")
            raise typer.Exit(1)
        host_uuid = match["uuid"]

    params = []
    if host_uuid:
        params.append(f"host_uuid={host_uuid}")
    if state:
        params.append(f"state={state}")
    path = "/swarm/deckies" + ("?" + "&".join(params) if params else "")

    rows = _utils._http_request("GET", base + path).json()

    if json_out:
        console.print_json(data=rows)
        return

    if not rows:
        console.print("[dim]No deckies deployed.[/]")
        return

    table = Table(title="DECNET swarm deckies")
    for heading in ("decky", "host", "address", "state", "services"):
        table.add_column(heading)

    # Lifecycle phase → rich color markup for the state column.
    palette = {
        "running": "green",
        "failed": "red",
        "pending": "yellow",
        "torn_down": "dim",
    }
    for row in rows:
        state_val = row.get("state") or "pending"
        color = palette.get(state_val)
        colored = f"[{color}]{state_val}[/]" if color else state_val
        table.add_row(
            row.get("decky_name") or "",
            row.get("host_name") or "<unknown>",
            row.get("host_address") or "",
            colored,
            ",".join(row.get("services") or []) or "—",
        )
    console.print(table)
|
||||
|
||||
@swarm_app.command("decommission")
def swarm_decommission(
    name: Optional[str] = typer.Option(None, "--name", help="Worker hostname"),
    uuid: Optional[str] = typer.Option(None, "--uuid", help="Worker UUID (skip lookup)"),
    url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
    yes: bool = typer.Option(False, "--yes", "-y", help="Skip interactive confirmation"),
) -> None:
    """Remove a worker from the swarm (cascades decky shard rows)."""
    if not (name or uuid):
        console.print("[red]Supply --name or --uuid.[/]")
        raise typer.Exit(2)

    base = _utils._swarmctl_base_url(url)
    target_uuid, target_name = uuid, name
    # A --uuid argument skips the name-lookup round-trip entirely.
    if target_uuid is None:
        hosts = _utils._http_request("GET", base + "/swarm/hosts").json()
        match = next((h for h in hosts if h.get("name") == name), None)
        if match is None:
            console.print(f"[red]No enrolled worker named '{name}'.[/]")
            raise typer.Exit(1)
        target_uuid = match["uuid"]
        target_name = match.get("name") or target_name

    # Destructive action: require confirmation unless --yes was passed.
    if not yes:
        if not typer.confirm(f"Decommission worker {target_name!r} ({target_uuid})?", default=False):
            console.print("[dim]Aborted.[/]")
            raise typer.Exit(0)

    _utils._http_request("DELETE", f"{base}/swarm/hosts/{target_uuid}")
    console.print(f"[green]Decommissioned {target_name or target_uuid}.[/]")
|
||||
104
decnet/cli/swarmctl.py
Normal file
104
decnet/cli/swarmctl.py
Normal file
@@ -0,0 +1,104 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import signal
|
||||
import subprocess # nosec B404
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .gating import _require_master_mode
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the `swarmctl` command to the top-level typer app."""
    @app.command()
    def swarmctl(
        port: int = typer.Option(8770, "--port", help="Port for the swarm controller"),
        host: str = typer.Option("127.0.0.1", "--host", help="Bind address for the swarm controller"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"),
        tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"),
        cert: Optional[str] = typer.Option(None, "--cert", help="BYOC: path to TLS server cert (PEM). Auto-issues from the DECNET CA if omitted."),
        key: Optional[str] = typer.Option(None, "--key", help="BYOC: path to TLS server private key (PEM)."),
        client_ca: Optional[str] = typer.Option(None, "--client-ca", help="CA bundle used to verify worker client certs. Defaults to the DECNET CA."),
    ) -> None:
        """Run the DECNET SWARM controller (master-side, separate process from `decnet api`).

        By default, `decnet swarmctl` auto-spawns `decnet listener` as a fully-
        detached sibling process so the master starts accepting forwarder
        connections on 6514 without a second manual invocation. The listener
        survives swarmctl restarts and crashes — if it dies on its own,
        restart it manually with `decnet listener --daemon …`. Pass
        --no-listener to skip.

        Pass ``--tls`` to serve over HTTPS with mutual-TLS enforcement. By
        default the server cert is auto-issued from the DECNET CA under
        ``~/.decnet/swarmctl/`` so enrolled workers (which already ship that
        CA's ``ca.crt``) trust it out of the box. BYOC via ``--cert``/``--key``
        if you need a publicly-trusted or externally-managed cert.
        """
        _require_master_mode("swarmctl")
        if daemon:
            log.info("swarmctl daemonizing host=%s port=%d", host, port)
            _utils._daemonize()

        # Fire-and-forget the syslog listener as an independent sibling so
        # forwarder connections are accepted without a second manual command.
        if not no_listener:
            listener_host = os.environ.get("DECNET_LISTENER_HOST", "0.0.0.0")  # nosec B104
            listener_port = int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514"))
            lst_argv = [
                sys.executable, "-m", "decnet", "listener",
                "--host", listener_host,
                "--port", str(listener_port),
                "--daemon",
            ]
            try:
                pid = _utils._spawn_detached(lst_argv, _utils._pid_dir() / "listener.pid")
                log.info("swarmctl auto-spawned listener pid=%d bind=%s:%d",
                         pid, listener_host, listener_port)
                console.print(f"[dim]Auto-spawned listener (pid {pid}) on {listener_host}:{listener_port}.[/]")
            except Exception as e:  # noqa: BLE001
                # Listener failure is non-fatal: swarmctl itself still starts.
                log.warning("swarmctl could not auto-spawn listener: %s", e)
                console.print(f"[yellow]listener auto-spawn skipped: {e}[/]")

        log.info("swarmctl command invoked host=%s port=%d tls=%s", host, port, tls)
        scheme = "https" if tls else "http"
        console.print(f"[green]Starting DECNET SWARM controller on {scheme}://{host}:{port}...[/]")
        _cmd = [sys.executable, "-m", "uvicorn", "decnet.web.swarm_api:app",
                "--host", host, "--port", str(port)]
        if tls:
            from decnet.swarm import pki as _pki
            # --cert/--key must arrive as a pair; otherwise auto-issue from the CA.
            if cert and key:
                cert_path, key_path = cert, key
            elif cert or key:
                console.print("[red]--cert and --key must be provided together.[/]")
                raise typer.Exit(code=2)
            else:
                auto_cert, auto_key, _auto_ca = _pki.ensure_swarmctl_cert(host)
                cert_path, key_path = str(auto_cert), str(auto_key)
                console.print(f"[dim]Auto-issued swarmctl server cert → {cert_path}[/]")
            ca_path = client_ca or str(_pki.DEFAULT_CA_DIR / "ca.crt")
            _cmd += [
                "--ssl-keyfile", key_path,
                "--ssl-certfile", cert_path,
                "--ssl-ca-certs", ca_path,
                # "2" == ssl.CERT_REQUIRED: workers must present a client cert.
                "--ssl-cert-reqs", "2",
            ]
        try:
            # start_new_session=True puts uvicorn in its own process group so
            # the whole group can be signalled below on Ctrl+C.
            proc = subprocess.Popen(_cmd, start_new_session=True)  # nosec B603 B404
            try:
                proc.wait()
            except KeyboardInterrupt:
                # Graceful stop: SIGTERM the group, escalate to SIGKILL after 10s.
                try:
                    os.killpg(proc.pid, signal.SIGTERM)
                    try:
                        proc.wait(timeout=10)
                    except subprocess.TimeoutExpired:
                        os.killpg(proc.pid, signal.SIGKILL)
                        proc.wait()
                except ProcessLookupError:
                    # Child already gone — nothing to clean up.
                    pass
        except (FileNotFoundError, subprocess.SubprocessError):
            console.print("[red]Failed to start swarmctl. Ensure 'uvicorn' is installed in the current environment.[/]")
|
||||
46
decnet/cli/updater.py
Normal file
46
decnet/cli/updater.py
Normal file
@@ -0,0 +1,46 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import pathlib as _pathlib
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the `updater` command to the top-level typer app."""
    @app.command()
    def updater(
        port: int = typer.Option(8766, "--port", help="Port for the self-updater daemon"),
        host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the updater"),  # nosec B104
        updater_dir: Optional[str] = typer.Option(None, "--updater-dir", help="Updater cert bundle dir (default: ~/.decnet/updater)"),
        install_dir: Optional[str] = typer.Option(None, "--install-dir", help="Release install root (default: /opt/decnet)"),
        agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker agent cert bundle (for local /health probes; default: ~/.decnet/agent)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the DECNET self-updater (requires a bundle in ~/.decnet/updater/)."""
        from decnet.swarm import pki as _pki
        from decnet.updater import server as _upd_server

        # Each directory falls back to the packaged default when not overridden.
        dirs = {
            "updater_dir": _pathlib.Path(updater_dir) if updater_dir else _upd_server.DEFAULT_UPDATER_DIR,
            "install_dir": _pathlib.Path(install_dir) if install_dir else _pathlib.Path("/opt/decnet"),
            "agent_dir": _pathlib.Path(agent_dir) if agent_dir else _pki.DEFAULT_AGENT_DIR,
        }

        if daemon:
            log.info("updater daemonizing host=%s port=%d", host, port)
            _utils._daemonize()

        log.info(
            "updater command invoked host=%s port=%d updater_dir=%s install_dir=%s",
            host, port, dirs["updater_dir"], dirs["install_dir"],
        )
        console.print(f"[green]Starting DECNET self-updater on {host}:{port} (mTLS)...[/]")
        rc = _upd_server.run(host, port, **dirs)
        # Propagate a non-zero server exit as the CLI exit code.
        if rc != 0:
            raise typer.Exit(rc)
|
||||
177
decnet/cli/utils.py
Normal file
177
decnet/cli/utils.py
Normal file
@@ -0,0 +1,177 @@
|
||||
"""Shared CLI helpers: console, logger, process management, swarm HTTP client.
|
||||
|
||||
Submodules reference these as ``from . import utils`` then ``utils.foo(...)``
|
||||
so tests can patch ``decnet.cli.utils.<name>`` and have every caller see it.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import signal
|
||||
import subprocess # nosec B404
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from rich.console import Console
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.env import DECNET_API_HOST, DECNET_API_PORT, DECNET_INGEST_LOG_FILE
|
||||
|
||||
log = get_logger("cli")
|
||||
console = Console()
|
||||
|
||||
|
||||
def _daemonize() -> None:
    """Fork the current process into a background daemon (Unix double-fork).

    The first fork + setsid() detaches from the controlling terminal; the
    second fork ensures the daemon can never reacquire one. Each parent
    exits via SystemExit(0), so only the grandchild continues past this
    function. Standard streams are rebound to /dev/null so the launching
    shell can close without the daemon hitting EIO.
    """
    if os.fork() > 0:
        # First parent exits.
        raise SystemExit(0)
    os.setsid()  # become session leader — no controlling terminal
    if os.fork() > 0:
        # Second parent exits; the grandchild is the daemon.
        raise SystemExit(0)
    # These file objects are intentionally left open for the process lifetime.
    sys.stdout = open(os.devnull, "w")  # noqa: SIM115
    sys.stderr = open(os.devnull, "w")  # noqa: SIM115
    sys.stdin = open(os.devnull, "r")  # noqa: SIM115
|
||||
|
||||
|
||||
def _pid_dir() -> Path:
|
||||
"""Return the writable PID directory.
|
||||
|
||||
/opt/decnet when it exists and is writable (production), else
|
||||
~/.decnet (dev). The directory is created if needed."""
|
||||
candidates = [Path("/opt/decnet"), Path.home() / ".decnet"]
|
||||
for path in candidates:
|
||||
try:
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
if os.access(path, os.W_OK):
|
||||
return path
|
||||
except (PermissionError, OSError):
|
||||
continue
|
||||
return Path("/tmp") # nosec B108
|
||||
|
||||
|
||||
def _spawn_detached(argv: list[str], pid_file: Path) -> int:
    """Spawn a DECNET subcommand as a fully-independent sibling process.

    The parent does NOT wait() on this child. start_new_session=True puts
    the child in its own session so SIGHUP on parent exit doesn't kill it;
    stdin/stdout/stderr go to /dev/null so the launching shell can close
    without EIO on the child. close_fds=True prevents inherited sockets
    from pinning ports we're trying to rebind.

    This is deliberately NOT a supervisor — we fire-and-forget. If the
    child dies, the operator restarts it manually via its own subcommand.

    Returns the PID of the (new or already-running) process.
    """
    # Idempotence: if pid_file names a live process, reuse it instead of
    # spawning a duplicate. os.kill(pid, 0) probes existence without
    # delivering a signal; any failure means the pid_file is stale.
    if pid_file.exists():
        try:
            existing = int(pid_file.read_text().strip())
            os.kill(existing, 0)
            return existing
        except (ValueError, ProcessLookupError, PermissionError, OSError):
            pass  # stale pid_file — fall through and spawn

    with open(os.devnull, "rb") as dn_in, open(os.devnull, "ab") as dn_out:
        proc = subprocess.Popen(  # nosec B603
            argv,
            stdin=dn_in, stdout=dn_out, stderr=dn_out,
            start_new_session=True, close_fds=True,
        )
    # Record the child's PID for the liveness check above on the next call.
    pid_file.parent.mkdir(parents=True, exist_ok=True)
    pid_file.write_text(f"{proc.pid}\n")
    return proc.pid
|
||||
|
||||
|
||||
def _is_running(match_fn) -> int | None:
    """Return the PID of a live DECNET process whose cmdline satisfies
    ``match_fn``, or None when no such process exists."""
    import psutil

    for candidate in psutil.process_iter(["pid", "cmdline"]):
        try:
            cmdline = candidate.info["cmdline"]
            matched = bool(cmdline) and match_fn(cmdline)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # Process vanished or is off-limits — ignore and keep scanning.
            continue
        if matched:
            return candidate.info["pid"]
    return None
|
||||
|
||||
|
||||
def _service_registry(log_file: str) -> list[tuple[str, callable, list[str]]]:
    """Return the microservice registry for health-check and relaunch.

    Each entry is ``(display_name, match_fn, relaunch_argv)`` where
    ``match_fn(cmdline)`` decides whether an existing process is this
    service and ``relaunch_argv`` restarts it.

    On agents these run as systemd units invoking /usr/local/bin/decnet,
    which doesn't include "decnet.cli" in its cmdline. On master dev boxes
    they're launched via `python -m decnet.cli`. Match either form — cmd
    is a list of argv tokens, so substring-check the joined string.
    """
    _py = sys.executable

    def _matches(sub: str, extras: tuple[str, ...] = ()):
        # Build a predicate: the joined cmdline must contain "decnet",
        # the subcommand token, and every extra marker as substrings.
        def _check(cmd) -> bool:
            joined = " ".join(cmd) if not isinstance(cmd, str) else cmd
            if "decnet" not in joined:
                return False
            if sub not in joined:
                return False
            return all(e in joined for e in extras)
        return _check

    return [
        ("Collector", _matches("collect"),
         [_py, "-m", "decnet.cli", "collect", "--daemon", "--log-file", log_file]),
        ("Mutator", _matches("mutate", ("--watch",)),
         [_py, "-m", "decnet.cli", "mutate", "--daemon", "--watch"]),
        ("Prober", _matches("probe"),
         [_py, "-m", "decnet.cli", "probe", "--daemon", "--log-file", log_file]),
        ("Profiler", _matches("profiler"),
         [_py, "-m", "decnet.cli", "profiler", "--daemon"]),
        ("Sniffer", _matches("sniffer"),
         [_py, "-m", "decnet.cli", "sniffer", "--daemon", "--log-file", log_file]),
        # BUGFIX: the previous matcher used `"uvicorn" in cmd` on the argv
        # *list*, which is exact-token membership — it missed invocations
        # where the first token is an absolute path (e.g. a systemd unit
        # exec'ing /usr/local/bin/uvicorn). Use the same joined-substring
        # matcher as every other entry; strictly broader, so every cmdline
        # the old lambda matched still matches.
        ("API",
         _matches("uvicorn", ("decnet.web.api:app",)),
         [_py, "-m", "uvicorn", "decnet.web.api:app",
          "--host", DECNET_API_HOST, "--port", str(DECNET_API_PORT)]),
    ]
|
||||
|
||||
|
||||
def _kill_all_services() -> None:
    """Locate every running DECNET microservice and send it SIGTERM."""
    stopped = 0
    for name, match_fn, _argv in _service_registry(str(DECNET_INGEST_LOG_FILE)):
        pid = _is_running(match_fn)
        if pid is None:
            continue
        console.print(f"[yellow]Stopping {name} (PID {pid})...[/]")
        os.kill(pid, signal.SIGTERM)
        stopped += 1

    if stopped:
        console.print(f"[green]{stopped} background process(es) stopped.[/]")
    else:
        console.print("[dim]No DECNET services were running.[/]")
|
||||
|
||||
|
||||
_DEFAULT_SWARMCTL_URL = "http://127.0.0.1:8770"
|
||||
|
||||
|
||||
def _swarmctl_base_url(url: Optional[str]) -> str:
|
||||
return url or os.environ.get("DECNET_SWARMCTL_URL", _DEFAULT_SWARMCTL_URL)
|
||||
|
||||
|
||||
def _http_request(method: str, url: str, *, json_body: Optional[dict] = None, timeout: float = 30.0):
    """Tiny sync wrapper around httpx; avoids leaking async into the CLI.

    Args:
        method: HTTP verb ("GET", "POST", ...).
        url: Absolute URL of the swarm controller endpoint.
        json_body: Optional JSON payload.
        timeout: Per-request timeout in seconds.

    Returns:
        The httpx.Response on any 2xx/3xx status.

    Raises:
        typer.Exit(2): the controller is unreachable (transport error).
        typer.Exit(1): the controller answered with status >= 400.
    """
    import httpx  # deferred import keeps CLI startup fast

    try:
        resp = httpx.request(method, url, json=json_body, timeout=timeout)
    except httpx.HTTPError as exc:
        console.print(f"[red]Could not reach swarm controller at {url}: {exc}[/]")
        console.print("[dim]Is `decnet swarmctl` running?[/]")
        # FIX: chain the transport error (B904) so tracebacks show the cause.
        raise typer.Exit(2) from exc
    if resp.status_code >= 400:
        try:
            detail = resp.json().get("detail", resp.text)
        except Exception:  # nosec B110 — body may not be JSON; fall back to raw text
            detail = resp.text
        console.print(f"[red]{method} {url} failed: {resp.status_code} — {detail}[/]")
        raise typer.Exit(1)
    return resp
|
||||
120
decnet/cli/web.py
Normal file
120
decnet/cli/web.py
Normal file
@@ -0,0 +1,120 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from decnet.env import DECNET_API_PORT, DECNET_WEB_HOST, DECNET_WEB_PORT
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the `web` dashboard command to the top-level typer app."""
    @app.command(name="web")
    def serve_web(
        web_port: int = typer.Option(DECNET_WEB_PORT, "--web-port", help="Port to serve the DECNET Web Dashboard"),
        host: str = typer.Option(DECNET_WEB_HOST, "--host", help="Host IP to serve the Web Dashboard"),
        api_port: int = typer.Option(DECNET_API_PORT, "--api-port", help="Port the DECNET API is listening on"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Serve the DECNET Web Dashboard frontend.

        Proxies /api/* requests to the API server so the frontend can use
        relative URLs (/api/v1/...) with no CORS configuration required.
        """
        import http.client
        import http.server
        import os
        import socketserver
        from pathlib import Path

        # The built SPA lives at <repo>/decnet_web/dist, three levels up
        # from this module.
        dist_dir = Path(__file__).resolve().parent.parent.parent / "decnet_web" / "dist"

        if not dist_dir.exists():
            console.print(f"[red]Frontend build not found at {dist_dir}. Make sure you run 'npm run build' inside 'decnet_web'.[/]")
            raise typer.Exit(1)

        if daemon:
            log.info("web daemonizing host=%s port=%d api_port=%d", host, web_port, api_port)
            _utils._daemonize()

        # Captured by the handler class below (closure over the CLI option).
        _api_port = api_port

        class SPAHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
            # GET serves static files with an index.html fallback for SPA
            # client-side routes; all verbs forward /api/* to the local API.

            def do_GET(self):
                if self.path.startswith("/api/"):
                    self._proxy("GET")
                    return
                path = self.translate_path(self.path)
                # SPA fallback: unknown paths and directories get index.html.
                if not Path(path).exists() or Path(path).is_dir():
                    self.path = "/index.html"
                return super().do_GET()

            def do_POST(self):
                if self.path.startswith("/api/"):
                    self._proxy("POST")
                    return
                self.send_error(405)

            def do_PUT(self):
                if self.path.startswith("/api/"):
                    self._proxy("PUT")
                    return
                self.send_error(405)

            def do_DELETE(self):
                if self.path.startswith("/api/"):
                    self._proxy("DELETE")
                    return
                self.send_error(405)

            def _proxy(self, method: str) -> None:
                """Forward the current request to the API server and relay the response."""
                content_length = int(self.headers.get("Content-Length", 0))
                body = self.rfile.read(content_length) if content_length else None

                # Strip connection-scoped headers; everything else passes through.
                forward = {k: v for k, v in self.headers.items()
                           if k.lower() not in ("host", "connection")}

                try:
                    conn = http.client.HTTPConnection("127.0.0.1", _api_port, timeout=120)
                    conn.request(method, self.path, body=body, headers=forward)
                    resp = conn.getresponse()

                    self.send_response(resp.status)
                    for key, val in resp.getheaders():
                        # Transfer-Encoding is dropped because we relay a
                        # decoded stream, not the original chunked framing.
                        if key.lower() not in ("connection", "transfer-encoding"):
                            self.send_header(key, val)
                    self.end_headers()

                    # SSE responses stream indefinitely — drop the socket
                    # timeout so long-lived event streams aren't cut off.
                    content_type = resp.getheader("Content-Type", "")
                    if "text/event-stream" in content_type:
                        conn.sock.settimeout(None)

                    # read1 (when available) returns as soon as any data
                    # arrives, keeping SSE chunks flowing without buffering.
                    _read = getattr(resp, "read1", resp.read)
                    while True:
                        chunk = _read(4096)
                        if not chunk:
                            break
                        self.wfile.write(chunk)
                        self.wfile.flush()
                except Exception as exc:
                    log.warning("web proxy error %s %s: %s", method, self.path, exc)
                    self.send_error(502, f"API proxy error: {exc}")
                finally:
                    # conn may be unbound if HTTPConnection() itself raised;
                    # the blanket except also covers that NameError.
                    try:
                        conn.close()
                    except Exception:  # nosec B110 — best-effort conn cleanup
                        pass

            def log_message(self, fmt: str, *args: object) -> None:
                # Route the default stderr access log through the project logger.
                log.debug("web %s", fmt % args)

        # SimpleHTTPRequestHandler serves relative to CWD — enter the build dir.
        os.chdir(dist_dir)

        socketserver.TCPServer.allow_reuse_address = True
        with socketserver.ThreadingTCPServer((host, web_port), SPAHTTPRequestHandler) as httpd:
            console.print(f"[green]Serving DECNET Web Dashboard on http://{host}:{web_port}[/]")
            console.print(f"[dim]Proxying /api/* → http://127.0.0.1:{_api_port}[/]")
            try:
                httpd.serve_forever()
            except KeyboardInterrupt:
                console.print("\n[dim]Shutting down dashboard server.[/]")
|
||||
142
decnet/cli/workers.py
Normal file
142
decnet/cli/workers.py
Normal file
@@ -0,0 +1,142 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from decnet.env import DECNET_INGEST_LOG_FILE
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
|
||||
@app.command()
|
||||
def probe(
|
||||
log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path for RFC 5424 syslog + .json output (reads attackers from .json, writes results to both)"),
|
||||
interval: int = typer.Option(300, "--interval", "-i", help="Seconds between probe cycles (default: 300)"),
|
||||
timeout: float = typer.Option(5.0, "--timeout", help="Per-probe TCP timeout in seconds"),
|
||||
daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background (used by deploy, no console output)"),
|
||||
) -> None:
|
||||
"""Fingerprint attackers (JARM + HASSH + TCP/IP stack) discovered in the log stream."""
|
||||
import asyncio
|
||||
from decnet.prober import prober_worker
|
||||
|
||||
if daemon:
|
||||
log.info("probe daemonizing log_file=%s interval=%d", log_file, interval)
|
||||
_utils._daemonize()
|
||||
asyncio.run(prober_worker(log_file, interval=interval, timeout=timeout))
|
||||
return
|
||||
|
||||
log.info("probe command invoked log_file=%s interval=%d", log_file, interval)
|
||||
console.print(f"[bold cyan]DECNET-PROBER[/] watching {log_file} for attackers (interval: {interval}s)")
|
||||
console.print("[dim]Press Ctrl+C to stop[/]")
|
||||
try:
|
||||
asyncio.run(prober_worker(log_file, interval=interval, timeout=timeout))
|
||||
except KeyboardInterrupt:
|
||||
console.print("\n[yellow]DECNET-PROBER stopped.[/]")
|
||||
|
||||
    @app.command()
    def collect(
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write RFC 5424 syslog lines and .json records"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Stream Docker logs from all running decky service containers to a log file."""
        # Deferred imports: only this command needs the collector machinery.
        import asyncio
        from decnet.collector import log_collector_worker

        if daemon:
            # Detach before starting the (blocking) collector loop.
            log.info("collect daemonizing log_file=%s", log_file)
            _utils._daemonize()

        log.info("collect command invoked log_file=%s", log_file)
        console.print(f"[bold cyan]Collector starting[/] → {log_file}")
        # Runs until interrupted; no KeyboardInterrupt handler here, so a
        # foreground Ctrl+C propagates as a traceback.
        asyncio.run(log_collector_worker(log_file))
|
||||
|
||||
@app.command()
|
||||
def mutate(
|
||||
watch: bool = typer.Option(False, "--watch", "-w", help="Run continuously and mutate deckies according to their interval"),
|
||||
decky_name: Optional[str] = typer.Option(None, "--decky", help="Force mutate a specific decky immediately"),
|
||||
force_all: bool = typer.Option(False, "--all", help="Force mutate all deckies immediately"),
|
||||
daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
|
||||
) -> None:
|
||||
"""Manually trigger or continuously watch for decky mutation."""
|
||||
import asyncio
|
||||
from decnet.mutator import mutate_decky, mutate_all, run_watch_loop
|
||||
from decnet.web.dependencies import repo
|
||||
|
||||
if daemon:
|
||||
log.info("mutate daemonizing watch=%s", watch)
|
||||
_utils._daemonize()
|
||||
|
||||
async def _run() -> None:
|
||||
await repo.initialize()
|
||||
if watch:
|
||||
await run_watch_loop(repo)
|
||||
elif decky_name:
|
||||
await mutate_decky(decky_name, repo)
|
||||
elif force_all:
|
||||
await mutate_all(force=True, repo=repo)
|
||||
else:
|
||||
await mutate_all(force=False, repo=repo)
|
||||
|
||||
asyncio.run(_run())
|
||||
|
||||
@app.command(name="correlate")
|
||||
def correlate(
|
||||
log_file: Optional[str] = typer.Option(None, "--log-file", "-f", help="Path to DECNET syslog file to analyse"),
|
||||
min_deckies: int = typer.Option(2, "--min-deckies", "-m", help="Minimum number of distinct deckies an IP must touch to be reported"),
|
||||
output: str = typer.Option("table", "--output", "-o", help="Output format: table | json | syslog"),
|
||||
emit_syslog: bool = typer.Option(False, "--emit-syslog", help="Also print traversal events as RFC 5424 lines (for SIEM piping)"),
|
||||
daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
|
||||
) -> None:
|
||||
"""Analyse logs for cross-decky traversals and print the attacker movement graph."""
|
||||
import sys
|
||||
import json as _json
|
||||
from pathlib import Path
|
||||
from decnet.correlation.engine import CorrelationEngine
|
||||
|
||||
if daemon:
|
||||
log.info("correlate daemonizing log_file=%s", log_file)
|
||||
_utils._daemonize()
|
||||
|
||||
engine = CorrelationEngine()
|
||||
|
||||
if log_file:
|
||||
path = Path(log_file)
|
||||
if not path.exists():
|
||||
console.print(f"[red]Log file not found: {log_file}[/]")
|
||||
raise typer.Exit(1)
|
||||
engine.ingest_file(path)
|
||||
elif not sys.stdin.isatty():
|
||||
for line in sys.stdin:
|
||||
engine.ingest(line)
|
||||
else:
|
||||
console.print("[red]Provide --log-file or pipe log data via stdin.[/]")
|
||||
raise typer.Exit(1)
|
||||
|
||||
traversals = engine.traversals(min_deckies)
|
||||
|
||||
if output == "json":
|
||||
console.print_json(_json.dumps(engine.report_json(min_deckies), indent=2))
|
||||
elif output == "syslog":
|
||||
for line in engine.traversal_syslog_lines(min_deckies):
|
||||
typer.echo(line)
|
||||
else:
|
||||
if not traversals:
|
||||
console.print(
|
||||
f"[yellow]No traversals detected "
|
||||
f"(min_deckies={min_deckies}, events_indexed={engine.events_indexed}).[/]"
|
||||
)
|
||||
else:
|
||||
console.print(engine.report_table(min_deckies))
|
||||
console.print(
|
||||
f"[dim]Parsed {engine.lines_parsed} lines · "
|
||||
f"indexed {engine.events_indexed} events · "
|
||||
f"{len(engine.all_attackers())} unique IPs · "
|
||||
f"[bold]{len(traversals)}[/] traversal(s)[/]"
|
||||
)
|
||||
|
||||
if emit_syslog:
|
||||
for line in engine.traversal_syslog_lines(min_deckies):
|
||||
typer.echo(line)
|
||||
@@ -8,13 +8,100 @@ The ingester tails the .json file; rsyslog can consume the .log file independent
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
logger = logging.getLogger("decnet.collector")
|
||||
from decnet.logging import get_logger
|
||||
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer, inject_context as _inject_ctx
|
||||
|
||||
logger = get_logger("collector")
|
||||
|
||||
# ─── Ingestion rate limiter ───────────────────────────────────────────────────
|
||||
#
|
||||
# Rationale: connection-lifecycle events (connect/disconnect/accept/close) are
|
||||
# emitted once per TCP connection. During a portscan or credential-stuffing
|
||||
# run, a single attacker can generate hundreds of these per second from the
|
||||
# honeypot services themselves — each becoming a tiny WAL-write transaction
|
||||
# through the ingester, starving reads until the queue drains.
|
||||
#
|
||||
# The collector still writes every line to the raw .log file (forensic record
|
||||
# for rsyslog/SIEM). Only the .json path — which feeds SQLite — is deduped.
|
||||
#
|
||||
# Dedup key: (attacker_ip, decky, service, event_type)
|
||||
# Window: DECNET_COLLECTOR_RL_WINDOW_SEC seconds (default 1.0)
|
||||
# Scope: DECNET_COLLECTOR_RL_EVENT_TYPES comma list
|
||||
# (default: connect,disconnect,connection,accept,close)
|
||||
# Events outside that set bypass the limiter untouched.
|
||||
|
||||
def _parse_float_env(name: str, default: float) -> float:
|
||||
raw = os.environ.get(name)
|
||||
if raw is None:
|
||||
return default
|
||||
try:
|
||||
value = float(raw)
|
||||
except ValueError:
|
||||
logger.warning("collector: invalid %s=%r, using default %s", name, raw, default)
|
||||
return default
|
||||
return max(0.0, value)
|
||||
|
||||
|
||||
# Dedup window in seconds; a value <= 0 disables the limiter (see _should_ingest).
_RL_WINDOW_SEC: float = _parse_float_env("DECNET_COLLECTOR_RL_WINDOW_SEC", 1.0)
# Event types subject to dedup; comma-separated override via the env var.
_RL_EVENT_TYPES: frozenset[str] = frozenset(
    t.strip()
    for t in os.environ.get(
        "DECNET_COLLECTOR_RL_EVENT_TYPES",
        "connect,disconnect,connection,accept,close",
    ).split(",")
    if t.strip()
)
# Size cap on the dedup map before opportunistic GC runs (see _should_ingest).
_RL_MAX_ENTRIES: int = 10_000

# Shared dedup state: guarded by _rl_lock because _should_ingest is called
# from multiple container-streaming threads.
_rl_lock: threading.Lock = threading.Lock()
_rl_last: dict[tuple[str, str, str, str], float] = {}
|
||||
|
||||
|
||||
def _should_ingest(parsed: dict[str, Any]) -> bool:
    """
    Decide whether *parsed* goes to the JSON ingestion stream.

    Connection-lifecycle events are deduplicated: a second event carrying
    the same (attacker_ip, decky, service, event_type) inside the dedup
    window returns False. Everything else — and all events when the window
    is disabled — passes through untouched.
    """
    etype = parsed.get("event_type", "")
    if _RL_WINDOW_SEC <= 0.0 or etype not in _RL_EVENT_TYPES:
        return True

    dedup_key = (
        parsed.get("attacker_ip", "Unknown"),
        parsed.get("decky", ""),
        parsed.get("service", ""),
        etype,
    )
    ts = time.monotonic()
    with _rl_lock:
        previous = _rl_last.get(dedup_key, 0.0)
        if ts - previous < _RL_WINDOW_SEC:
            # Duplicate inside the window — suppress.
            return False
        _rl_last[dedup_key] = ts
        # Opportunistic GC: once the map outgrows the cap, evict entries
        # older than 60 windows (far outside any in-flight dedup range).
        if len(_rl_last) > _RL_MAX_ENTRIES:
            horizon = ts - _RL_WINDOW_SEC * 60.0
            for stale_key in [k for k, seen in _rl_last.items() if seen < horizon]:
                del _rl_last[stale_key]
    return True
|
||||
|
||||
|
||||
def _reset_rate_limiter() -> None:
    """Clear the dedup map — intended for use by tests only."""
    _rl_lock.acquire()
    try:
        _rl_last.clear()
    finally:
        _rl_lock.release()
|
||||
|
||||
# ─── RFC 5424 parser ──────────────────────────────────────────────────────────
|
||||
|
||||
@@ -23,13 +110,22 @@ _RFC5424_RE = re.compile(
|
||||
r"(\S+) " # 1: TIMESTAMP
|
||||
r"(\S+) " # 2: HOSTNAME (decky name)
|
||||
r"(\S+) " # 3: APP-NAME (service)
|
||||
r"- " # PROCID always NILVALUE
|
||||
r"\S+ " # PROCID — NILVALUE ("-") for syslog_bridge emitters,
|
||||
# real PID for native syslog callers like sshd/sudo
|
||||
# routed through rsyslog. Accept both; we don't consume it.
|
||||
r"(\S+) " # 4: MSGID (event_type)
|
||||
r"(.+)$", # 5: SD element + optional MSG
|
||||
)
|
||||
_SD_BLOCK_RE = re.compile(r'\[decnet@55555\s+(.*?)\]', re.DOTALL)
|
||||
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
|
||||
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
||||
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "ip")
|
||||
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "remote_addr", "target_ip", "ip")
|
||||
|
||||
# Free-form `key=value` pairs in the MSG body. Used for lines that bypass the
|
||||
# syslog_bridge SD format — e.g. the SSH container's PROMPT_COMMAND which
|
||||
# calls `logger -t bash "CMD uid=0 user=root src=1.2.3.4 pwd=/root cmd=…"`.
|
||||
# Values run until the next whitespace, so `cmd=…` at end-of-line is preserved
|
||||
# as one unit; we only care about IP-shaped fields here anyway.
|
||||
_MSG_KV_RE = re.compile(r'(\w+)=(\S+)')
|
||||
|
||||
|
||||
def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
|
||||
@@ -64,6 +160,19 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
|
||||
attacker_ip = fields[fname]
|
||||
break
|
||||
|
||||
# Fallback for plain `logger` callers that don't use SD params (notably
|
||||
# the SSH container's bash PROMPT_COMMAND: `logger -t bash "CMD … src=IP …"`).
|
||||
# Scan the MSG body for IP-shaped `key=value` tokens ONLY — don't fold
|
||||
# them into `fields`, because the frontend's parseEventBody already
|
||||
# renders kv pairs from the msg and doubling them up produces noisy
|
||||
# duplicate pills. This keeps attacker attribution working without
|
||||
# changing the shape of `fields` for non-SD lines.
|
||||
if attacker_ip == "Unknown" and msg:
|
||||
for k, v in _MSG_KV_RE.findall(msg):
|
||||
if k in _IP_FIELDS:
|
||||
attacker_ip = v
|
||||
break
|
||||
|
||||
try:
|
||||
ts_formatted = datetime.fromisoformat(ts_raw).strftime("%Y-%m-%d %H:%M:%S")
|
||||
except ValueError:
|
||||
@@ -115,19 +224,37 @@ def is_service_event(attrs: dict) -> bool:
|
||||
|
||||
# ─── Blocking stream worker (runs in a thread) ────────────────────────────────
|
||||
|
||||
def _reopen_if_needed(path: Path, fh: Optional[Any]) -> Any:
|
||||
"""Return fh if it still points to the same inode as path; otherwise close
|
||||
fh and open a fresh handle. Handles the file being deleted (manual rm) or
|
||||
rotated (logrotate rename + create)."""
|
||||
try:
|
||||
if fh is not None and os.fstat(fh.fileno()).st_ino == os.stat(path).st_ino:
|
||||
return fh
|
||||
except OSError:
|
||||
pass
|
||||
# File gone or inode changed — close stale handle and open a new one.
|
||||
if fh is not None:
|
||||
try:
|
||||
fh.close()
|
||||
except Exception: # nosec B110 — best-effort file handle cleanup
|
||||
pass
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
return open(path, "a", encoding="utf-8")
|
||||
|
||||
|
||||
@_traced("collector.stream_container")
|
||||
def _stream_container(container_id: str, log_path: Path, json_path: Path) -> None:
|
||||
"""Stream logs from one container and append to the host log files."""
|
||||
import docker # type: ignore[import]
|
||||
|
||||
lf: Optional[Any] = None
|
||||
jf: Optional[Any] = None
|
||||
try:
|
||||
client = docker.from_env()
|
||||
container = client.containers.get(container_id)
|
||||
log_stream = container.logs(stream=True, follow=True, stdout=True, stderr=False)
|
||||
buf = ""
|
||||
with (
|
||||
open(log_path, "a", encoding="utf-8") as lf,
|
||||
open(json_path, "a", encoding="utf-8") as jf,
|
||||
):
|
||||
for chunk in log_stream:
|
||||
buf += chunk.decode("utf-8", errors="replace")
|
||||
while "\n" in buf:
|
||||
@@ -135,14 +262,40 @@ def _stream_container(container_id: str, log_path: Path, json_path: Path) -> Non
|
||||
line = line.rstrip()
|
||||
if not line:
|
||||
continue
|
||||
lf = _reopen_if_needed(log_path, lf)
|
||||
lf.write(line + "\n")
|
||||
lf.flush()
|
||||
parsed = parse_rfc5424(line)
|
||||
if parsed:
|
||||
if _should_ingest(parsed):
|
||||
_tracer = _get_tracer("collector")
|
||||
with _tracer.start_as_current_span("collector.event") as _span:
|
||||
_span.set_attribute("decky", parsed.get("decky", ""))
|
||||
_span.set_attribute("service", parsed.get("service", ""))
|
||||
_span.set_attribute("event_type", parsed.get("event_type", ""))
|
||||
_span.set_attribute("attacker_ip", parsed.get("attacker_ip", ""))
|
||||
_inject_ctx(parsed)
|
||||
logger.debug("collector: event written decky=%s type=%s", parsed.get("decky"), parsed.get("event_type"))
|
||||
jf = _reopen_if_needed(json_path, jf)
|
||||
jf.write(json.dumps(parsed) + "\n")
|
||||
jf.flush()
|
||||
else:
|
||||
logger.debug(
|
||||
"collector: rate-limited decky=%s service=%s type=%s attacker=%s",
|
||||
parsed.get("decky"), parsed.get("service"),
|
||||
parsed.get("event_type"), parsed.get("attacker_ip"),
|
||||
)
|
||||
else:
|
||||
logger.debug("collector: malformed RFC5424 line snippet=%r", line[:80])
|
||||
except Exception as exc:
|
||||
logger.debug("Log stream ended for container %s: %s", container_id, exc)
|
||||
logger.debug("collector: log stream ended container_id=%s reason=%s", container_id, exc)
|
||||
finally:
|
||||
for fh in (lf, jf):
|
||||
if fh is not None:
|
||||
try:
|
||||
fh.close()
|
||||
except Exception: # nosec B110 — best-effort file handle cleanup
|
||||
pass
|
||||
|
||||
|
||||
# ─── Async collector ──────────────────────────────────────────────────────────
|
||||
@@ -164,15 +317,26 @@ async def log_collector_worker(log_file: str) -> None:
|
||||
active: dict[str, asyncio.Task[None]] = {}
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
# Dedicated thread pool so long-running container log streams don't
|
||||
# saturate the default asyncio executor and starve short-lived
|
||||
# to_thread() calls elsewhere (e.g. load_state in the web API).
|
||||
collector_pool = ThreadPoolExecutor(
|
||||
max_workers=64, thread_name_prefix="decnet-collector",
|
||||
)
|
||||
|
||||
def _spawn(container_id: str, container_name: str) -> None:
|
||||
if container_id not in active or active[container_id].done():
|
||||
active[container_id] = asyncio.ensure_future(
|
||||
asyncio.to_thread(_stream_container, container_id, log_path, json_path),
|
||||
loop.run_in_executor(
|
||||
collector_pool, _stream_container,
|
||||
container_id, log_path, json_path,
|
||||
),
|
||||
loop=loop,
|
||||
)
|
||||
logger.info("Collecting logs from container: %s", container_name)
|
||||
logger.info("collector: streaming container=%s", container_name)
|
||||
|
||||
try:
|
||||
logger.info("collector started log_path=%s", log_path)
|
||||
client = docker.from_env()
|
||||
|
||||
for container in client.containers.list():
|
||||
@@ -190,11 +354,15 @@ async def log_collector_worker(log_file: str) -> None:
|
||||
if cid and is_service_event(attrs):
|
||||
loop.call_soon_threadsafe(_spawn, cid, name)
|
||||
|
||||
await asyncio.to_thread(_watch_events)
|
||||
await loop.run_in_executor(collector_pool, _watch_events)
|
||||
|
||||
except asyncio.CancelledError:
|
||||
logger.info("collector shutdown requested cancelling %d tasks", len(active))
|
||||
for task in active.values():
|
||||
task.cancel()
|
||||
collector_pool.shutdown(wait=False)
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.error("Collector error: %s", exc)
|
||||
logger.error("collector error: %s", exc)
|
||||
finally:
|
||||
collector_pool.shutdown(wait=False)
|
||||
|
||||
@@ -64,6 +64,8 @@ def generate_compose(config: DecnetConfig) -> dict:
|
||||
# --- Service containers: share base network namespace ---
|
||||
for svc_name in decky.services:
|
||||
svc = get_service(svc_name)
|
||||
if svc.fleet_singleton:
|
||||
continue
|
||||
svc_cfg = decky.service_config.get(svc_name, {})
|
||||
fragment = svc.compose_fragment(decky.name, service_cfg=svc_cfg)
|
||||
|
||||
|
||||
131
decnet/config.py
131
decnet/config.py
@@ -4,13 +4,107 @@ State is persisted to decnet-state.json in the working directory.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import socket as _socket
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, field_validator # field_validator used by DeckyConfig
|
||||
from decnet.models import DeckyConfig, DecnetConfig # noqa: F401
|
||||
|
||||
from decnet.distros import random_hostname as _random_hostname
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# RFC 5424 syslog formatter
|
||||
# ---------------------------------------------------------------------------
|
||||
# Severity mapping: Python level → syslog severity (RFC 5424 §6.2.1)
|
||||
_SYSLOG_SEVERITY: dict[int, int] = {
    logging.CRITICAL: 2,  # Critical
    logging.ERROR: 3,  # Error
    logging.WARNING: 4,  # Warning
    logging.INFO: 6,  # Informational
    logging.DEBUG: 7,  # Debug
}
# Levels absent from the map fall back to 6 in Rfc5424Formatter.format().
_FACILITY_LOCAL0 = 16  # local0 (RFC 5424 §6.2.1 / POSIX)
|
||||
|
||||
|
||||
class Rfc5424Formatter(logging.Formatter):
    """Render log records as RFC 5424 syslog lines.

    Layout:
        <PRIVAL>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID STRUCTURED-DATA MSG

    For example:
        <134>1 2026-04-12T21:48:03.123456+00:00 host decnet 1234 decnet.config - Dev mode active
    """

    # Hostname is resolved once, at class-definition time.
    _hostname: str = _socket.gethostname()
    _app: str = "decnet"

    def format(self, record: logging.LogRecord) -> str:
        # PRIVAL = facility * 8 + severity; unknown levels map to Informational.
        severity_code = _SYSLOG_SEVERITY.get(record.levelno, 6)
        prival = _FACILITY_LOCAL0 * 8 + severity_code
        stamp = datetime.fromtimestamp(record.created, tz=timezone.utc).isoformat(timespec="microseconds")
        body = record.getMessage()
        if record.exc_info:
            body += "\n" + self.formatException(record.exc_info)
        # A record may override APP-NAME via a `decnet_component` attribute.
        app_name = getattr(record, "decnet_component", self._app)
        return f"<{prival}>1 {stamp} {self._hostname} {app_name} {os.getpid()} {record.name} - {body}"
|
||||
|
||||
|
||||
def _configure_logging(dev: bool) -> None:
    """Install RFC 5424 handlers on the root logger (idempotent).

    Always adds a StreamHandler (stderr). Also adds a RotatingFileHandler
    writing to DECNET_SYSTEM_LOGS (default: decnet.system.log in $PWD) so
    all microservice daemons — which redirect stderr to /dev/null — still
    produce readable logs. File handler is skipped under pytest.

    Args:
        dev: when True the root level is DEBUG, otherwise INFO.
    """
    # NOTE(review): local import — presumably avoids an import cycle with the
    # decnet.logging package; confirm before hoisting to module level.
    from decnet.logging.inode_aware_handler import InodeAwareRotatingFileHandler

    root = logging.getLogger()
    # Guard: if our StreamHandler is already installed, all handlers are set.
    if any(isinstance(h, logging.StreamHandler) and isinstance(h.formatter, Rfc5424Formatter)
           for h in root.handlers):
        return

    fmt = Rfc5424Formatter()
    root.setLevel(logging.DEBUG if dev else logging.INFO)

    # Default (no-args) StreamHandler writes to stderr.
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(fmt)
    root.addHandler(stream_handler)

    # Skip the file handler during pytest runs to avoid polluting the test cwd.
    _in_pytest = any(k.startswith("PYTEST") for k in os.environ)
    if not _in_pytest:
        _log_path = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.log")
        file_handler = InodeAwareRotatingFileHandler(
            _log_path,
            mode="a",
            maxBytes=10 * 1024 * 1024,  # 10 MB
            backupCount=5,
            encoding="utf-8",
        )
        file_handler.setFormatter(fmt)
        root.addHandler(file_handler)
        # Drop root ownership when invoked via sudo so non-root follow-up
        # commands (e.g. `decnet api` after `sudo decnet deploy`) can append.
        from decnet.privdrop import chown_to_invoking_user
        chown_to_invoking_user(_log_path)
|
||||
|
||||
|
||||
# Resolve developer mode from the environment, then install the RFC 5424
# handlers once at import time so every importer of this module logs uniformly.
_dev = os.environ.get("DECNET_DEVELOPER", "").lower() == "true"
_configure_logging(_dev)

log = logging.getLogger(__name__)

if _dev:
    log.debug("Developer mode: debug logging active")

# Calculate absolute path to the project root (where the config file resides)
_ROOT: Path = Path(__file__).parent.parent.absolute()
# Deployment state is persisted next to the package, not in the caller's $PWD.
STATE_FILE: Path = _ROOT / "decnet-state.json"
|
||||
@@ -21,39 +115,6 @@ def random_hostname(distro_slug: str = "debian") -> str:
|
||||
return _random_hostname(distro_slug)
|
||||
|
||||
|
||||
class DeckyConfig(BaseModel):
    """Configuration for one decky (a single honeypot host persona)."""

    name: str
    ip: str
    services: list[str]  # service slugs; validated non-empty below
    distro: str  # slug from distros.DISTROS, e.g. "debian", "ubuntu22"
    base_image: str  # Docker image for the base/IP-holder container
    build_base: str = "debian:bookworm-slim"  # apt-compatible image for service Dockerfiles
    hostname: str
    archetype: str | None = None  # archetype slug if spawned from an archetype profile
    service_config: dict[str, dict] = {}  # optional per-service persona config
    nmap_os: str = "linux"  # OS family for TCP/IP stack spoofing (see os_fingerprint.py)
    mutate_interval: int | None = None  # automatic rotation interval in minutes
    last_mutated: float = 0.0  # timestamp of last mutation

    @field_validator("services")
    @classmethod
    def services_not_empty(cls, v: list[str]) -> list[str]:
        # A decky without services has nothing to deploy — reject at parse time.
        if not v:
            raise ValueError("A decky must have at least one service.")
        return v
|
||||
|
||||
|
||||
class DecnetConfig(BaseModel):
    """Top-level deployment configuration (persisted via save_state)."""

    mode: Literal["unihost", "swarm"]
    interface: str  # host network interface the decky network attaches to
    subnet: str
    gateway: str
    deckies: list[DeckyConfig]
    log_file: str | None = None  # host path where the collector writes the log file
    ipvlan: bool = False  # use IPvlan L2 instead of MACVLAN (WiFi-friendly)
    mutate_interval: int | None = DEFAULT_MUTATE_INTERVAL  # global automatic rotation interval in minutes
|
||||
|
||||
|
||||
def save_state(config: DecnetConfig, compose_path: Path) -> None:
|
||||
payload = {
|
||||
"config": config.model_dump(),
|
||||
|
||||
90
decnet/config_ini.py
Normal file
90
decnet/config_ini.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""Parse /etc/decnet/decnet.ini and seed os.environ defaults.
|
||||
|
||||
The INI file is a convenience layer on top of the existing DECNET_* env
|
||||
vars. It never overrides an explicit environment variable (uses
|
||||
os.environ.setdefault). Call load_ini_config() once, very early, before
|
||||
any decnet.env import, so env.py picks up the seeded values as if they
|
||||
had been exported by the shell.
|
||||
|
||||
Shape::
|
||||
|
||||
[decnet]
|
||||
mode = agent # or "master"
|
||||
log-directory = /var/log/decnet
|
||||
disallow-master = true
|
||||
|
||||
[agent]
|
||||
master-host = 192.168.1.50
|
||||
master-port = 8770
|
||||
agent-port = 8765
|
||||
agent-dir = /home/anti/.decnet/agent
|
||||
...
|
||||
|
||||
[master]
|
||||
api-host = 0.0.0.0
|
||||
swarmctl-port = 8770
|
||||
listener-port = 6514
|
||||
...
|
||||
|
||||
Only the section matching `mode` is loaded. The other section is
|
||||
ignored silently so an agent host never reads master secrets (and
|
||||
vice versa). Keys are converted to SCREAMING_SNAKE_CASE and prefixed
|
||||
with ``DECNET_`` — e.g. ``master-host`` → ``DECNET_MASTER_HOST``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import configparser
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# Default INI location; an explicit $DECNET_CONFIG env var overrides it.
DEFAULT_CONFIG_PATH = Path("/etc/decnet/decnet.ini")

# The [decnet] section keys are role-agnostic and always exported.
_COMMON_KEYS = frozenset({"mode", "disallow-master", "log-directory"})
|
||||
|
||||
|
||||
def _key_to_env(key: str) -> str:
|
||||
return "DECNET_" + key.replace("-", "_").upper()
|
||||
|
||||
|
||||
def load_ini_config(path: Optional[Path] = None) -> Optional[Path]:
    """Seed os.environ defaults from the DECNET INI file.

    The [decnet] section is exported first, then only the section matching
    the resolved mode ("agent" or "master"); each key becomes a DECNET_*
    env var via os.environ.setdefault, so real environment variables always
    win over INI values.

    Args:
        path: explicit INI path; when None, $DECNET_CONFIG or the built-in
            default /etc/decnet/decnet.ini is used.

    Returns:
        The path actually loaded, or None when no file was read (missing
        file is a no-op — callers fall back to env vars / CLI / defaults).

    Raises:
        ValueError: when [decnet] mode is neither "agent" nor "master".
    """
    if path is None:
        explicit = os.environ.get("DECNET_CONFIG")
        path = Path(explicit) if explicit else DEFAULT_CONFIG_PATH

    if not path.is_file():
        return None

    cfg = configparser.ConfigParser()
    cfg.read(path)

    def _export(section_name: str) -> None:
        # setdefault keeps explicitly exported env vars authoritative.
        if cfg.has_section(section_name):
            for k, v in cfg.items(section_name):
                os.environ.setdefault(_key_to_env(k), v)

    # [decnet] first — its keys seed the mode decision below.
    _export("decnet")

    mode = os.environ.get("DECNET_MODE", "master").lower()
    if mode not in ("agent", "master"):
        raise ValueError(
            f"decnet.ini: [decnet] mode must be 'agent' or 'master', got '{mode}'"
        )

    # Only the role-specific section is read; the other is ignored silently.
    _export(mode)

    return path
|
||||
@@ -5,9 +5,9 @@ from decnet.correlation.graph import AttackerTraversal, TraversalHop
|
||||
from decnet.correlation.parser import LogEvent, parse_line
|
||||
|
||||
__all__ = [
|
||||
"CorrelationEngine",
|
||||
"AttackerTraversal",
|
||||
"TraversalHop",
|
||||
"CorrelationEngine",
|
||||
"LogEvent",
|
||||
"TraversalHop",
|
||||
"parse_line",
|
||||
]
|
||||
|
||||
@@ -33,6 +33,7 @@ from decnet.logging.syslog_formatter import (
|
||||
SEVERITY_WARNING,
|
||||
format_rfc5424,
|
||||
)
|
||||
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer
|
||||
|
||||
|
||||
class CorrelationEngine:
|
||||
@@ -64,6 +65,7 @@ class CorrelationEngine:
|
||||
self.events_indexed += 1
|
||||
return event
|
||||
|
||||
@_traced("correlation.ingest_file")
|
||||
def ingest_file(self, path: Path) -> int:
|
||||
"""
|
||||
Parse every line of *path* and index it.
|
||||
@@ -73,12 +75,18 @@ class CorrelationEngine:
|
||||
with open(path) as fh:
|
||||
for line in fh:
|
||||
self.ingest(line)
|
||||
_tracer = _get_tracer("correlation")
|
||||
with _tracer.start_as_current_span("correlation.ingest_file.summary") as _span:
|
||||
_span.set_attribute("lines_parsed", self.lines_parsed)
|
||||
_span.set_attribute("events_indexed", self.events_indexed)
|
||||
_span.set_attribute("unique_ips", len(self._events))
|
||||
return self.events_indexed
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Query #
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
@_traced("correlation.traversals")
|
||||
def traversals(self, min_deckies: int = 2) -> list[AttackerTraversal]:
|
||||
"""
|
||||
Return all attackers that touched at least *min_deckies* distinct
|
||||
@@ -135,6 +143,7 @@ class CorrelationEngine:
|
||||
)
|
||||
return table
|
||||
|
||||
@_traced("correlation.report_json")
|
||||
def report_json(self, min_deckies: int = 2) -> dict:
|
||||
"""Serialisable dict representation of all traversals."""
|
||||
return {
|
||||
@@ -147,6 +156,7 @@ class CorrelationEngine:
|
||||
"traversals": [t.to_dict() for t in self.traversals(min_deckies)],
|
||||
}
|
||||
|
||||
@_traced("correlation.traversal_syslog_lines")
|
||||
def traversal_syslog_lines(self, min_deckies: int = 2) -> list[str]:
|
||||
"""
|
||||
Emit one RFC 5424 syslog line per detected traversal.
|
||||
|
||||
@@ -6,7 +6,7 @@ the fields needed for cross-decky correlation: attacker IP, decky name,
|
||||
service, event type, and timestamp.
|
||||
|
||||
Log format (produced by decnet.logging.syslog_formatter):
|
||||
<PRI>1 TIMESTAMP HOSTNAME APP-NAME - MSGID [decnet@55555 k1="v1" k2="v2"] [MSG]
|
||||
<PRI>1 TIMESTAMP HOSTNAME APP-NAME - MSGID [relay@55555 k1="v1" k2="v2"] [MSG]
|
||||
|
||||
The attacker IP may appear under several field names depending on service:
|
||||
src_ip — ftp, smtp, http, most services
|
||||
@@ -31,14 +31,14 @@ _RFC5424_RE = re.compile(
|
||||
r"(.+)$", # 5: SD element + optional MSG
|
||||
)
|
||||
|
||||
# Structured data block: [decnet@55555 k="v" ...]
|
||||
_SD_BLOCK_RE = re.compile(r'\[decnet@55555\s+(.*?)\]', re.DOTALL)
|
||||
# Structured data block: [relay@55555 k="v" ...]
|
||||
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
|
||||
|
||||
# Individual param: key="value" (with escaped chars inside value)
|
||||
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
||||
|
||||
# Field names to probe for attacker IP, in priority order
|
||||
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "ip")
|
||||
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "remote_addr", "target_ip", "ip")
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -11,6 +11,8 @@ import docker
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.config import DecnetConfig, clear_state, load_state, save_state
|
||||
from decnet.composer import write_compose
|
||||
from decnet.network import (
|
||||
@@ -26,13 +28,14 @@ from decnet.network import (
|
||||
teardown_host_macvlan,
|
||||
)
|
||||
|
||||
log = get_logger("engine")
|
||||
console = Console()
|
||||
COMPOSE_FILE = Path("decnet-compose.yml")
|
||||
_CANONICAL_LOGGING = Path(__file__).parent.parent.parent / "templates" / "decnet_logging.py"
|
||||
_CANONICAL_LOGGING = Path(__file__).parent.parent / "templates" / "syslog_bridge.py"
|
||||
|
||||
|
||||
def _sync_logging_helper(config: DecnetConfig) -> None:
|
||||
"""Copy the canonical decnet_logging.py into every active template build context."""
|
||||
"""Copy the canonical syslog_bridge.py into every active template build context."""
|
||||
from decnet.services.registry import get_service
|
||||
seen: set[Path] = set()
|
||||
for decky in config.deckies:
|
||||
@@ -44,16 +47,32 @@ def _sync_logging_helper(config: DecnetConfig) -> None:
|
||||
if ctx is None or ctx in seen:
|
||||
continue
|
||||
seen.add(ctx)
|
||||
dest = ctx / "decnet_logging.py"
|
||||
dest = ctx / "syslog_bridge.py"
|
||||
if not dest.exists() or dest.read_bytes() != _CANONICAL_LOGGING.read_bytes():
|
||||
shutil.copy2(_CANONICAL_LOGGING, dest)
|
||||
|
||||
|
||||
def _compose(*args: str, compose_file: Path = COMPOSE_FILE, env: dict | None = None) -> None:
|
||||
import os
|
||||
cmd = ["docker", "compose", "-f", str(compose_file), *args]
|
||||
# -p decnet pins the compose project name. Without it, docker compose
|
||||
# derives the project from basename($PWD); when a daemon (systemd) runs
|
||||
# with WorkingDirectory=/ that basename is empty and compose aborts with
|
||||
# "project name must not be empty".
|
||||
cmd = ["docker", "compose", "-p", "decnet", "-f", str(compose_file), *args]
|
||||
merged = {**os.environ, **(env or {})}
|
||||
subprocess.run(cmd, check=True, env=merged) # nosec B603
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, env=merged) # nosec B603
|
||||
if result.stdout:
|
||||
print(result.stdout, end="")
|
||||
if result.returncode != 0:
|
||||
# Docker emits the useful detail ("Address already in use", which IP,
|
||||
# which port) on stderr. Surface it to the structured log so the
|
||||
# agent's journal carries it — without this the upstream traceback
|
||||
# just shows the exit code.
|
||||
if result.stderr:
|
||||
log.error("docker compose %s failed: %s", " ".join(args), result.stderr.strip())
|
||||
raise subprocess.CalledProcessError(
|
||||
result.returncode, cmd, result.stdout, result.stderr
|
||||
)
|
||||
|
||||
|
||||
_PERMANENT_ERRORS = (
|
||||
@@ -65,6 +84,7 @@ _PERMANENT_ERRORS = (
|
||||
)
|
||||
|
||||
|
||||
@_traced("engine.compose_with_retry")
|
||||
def _compose_with_retry(
|
||||
*args: str,
|
||||
compose_file: Path = COMPOSE_FILE,
|
||||
@@ -75,7 +95,11 @@ def _compose_with_retry(
|
||||
"""Run a docker compose command, retrying on transient failures."""
|
||||
import os
|
||||
last_exc: subprocess.CalledProcessError | None = None
|
||||
cmd = ["docker", "compose", "-f", str(compose_file), *args]
|
||||
# -p decnet pins the compose project name. Without it, docker compose
|
||||
# derives the project from basename($PWD); when a daemon (systemd) runs
|
||||
# with WorkingDirectory=/ that basename is empty and compose aborts with
|
||||
# "project name must not be empty".
|
||||
cmd = ["docker", "compose", "-p", "decnet", "-f", str(compose_file), *args]
|
||||
merged = {**os.environ, **(env or {})}
|
||||
for attempt in range(1, retries + 1):
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, env=merged) # nosec B603
|
||||
@@ -102,15 +126,21 @@ def _compose_with_retry(
|
||||
else:
|
||||
if result.stderr:
|
||||
console.print(f"[red]{result.stderr.strip()}[/]")
|
||||
log.error("docker compose %s failed after %d attempts: %s",
|
||||
" ".join(args), retries, result.stderr.strip())
|
||||
raise last_exc
|
||||
|
||||
|
||||
@_traced("engine.deploy")
|
||||
def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False, parallel: bool = False) -> None:
|
||||
log.info("deployment started n_deckies=%d interface=%s subnet=%s dry_run=%s", len(config.deckies), config.interface, config.subnet, dry_run)
|
||||
log.debug("deploy: deckies=%s", [d.name for d in config.deckies])
|
||||
client = docker.from_env()
|
||||
|
||||
ip_list = [d.ip for d in config.deckies]
|
||||
decky_range = ips_to_range(ip_list)
|
||||
host_ip = get_host_ip(config.interface)
|
||||
log.debug("deploy: ip_range=%s host_ip=%s", decky_range, host_ip)
|
||||
|
||||
net_driver = "IPvlan L2" if config.ipvlan else "MACVLAN"
|
||||
console.print(f"[bold cyan]Creating {net_driver} network[/] ({MACVLAN_NETWORK_NAME}) on {config.interface}")
|
||||
@@ -140,11 +170,21 @@ def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False,
|
||||
console.print(f"[bold cyan]Compose file written[/] → {compose_path}")
|
||||
|
||||
if dry_run:
|
||||
log.info("deployment dry-run complete compose_path=%s", compose_path)
|
||||
console.print("[yellow]Dry run — no containers started.[/]")
|
||||
return
|
||||
|
||||
save_state(config, compose_path)
|
||||
|
||||
# Pre-up cleanup: a prior half-failed `up` can leave containers still
|
||||
# holding the IPs/ports this run wants, which surfaces as the recurring
|
||||
# "Address already in use" from Docker's IPAM. Best-effort — ignore
|
||||
# failure (e.g. nothing to tear down on a clean host).
|
||||
try:
|
||||
_compose("down", "--remove-orphans", compose_file=compose_path)
|
||||
except subprocess.CalledProcessError:
|
||||
log.debug("pre-up cleanup: compose down failed (likely nothing to remove)")
|
||||
|
||||
build_env = {"DOCKER_BUILDKIT": "1"} if parallel else {}
|
||||
|
||||
console.print("[bold cyan]Building images and starting deckies...[/]")
|
||||
@@ -161,12 +201,16 @@ def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False,
|
||||
_compose_with_retry("build", "--no-cache", compose_file=compose_path)
|
||||
_compose_with_retry("up", "--build", "-d", compose_file=compose_path)
|
||||
|
||||
log.info("deployment complete n_deckies=%d", len(config.deckies))
|
||||
_print_status(config)
|
||||
|
||||
|
||||
@_traced("engine.teardown")
|
||||
def teardown(decky_id: str | None = None) -> None:
|
||||
log.info("teardown requested decky_id=%s", decky_id or "all")
|
||||
state = load_state()
|
||||
if state is None:
|
||||
log.warning("teardown: no active deployment found")
|
||||
console.print("[red]No active deployment found (no decnet-state.json).[/]")
|
||||
return
|
||||
|
||||
@@ -174,10 +218,14 @@ def teardown(decky_id: str | None = None) -> None:
|
||||
client = docker.from_env()
|
||||
|
||||
if decky_id:
|
||||
svc_names = [f"{decky_id}-{svc}" for svc in [d.services for d in config.deckies if d.name == decky_id]]
|
||||
if not svc_names:
|
||||
decky = next((d for d in config.deckies if d.name == decky_id), None)
|
||||
if decky is None:
|
||||
console.print(f"[red]Decky '{decky_id}' not found in current deployment.[/]")
|
||||
return
|
||||
svc_names = [f"{decky_id}-{svc}" for svc in decky.services]
|
||||
if not svc_names:
|
||||
log.warning("teardown: decky %s has no services to stop", decky_id)
|
||||
return
|
||||
_compose("stop", *svc_names, compose_file=compose_path)
|
||||
_compose("rm", "-f", *svc_names, compose_file=compose_path)
|
||||
else:
|
||||
@@ -193,6 +241,7 @@ def teardown(decky_id: str | None = None) -> None:
|
||||
clear_state()
|
||||
|
||||
net_driver = "IPvlan" if config.ipvlan else "MACVLAN"
|
||||
log.info("teardown complete all deckies removed network_driver=%s", net_driver)
|
||||
console.print(f"[green]All deckies torn down. {net_driver} network removed.[/]")
|
||||
|
||||
|
||||
|
||||
@@ -1,13 +1,19 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Calculate absolute path to the project root
|
||||
_ROOT: Path = Path(__file__).parent.parent.absolute()
|
||||
|
||||
# Load .env.local first, then fallback to .env
|
||||
# Load .env.local first, then fallback to .env.
|
||||
# Also check CWD so deployments that install into site-packages (e.g. the
|
||||
# self-updater's release slots) can ship a per-host .env.local at the
|
||||
# process's working directory without having to edit site-packages.
|
||||
load_dotenv(_ROOT / ".env.local")
|
||||
load_dotenv(_ROOT / ".env")
|
||||
load_dotenv(Path.cwd() / ".env.local")
|
||||
load_dotenv(Path.cwd() / ".env")
|
||||
|
||||
|
||||
def _port(name: str, default: int) -> int:
|
||||
@@ -39,26 +45,109 @@ def _require_env(name: str) -> str:
|
||||
f"Environment variable '{name}' is set to an insecure default ('{value}'). "
|
||||
f"Choose a strong, unique value before starting DECNET."
|
||||
)
|
||||
if name == "DECNET_JWT_SECRET" and len(value) < 32:
|
||||
_developer = os.environ.get("DECNET_DEVELOPER", "False").lower() == "true"
|
||||
if not _developer:
|
||||
raise ValueError(
|
||||
f"DECNET_JWT_SECRET is too short ({len(value)} bytes). "
|
||||
f"Use at least 32 characters to satisfy HS256 requirements (RFC 7518 §3.2)."
|
||||
)
|
||||
return value
|
||||
|
||||
|
||||
# System logging — all microservice daemons append here.
|
||||
DECNET_SYSTEM_LOGS: str = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.log")
|
||||
|
||||
# Set to "true" to embed the profiler inside the API process.
|
||||
# Leave unset (default) when the standalone `decnet profiler --daemon` is
|
||||
# running — embedding both produces two workers sharing the same DB cursor,
|
||||
# which causes events to be skipped or processed twice.
|
||||
DECNET_EMBED_PROFILER: bool = os.environ.get("DECNET_EMBED_PROFILER", "").lower() == "true"
|
||||
|
||||
# Set to "true" to embed the MACVLAN sniffer inside the API process.
|
||||
# Leave unset (default) when the standalone `decnet sniffer --daemon` is
|
||||
# running (which `decnet deploy` always does). Embedding both produces two
|
||||
# workers sniffing the same interface — duplicated events and wasted CPU.
|
||||
DECNET_EMBED_SNIFFER: bool = os.environ.get("DECNET_EMBED_SNIFFER", "").lower() == "true"
|
||||
|
||||
# Set to "true" to mount the Pyinstrument ASGI middleware on the FastAPI app.
|
||||
# Produces per-request HTML flamegraphs under ./profiles/. Off by default so
|
||||
# production and normal dev runs pay zero profiling overhead.
|
||||
DECNET_PROFILE_REQUESTS: bool = os.environ.get("DECNET_PROFILE_REQUESTS", "").lower() == "true"
|
||||
DECNET_PROFILE_DIR: str = os.environ.get("DECNET_PROFILE_DIR", "profiles")
|
||||
|
||||
# API Options
|
||||
DECNET_API_HOST: str = os.environ.get("DECNET_API_HOST", "0.0.0.0") # nosec B104
|
||||
DECNET_API_HOST: str = os.environ.get("DECNET_API_HOST", "127.0.0.1")
|
||||
DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000)
|
||||
DECNET_JWT_SECRET: str = _require_env("DECNET_JWT_SECRET")
|
||||
# DECNET_JWT_SECRET is resolved lazily via module __getattr__ so that agent /
|
||||
# updater / swarmctl subcommands (which never touch auth) can start without
|
||||
# the master's JWT secret being present in the environment.
|
||||
DECNET_INGEST_LOG_FILE: str | None = os.environ.get("DECNET_INGEST_LOG_FILE", "/var/log/decnet/decnet.log")
|
||||
|
||||
# SWARM log pipeline — RFC 5425 syslog-over-TLS between worker forwarders
|
||||
# and the master listener. Plaintext syslog across hosts is forbidden.
|
||||
DECNET_SWARM_SYSLOG_PORT: int = _port("DECNET_SWARM_SYSLOG_PORT", 6514)
|
||||
DECNET_SWARM_MASTER_HOST: str | None = os.environ.get("DECNET_SWARM_MASTER_HOST")
|
||||
|
||||
# Worker-side identity + swarmctl locator, seeded by the enroll bundle's
|
||||
# /etc/decnet/decnet.ini ([agent] host-uuid / master-host / swarmctl-port).
|
||||
# The agent heartbeat loop uses these to self-identify to the master.
|
||||
DECNET_HOST_UUID: str | None = os.environ.get("DECNET_HOST_UUID")
|
||||
DECNET_MASTER_HOST: str | None = os.environ.get("DECNET_MASTER_HOST")
|
||||
DECNET_SWARMCTL_PORT: int = _port("DECNET_SWARMCTL_PORT", 8770)
|
||||
|
||||
# Ingester batching: how many log rows to accumulate per commit, and the
|
||||
# max wait (ms) before flushing a partial batch. Larger batches reduce
|
||||
# SQLite write-lock contention; the timeout keeps latency bounded during
|
||||
# low-traffic periods.
|
||||
DECNET_BATCH_SIZE: int = int(os.environ.get("DECNET_BATCH_SIZE", "100"))
|
||||
DECNET_BATCH_MAX_WAIT_MS: int = int(os.environ.get("DECNET_BATCH_MAX_WAIT_MS", "250"))
|
||||
|
||||
# Web Dashboard Options
|
||||
DECNET_WEB_HOST: str = os.environ.get("DECNET_WEB_HOST", "0.0.0.0") # nosec B104
|
||||
DECNET_WEB_HOST: str = os.environ.get("DECNET_WEB_HOST", "127.0.0.1")
|
||||
DECNET_WEB_PORT: int = _port("DECNET_WEB_PORT", 8080)
|
||||
DECNET_ADMIN_USER: str = os.environ.get("DECNET_ADMIN_USER", "admin")
|
||||
DECNET_ADMIN_PASSWORD: str = os.environ.get("DECNET_ADMIN_PASSWORD", "admin")
|
||||
DECNET_DEVELOPER: bool = os.environ.get("DECNET_DEVELOPER", "False").lower() == "true"
|
||||
|
||||
# Host role — seeded by /etc/decnet/decnet.ini or exported directly.
|
||||
# "master" = the central server (api, web, swarmctl, listener).
|
||||
# "agent" = a worker node (agent, forwarder, updater). Workers gate their
|
||||
# Typer CLI to hide master-only commands (see decnet/cli.py).
|
||||
DECNET_MODE: str = os.environ.get("DECNET_MODE", "master").lower()
|
||||
# When mode=agent, hide master-only Typer commands. Set to "false" for dual-
|
||||
# role dev hosts where a single machine plays both sides.
|
||||
DECNET_DISALLOW_MASTER: bool = (
|
||||
os.environ.get("DECNET_DISALLOW_MASTER", "true").lower() == "true"
|
||||
)
|
||||
|
||||
# Tracing — set to "true" to enable OpenTelemetry distributed tracing.
|
||||
# Separate from DECNET_DEVELOPER so tracing can be toggled independently.
|
||||
DECNET_DEVELOPER_TRACING: bool = os.environ.get("DECNET_DEVELOPER_TRACING", "").lower() == "true"
|
||||
DECNET_OTEL_ENDPOINT: str = os.environ.get("DECNET_OTEL_ENDPOINT", "http://localhost:4317")
|
||||
|
||||
# Database Options
|
||||
DECNET_DB_TYPE: str = os.environ.get("DECNET_DB_TYPE", "sqlite").lower()
|
||||
DECNET_DB_URL: Optional[str] = os.environ.get("DECNET_DB_URL")
|
||||
# MySQL component vars (used only when DECNET_DB_URL is not set)
|
||||
DECNET_DB_HOST: str = os.environ.get("DECNET_DB_HOST", "localhost")
|
||||
DECNET_DB_PORT: int = _port("DECNET_DB_PORT", 3306) if os.environ.get("DECNET_DB_PORT") else 3306
|
||||
DECNET_DB_NAME: str = os.environ.get("DECNET_DB_NAME", "decnet")
|
||||
DECNET_DB_USER: str = os.environ.get("DECNET_DB_USER", "decnet")
|
||||
DECNET_DB_PASSWORD: Optional[str] = os.environ.get("DECNET_DB_PASSWORD")
|
||||
|
||||
# CORS — comma-separated list of allowed origins for the web dashboard API.
|
||||
# Defaults to the configured web host/port. Override with DECNET_CORS_ORIGINS if needed.
|
||||
# Example: DECNET_CORS_ORIGINS=http://192.168.1.50:9090,https://dashboard.example.com
|
||||
_web_hostname: str = "localhost" if DECNET_WEB_HOST in ("0.0.0.0", "127.0.0.1", "::") else DECNET_WEB_HOST # nosec B104
|
||||
_WILDCARD_ADDRS = {"0.0.0.0", "127.0.0.1", "::"} # nosec B104 — comparison only, not a bind
|
||||
_web_hostname: str = "localhost" if DECNET_WEB_HOST in _WILDCARD_ADDRS else DECNET_WEB_HOST
|
||||
_cors_default: str = f"http://{_web_hostname}:{DECNET_WEB_PORT}"
|
||||
_cors_raw: str = os.environ.get("DECNET_CORS_ORIGINS", _cors_default)
|
||||
DECNET_CORS_ORIGINS: list[str] = [o.strip() for o in _cors_raw.split(",") if o.strip()]
|
||||
|
||||
|
||||
def __getattr__(name: str) -> str:
|
||||
"""Lazy resolution for secrets only the master web/api process needs."""
|
||||
if name == "DECNET_JWT_SECRET":
|
||||
return _require_env("DECNET_JWT_SECRET")
|
||||
raise AttributeError(f"module 'decnet.env' has no attribute {name!r}")
|
||||
|
||||
@@ -12,13 +12,16 @@ from typing import Optional
|
||||
from decnet.archetypes import Archetype, get_archetype
|
||||
from decnet.config import DeckyConfig, random_hostname
|
||||
from decnet.distros import all_distros, get_distro, random_distro
|
||||
from decnet.ini_loader import IniConfig
|
||||
from decnet.models import IniConfig
|
||||
from decnet.services.registry import all_services
|
||||
|
||||
|
||||
def all_service_names() -> list[str]:
|
||||
"""Return all registered service names from the live plugin registry."""
|
||||
return sorted(all_services().keys())
|
||||
"""Return all registered per-decky service names (excludes fleet singletons)."""
|
||||
return sorted(
|
||||
name for name, svc in all_services().items()
|
||||
if not svc.fleet_singleton
|
||||
)
|
||||
|
||||
|
||||
def resolve_distros(
|
||||
@@ -146,15 +149,10 @@ def build_deckies_from_ini(
|
||||
svc_list = spec.services
|
||||
elif arch:
|
||||
svc_list = list(arch.services)
|
||||
elif randomize:
|
||||
elif randomize or (not spec.services and not arch):
|
||||
svc_pool = all_service_names()
|
||||
count = random.randint(1, min(3, len(svc_pool))) # nosec B311
|
||||
svc_list = random.sample(svc_pool, count) # nosec B311
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Decky '[{spec.name}]' has no services= in config. "
|
||||
"Add services=, archetype=, or use --randomize-services."
|
||||
)
|
||||
|
||||
resolved_nmap_os = spec.nmap_os or (arch.nmap_os if arch else "linux")
|
||||
|
||||
|
||||
@@ -41,38 +41,8 @@ Format:
|
||||
"""
|
||||
|
||||
import configparser
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
@dataclass
|
||||
class DeckySpec:
|
||||
name: str
|
||||
ip: str | None = None
|
||||
services: list[str] | None = None
|
||||
archetype: str | None = None
|
||||
service_config: dict[str, dict] = field(default_factory=dict)
|
||||
nmap_os: str | None = None # explicit OS family override (linux/windows/bsd/embedded/cisco)
|
||||
mutate_interval: int | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class CustomServiceSpec:
|
||||
"""Spec for a user-defined (bring-your-own) service."""
|
||||
name: str # service slug, e.g. "myservice" (section is "custom-myservice")
|
||||
image: str # Docker image to use
|
||||
exec_cmd: str # command to run inside the container
|
||||
ports: list[int] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
|
||||
class IniConfig:
|
||||
subnet: str | None = None
|
||||
gateway: str | None = None
|
||||
interface: str | None = None
|
||||
mutate_interval: int | None = None
|
||||
deckies: list[DeckySpec] = field(default_factory=list)
|
||||
custom_services: list[CustomServiceSpec] = field(default_factory=list)
|
||||
from decnet.models import IniConfig, DeckySpec, CustomServiceSpec, validate_ini_string # noqa: F401
|
||||
|
||||
|
||||
def load_ini(path: str | Path) -> IniConfig:
|
||||
@@ -86,27 +56,15 @@ def load_ini(path: str | Path) -> IniConfig:
|
||||
|
||||
def load_ini_from_string(content: str) -> IniConfig:
|
||||
"""Parse a DECNET INI string and return an IniConfig."""
|
||||
# Normalize line endings (CRLF → LF, bare CR → LF) so the validator
|
||||
# and configparser both see the same line boundaries.
|
||||
content = content.replace('\r\n', '\n').replace('\r', '\n')
|
||||
validate_ini_string(content)
|
||||
cp = configparser.ConfigParser()
|
||||
cp = configparser.ConfigParser(strict=False)
|
||||
cp.read_string(content)
|
||||
return _parse_configparser(cp)
|
||||
|
||||
|
||||
def validate_ini_string(content: str) -> None:
|
||||
"""Perform safety and sanity checks on raw INI content string."""
|
||||
# 1. Size limit (e.g. 512KB)
|
||||
if len(content) > 512 * 1024:
|
||||
raise ValueError("INI content too large (max 512KB).")
|
||||
|
||||
# 2. Ensure it's not empty
|
||||
if not content.strip():
|
||||
raise ValueError("INI content is empty.")
|
||||
|
||||
# 3. Basic structure check (must contain at least one section header)
|
||||
if "[" not in content or "]" not in content:
|
||||
raise ValueError("Invalid INI format: no sections found.")
|
||||
|
||||
|
||||
def _parse_configparser(cp: configparser.ConfigParser) -> IniConfig:
|
||||
cfg = IniConfig()
|
||||
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
"""
|
||||
DECNET application logging helpers.
|
||||
|
||||
Usage:
|
||||
from decnet.logging import get_logger
|
||||
log = get_logger("engine") # APP-NAME in RFC 5424 output becomes "engine"
|
||||
|
||||
The returned logger propagates to the root logger (configured in config.py with
|
||||
Rfc5424Formatter), so level control via DECNET_DEVELOPER still applies globally.
|
||||
|
||||
When ``DECNET_DEVELOPER_TRACING`` is active, every LogRecord is enriched with
|
||||
``otel_trace_id`` and ``otel_span_id`` from the current OTEL span context.
|
||||
This lets you correlate log lines with Jaeger traces — click a log entry and
|
||||
jump straight to the span that produced it.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
|
||||
class _ComponentFilter(logging.Filter):
|
||||
"""Injects *decnet_component* onto every LogRecord so Rfc5424Formatter can
|
||||
use it as the RFC 5424 APP-NAME field instead of the hardcoded "decnet"."""
|
||||
|
||||
def __init__(self, component: str) -> None:
|
||||
super().__init__()
|
||||
self.component = component
|
||||
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
record.decnet_component = self.component # type: ignore[attr-defined]
|
||||
return True
|
||||
|
||||
|
||||
class _TraceContextFilter(logging.Filter):
|
||||
"""Injects ``otel_trace_id`` and ``otel_span_id`` onto every LogRecord
|
||||
from the active OTEL span context.
|
||||
|
||||
Installed once by ``enable_trace_context()`` on the root ``decnet`` logger
|
||||
so all child loggers inherit the enrichment via propagation.
|
||||
|
||||
When no span is active, both fields are set to ``"0"`` (cheap string
|
||||
comparison downstream, no None-checks needed).
|
||||
"""
|
||||
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
try:
|
||||
from opentelemetry import trace
|
||||
span = trace.get_current_span()
|
||||
ctx = span.get_span_context()
|
||||
if ctx and ctx.trace_id:
|
||||
record.otel_trace_id = format(ctx.trace_id, "032x") # type: ignore[attr-defined]
|
||||
record.otel_span_id = format(ctx.span_id, "016x") # type: ignore[attr-defined]
|
||||
else:
|
||||
record.otel_trace_id = "0" # type: ignore[attr-defined]
|
||||
record.otel_span_id = "0" # type: ignore[attr-defined]
|
||||
except Exception:
|
||||
record.otel_trace_id = "0" # type: ignore[attr-defined]
|
||||
record.otel_span_id = "0" # type: ignore[attr-defined]
|
||||
return True
|
||||
|
||||
|
||||
_trace_filter_installed: bool = False
|
||||
|
||||
|
||||
def enable_trace_context() -> None:
|
||||
"""Install the OTEL trace-context filter on the root ``decnet`` logger.
|
||||
|
||||
Called once from ``decnet.telemetry.setup_tracing()`` after the
|
||||
TracerProvider is initialised. Safe to call multiple times (idempotent).
|
||||
"""
|
||||
global _trace_filter_installed
|
||||
if _trace_filter_installed:
|
||||
return
|
||||
root = logging.getLogger("decnet")
|
||||
root.addFilter(_TraceContextFilter())
|
||||
_trace_filter_installed = True
|
||||
|
||||
|
||||
def get_logger(component: str) -> logging.Logger:
|
||||
"""Return a named logger that self-identifies as *component* in RFC 5424.
|
||||
|
||||
Valid components: cli, engine, api, mutator, collector.
|
||||
|
||||
The logger is named ``decnet.<component>`` and propagates normally, so the
|
||||
root handler (Rfc5424Formatter + level gate from DECNET_DEVELOPER) handles
|
||||
output. Calling this function multiple times for the same component is safe.
|
||||
"""
|
||||
logger = logging.getLogger(f"decnet.{component}")
|
||||
if not any(isinstance(f, _ComponentFilter) for f in logger.filters):
|
||||
logger.addFilter(_ComponentFilter(component))
|
||||
return logger
|
||||
|
||||
@@ -13,29 +13,37 @@ import logging.handlers
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from decnet.logging.inode_aware_handler import InodeAwareRotatingFileHandler
|
||||
from decnet.privdrop import chown_to_invoking_user, chown_tree_to_invoking_user
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
_LOG_FILE_ENV = "DECNET_LOG_FILE"
|
||||
_DEFAULT_LOG_FILE = "/var/log/decnet/decnet.log"
|
||||
_MAX_BYTES = 10 * 1024 * 1024 # 10 MB
|
||||
_BACKUP_COUNT = 5
|
||||
|
||||
_handler: logging.handlers.RotatingFileHandler | None = None
|
||||
_handler: InodeAwareRotatingFileHandler | None = None
|
||||
_logger: logging.Logger | None = None
|
||||
|
||||
|
||||
def _get_logger() -> logging.Logger:
|
||||
@_traced("logging.init_file_handler")
|
||||
def _init_file_handler() -> logging.Logger:
|
||||
"""One-time initialisation of the rotating file handler."""
|
||||
global _handler, _logger
|
||||
if _logger is not None:
|
||||
return _logger
|
||||
|
||||
log_path = Path(os.environ.get(_LOG_FILE_ENV, _DEFAULT_LOG_FILE))
|
||||
log_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
# When running under sudo, hand the parent dir back to the invoking user
|
||||
# so a subsequent non-root `decnet api` can also write to it.
|
||||
chown_tree_to_invoking_user(log_path.parent)
|
||||
|
||||
_handler = logging.handlers.RotatingFileHandler(
|
||||
_handler = InodeAwareRotatingFileHandler(
|
||||
log_path,
|
||||
maxBytes=_MAX_BYTES,
|
||||
backupCount=_BACKUP_COUNT,
|
||||
encoding="utf-8",
|
||||
)
|
||||
chown_to_invoking_user(log_path)
|
||||
_handler.setFormatter(logging.Formatter("%(message)s"))
|
||||
|
||||
_logger = logging.getLogger("decnet.syslog")
|
||||
@@ -46,6 +54,12 @@ def _get_logger() -> logging.Logger:
|
||||
return _logger
|
||||
|
||||
|
||||
def _get_logger() -> logging.Logger:
|
||||
if _logger is not None:
|
||||
return _logger
|
||||
return _init_file_handler()
|
||||
|
||||
|
||||
def write_syslog(line: str) -> None:
|
||||
"""Write a single RFC 5424 syslog line to the rotating log file."""
|
||||
try:
|
||||
|
||||
@@ -11,6 +11,8 @@ shared utilities for validating and parsing the log_target string.
|
||||
|
||||
import socket
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
|
||||
def parse_log_target(log_target: str) -> tuple[str, int]:
|
||||
"""
|
||||
@@ -23,6 +25,7 @@ def parse_log_target(log_target: str) -> tuple[str, int]:
|
||||
return parts[0], int(parts[1])
|
||||
|
||||
|
||||
@_traced("logging.probe_log_target")
|
||||
def probe_log_target(log_target: str, timeout: float = 2.0) -> bool:
|
||||
"""
|
||||
Return True if the log target is reachable (TCP connect succeeds).
|
||||
|
||||
60
decnet/logging/inode_aware_handler.py
Normal file
60
decnet/logging/inode_aware_handler.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""
|
||||
RotatingFileHandler that detects external deletion or rotation.
|
||||
|
||||
Stdlib ``RotatingFileHandler`` holds an open file descriptor for the
|
||||
lifetime of the handler. If the target file is deleted (``rm``) or
|
||||
rotated out (``logrotate`` without ``copytruncate``), the handler keeps
|
||||
writing to the now-orphaned inode until its own size-based rotation
|
||||
finally triggers — silently losing every line in between.
|
||||
|
||||
Stdlib ``WatchedFileHandler`` solves exactly this problem but doesn't
|
||||
rotate by size. This subclass combines both: before each emit we stat
|
||||
the configured path and compare its inode/device to the currently open
|
||||
file; on mismatch we close and reopen.
|
||||
|
||||
Cheap: one ``os.stat`` per log record. Matches the pattern used by
|
||||
``decnet/collector/worker.py:_reopen_if_needed``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import logging.handlers
|
||||
import os
|
||||
|
||||
|
||||
class InodeAwareRotatingFileHandler(logging.handlers.RotatingFileHandler):
|
||||
"""RotatingFileHandler that reopens the target on external rotation/deletion."""
|
||||
|
||||
def _should_reopen(self) -> bool:
|
||||
if self.stream is None:
|
||||
return True
|
||||
try:
|
||||
disk_stat = os.stat(self.baseFilename)
|
||||
except FileNotFoundError:
|
||||
return True
|
||||
except OSError:
|
||||
return False
|
||||
try:
|
||||
open_stat = os.fstat(self.stream.fileno())
|
||||
except OSError:
|
||||
return True
|
||||
return (disk_stat.st_ino != open_stat.st_ino
|
||||
or disk_stat.st_dev != open_stat.st_dev)
|
||||
|
||||
def emit(self, record: logging.LogRecord) -> None:
|
||||
if self._should_reopen():
|
||||
try:
|
||||
if self.stream is not None:
|
||||
self.close()
|
||||
except Exception: # nosec B110
|
||||
pass
|
||||
try:
|
||||
self.stream = self._open()
|
||||
except OSError:
|
||||
# A logging handler MUST NOT crash its caller. If we can't
|
||||
# reopen (e.g. file is root-owned after `sudo decnet deploy`
|
||||
# and the current process is non-root), defer to the stdlib
|
||||
# error path, which just prints a traceback to stderr.
|
||||
self.handleError(record)
|
||||
return
|
||||
super().emit(record)
|
||||
@@ -5,7 +5,7 @@ Produces fully-compliant syslog messages:
|
||||
<PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG
|
||||
|
||||
Facility: local0 (16)
|
||||
PEN for structured data: decnet@55555
|
||||
PEN for structured data: relay@55555
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -16,7 +16,7 @@ from typing import Any
|
||||
|
||||
FACILITY_LOCAL0 = 16
|
||||
NILVALUE = "-"
|
||||
_SD_ID = "decnet@55555"
|
||||
_SD_ID = "relay@55555"
|
||||
|
||||
SEVERITY_INFO = 6
|
||||
SEVERITY_WARNING = 4
|
||||
|
||||
123
decnet/models.py
Normal file
123
decnet/models.py
Normal file
@@ -0,0 +1,123 @@
|
||||
"""
|
||||
DECNET Domain Models.
|
||||
Centralized repository for all Pydantic specifications used throughout the project.
|
||||
This file ensures that core domain logic has no dependencies on the web or database layers.
|
||||
"""
|
||||
from typing import Optional, List, Dict, Literal, Annotated, Any
|
||||
from pydantic import BaseModel, ConfigDict, Field as PydanticField, field_validator, BeforeValidator
|
||||
import configparser
|
||||
|
||||
|
||||
# --- INI Specification Models ---
|
||||
|
||||
def validate_ini_string(v: Any) -> str:
|
||||
"""Structural validator for DECNET INI strings using configparser."""
|
||||
if not isinstance(v, str):
|
||||
# This remains an internal type mismatch (caught by Pydantic usually)
|
||||
raise ValueError("INI content must be a string")
|
||||
|
||||
# 512KB limit to prevent DoS/OOM
|
||||
if len(v) > 512 * 1024:
|
||||
raise ValueError("INI content is too large (max 512KB)")
|
||||
|
||||
if not v.strip():
|
||||
# Using exact phrasing expected by tests
|
||||
raise ValueError("INI content is empty")
|
||||
|
||||
parser = configparser.ConfigParser(interpolation=None, allow_no_value=True, strict=False)
|
||||
try:
|
||||
parser.read_string(v)
|
||||
if not parser.sections():
|
||||
raise ValueError("The provided INI content must contain at least one section (no sections found)")
|
||||
except configparser.Error as e:
|
||||
# If it's a generic parsing error, we check if it's effectively a "missing sections" error
|
||||
if "no section headers" in str(e).lower():
|
||||
raise ValueError("Invalid INI format: no sections found")
|
||||
raise ValueError(f"Invalid INI format: {str(e)}")
|
||||
|
||||
return v
|
||||
|
||||
# Reusable type that enforces INI structure during initialization.
|
||||
# Removed min_length=1 to make empty strings schema-compliant yet semantically invalid (mapped to 409).
|
||||
IniContent = Annotated[str, BeforeValidator(validate_ini_string)]
|
||||
|
||||
class DeckySpec(BaseModel):
|
||||
"""Configuration spec for a single decky as defined in the INI file."""
|
||||
model_config = ConfigDict(strict=True, extra="forbid")
|
||||
name: str = PydanticField(..., max_length=128, pattern=r"^[A-Za-z0-9\-_.]+$")
|
||||
ip: Optional[str] = None
|
||||
services: Optional[List[str]] = None
|
||||
archetype: Optional[str] = None
|
||||
service_config: Dict[str, Dict] = PydanticField(default_factory=dict)
|
||||
nmap_os: Optional[str] = None
|
||||
mutate_interval: Optional[int] = PydanticField(None, ge=1)
|
||||
|
||||
|
||||
class CustomServiceSpec(BaseModel):
|
||||
"""Spec for a user-defined (bring-your-own) service."""
|
||||
model_config = ConfigDict(strict=True, extra="forbid")
|
||||
name: str
|
||||
image: str
|
||||
exec_cmd: str
|
||||
ports: List[int] = PydanticField(default_factory=list)
|
||||
|
||||
|
||||
class IniConfig(BaseModel):
|
||||
"""The complete structured representation of a DECNET INI file."""
|
||||
model_config = ConfigDict(strict=True, extra="forbid")
|
||||
subnet: Optional[str] = None
|
||||
gateway: Optional[str] = None
|
||||
interface: Optional[str] = None
|
||||
mutate_interval: Optional[int] = PydanticField(None, ge=1)
|
||||
deckies: List[DeckySpec] = PydanticField(default_factory=list, min_length=1)
|
||||
custom_services: List[CustomServiceSpec] = PydanticField(default_factory=list)
|
||||
|
||||
@field_validator("deckies")
|
||||
@classmethod
|
||||
def at_least_one_decky(cls, v: List[DeckySpec]) -> List[DeckySpec]:
|
||||
"""Ensure that an INI deployment always contains at least one machine."""
|
||||
if not v:
|
||||
raise ValueError("INI must contain at least one decky section")
|
||||
return v
|
||||
|
||||
|
||||
# --- Runtime Configuration Models ---
|
||||
|
||||
class DeckyConfig(BaseModel):
|
||||
"""Full operational configuration for a deployed decky container."""
|
||||
model_config = ConfigDict(strict=True, extra="forbid")
|
||||
name: str
|
||||
ip: str
|
||||
services: list[str] = PydanticField(..., min_length=1)
|
||||
distro: str # slug from distros.DISTROS, e.g. "debian", "ubuntu22"
|
||||
base_image: str # Docker image for the base/IP-holder container
|
||||
build_base: str = "debian:bookworm-slim" # apt-compatible image for service Dockerfiles
|
||||
hostname: str
|
||||
archetype: str | None = None # archetype slug if spawned from an archetype profile
|
||||
service_config: dict[str, dict] = PydanticField(default_factory=dict)
|
||||
nmap_os: str = "linux" # OS family for TCP/IP stack spoofing (see os_fingerprint.py)
|
||||
mutate_interval: int | None = None # automatic rotation interval in minutes
|
||||
last_mutated: float = 0.0 # timestamp of last mutation
|
||||
last_login_attempt: float = 0.0 # timestamp of most recent interaction
|
||||
# SWARM: the SwarmHost.uuid that runs this decky. None in unihost mode
|
||||
# so existing state files deserialize unchanged.
|
||||
host_uuid: str | None = None
|
||||
|
||||
@field_validator("services")
|
||||
@classmethod
|
||||
def services_not_empty(cls, v: list[str]) -> list[str]:
|
||||
if not v:
|
||||
raise ValueError("A decky must have at least one service.")
|
||||
return v
|
||||
|
||||
|
||||
class DecnetConfig(BaseModel):
|
||||
"""Root configuration for the entire DECNET fleet deployment."""
|
||||
mode: Literal["unihost", "swarm"]
|
||||
interface: str
|
||||
subnet: str
|
||||
gateway: str
|
||||
deckies: list[DeckyConfig] = PydanticField(..., min_length=1)
|
||||
log_file: str | None = None # host path where the collector writes the log file
|
||||
ipvlan: bool = False # use IPvlan L2 instead of MACVLAN (WiFi-friendly)
|
||||
mutate_interval: int | None = 30 # global automatic rotation interval in minutes
|
||||
@@ -12,25 +12,35 @@ from rich.console import Console
|
||||
from decnet.archetypes import get_archetype
|
||||
from decnet.fleet import all_service_names
|
||||
from decnet.composer import write_compose
|
||||
from decnet.config import DeckyConfig, load_state, save_state
|
||||
from decnet.engine import COMPOSE_FILE, _compose_with_retry
|
||||
from decnet.config import DeckyConfig, DecnetConfig
|
||||
from decnet.engine import _compose_with_retry
|
||||
from decnet.logging import get_logger
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
import subprocess # nosec B404
|
||||
from pathlib import Path
|
||||
import anyio
|
||||
import asyncio
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
|
||||
log = get_logger("mutator")
|
||||
console = Console()
|
||||
|
||||
|
||||
def mutate_decky(decky_name: str) -> bool:
|
||||
@_traced("mutator.mutate_decky")
|
||||
async def mutate_decky(decky_name: str, repo: BaseRepository) -> bool:
|
||||
"""
|
||||
Perform an Intra-Archetype Shuffle for a specific decky.
|
||||
Returns True if mutation succeeded, False otherwise.
|
||||
"""
|
||||
state = load_state()
|
||||
if state is None:
|
||||
console.print("[red]No active deployment found (no decnet-state.json).[/]")
|
||||
log.debug("mutate_decky: start decky=%s", decky_name)
|
||||
state_dict = await repo.get_state("deployment")
|
||||
if state_dict is None:
|
||||
log.error("mutate_decky: no active deployment found in database")
|
||||
console.print("[red]No active deployment found in database.[/]")
|
||||
return False
|
||||
|
||||
config, compose_path = state
|
||||
config = DecnetConfig(**state_dict["config"])
|
||||
compose_path = Path(state_dict["compose_path"])
|
||||
decky: Optional[DeckyConfig] = next((d for d in config.deckies if d.name == decky_name), None)
|
||||
|
||||
if not decky:
|
||||
@@ -63,31 +73,40 @@ def mutate_decky(decky_name: str) -> bool:
|
||||
decky.services = list(chosen)
|
||||
decky.last_mutated = time.time()
|
||||
|
||||
save_state(config, compose_path)
|
||||
# Save to DB
|
||||
await repo.set_state("deployment", {"config": config.model_dump(), "compose_path": str(compose_path)})
|
||||
|
||||
# Still writes files for Docker to use
|
||||
write_compose(config, compose_path)
|
||||
|
||||
log.info("mutation applied decky=%s services=%s", decky_name, ",".join(decky.services))
|
||||
console.print(f"[cyan]Mutating '{decky_name}' to services: {', '.join(decky.services)}[/]")
|
||||
|
||||
try:
|
||||
_compose_with_retry("up", "-d", "--remove-orphans", compose_file=compose_path)
|
||||
except subprocess.CalledProcessError as e:
|
||||
console.print(f"[red]Failed to mutate '{decky_name}': {e.stderr}[/]")
|
||||
# Wrap blocking call in thread
|
||||
await anyio.to_thread.run_sync(_compose_with_retry, "up", "-d", "--remove-orphans", compose_path)
|
||||
except Exception as e:
|
||||
log.error("mutation failed decky=%s error=%s", decky_name, e)
|
||||
console.print(f"[red]Failed to mutate '{decky_name}': {e}[/]")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def mutate_all(force: bool = False) -> None:
|
||||
@_traced("mutator.mutate_all")
|
||||
async def mutate_all(repo: BaseRepository, force: bool = False) -> None:
|
||||
"""
|
||||
Check all deckies and mutate those that are due.
|
||||
If force=True, mutates all deckies regardless of schedule.
|
||||
"""
|
||||
state = load_state()
|
||||
if state is None:
|
||||
log.debug("mutate_all: start force=%s", force)
|
||||
state_dict = await repo.get_state("deployment")
|
||||
if state_dict is None:
|
||||
log.error("mutate_all: no active deployment found")
|
||||
console.print("[red]No active deployment found.[/]")
|
||||
return
|
||||
|
||||
config, _ = state
|
||||
config = DecnetConfig(**state_dict["config"])
|
||||
now = time.time()
|
||||
|
||||
mutated_count = 0
|
||||
@@ -103,20 +122,26 @@ def mutate_all(force: bool = False) -> None:
|
||||
due = elapsed_secs >= (interval_mins * 60)
|
||||
|
||||
if due:
|
||||
success = mutate_decky(decky.name)
|
||||
success = await mutate_decky(decky.name, repo=repo)
|
||||
if success:
|
||||
mutated_count += 1
|
||||
|
||||
if mutated_count == 0 and not force:
|
||||
log.debug("mutate_all: no deckies due for mutation")
|
||||
console.print("[dim]No deckies are due for mutation.[/]")
|
||||
else:
|
||||
log.info("mutate_all: complete mutated_count=%d", mutated_count)
|
||||
|
||||
|
||||
def run_watch_loop(poll_interval_secs: int = 10) -> None:
|
||||
@_traced("mutator.watch_loop")
|
||||
async def run_watch_loop(repo: BaseRepository, poll_interval_secs: int = 10) -> None:
|
||||
"""Run an infinite loop checking for deckies that need mutation."""
|
||||
log.info("mutator watch loop started poll_interval_secs=%d", poll_interval_secs)
|
||||
console.print(f"[green]DECNET Mutator Watcher started (polling every {poll_interval_secs}s).[/]")
|
||||
try:
|
||||
while True:
|
||||
mutate_all(force=False)
|
||||
time.sleep(poll_interval_secs)
|
||||
await mutate_all(force=False, repo=repo)
|
||||
await asyncio.sleep(poll_interval_secs)
|
||||
except KeyboardInterrupt:
|
||||
log.info("mutator watch loop stopped")
|
||||
console.print("\n[dim]Mutator watcher stopped.[/]")
|
||||
|
||||
@@ -126,22 +126,57 @@ def allocate_ips(
|
||||
# Docker MACVLAN network
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def create_macvlan_network(
|
||||
def _ensure_network(
|
||||
client: docker.DockerClient,
|
||||
*,
|
||||
driver: str,
|
||||
interface: str,
|
||||
subnet: str,
|
||||
gateway: str,
|
||||
ip_range: str,
|
||||
extra_options: dict | None = None,
|
||||
) -> None:
|
||||
"""Create the MACVLAN Docker network. No-op if it already exists."""
|
||||
existing = [n.name for n in client.networks.list()]
|
||||
if MACVLAN_NETWORK_NAME in existing:
|
||||
return
|
||||
"""Create the decnet docker network with ``driver``, replacing any
|
||||
existing network of the same name that was built with a different driver.
|
||||
|
||||
Why the replace-on-driver-mismatch: macvlan and ipvlan slaves can't
|
||||
coexist on the same parent interface. If an earlier run left behind a
|
||||
macvlan-driver network and we're now asked for ipvlan (or vice versa),
|
||||
short-circuiting on name alone leaves Docker attaching new containers
|
||||
to the old driver and the host NIC ends up EBUSY on the next port
|
||||
create. So: when driver disagrees, disconnect everything and DROP it.
|
||||
"""
|
||||
options = {"parent": interface}
|
||||
if extra_options:
|
||||
options.update(extra_options)
|
||||
|
||||
for net in client.networks.list(names=[MACVLAN_NETWORK_NAME]):
|
||||
if net.attrs.get("Driver") == driver:
|
||||
# Same driver — but if the IPAM pool drifted (different subnet,
|
||||
# gateway, or ip-range than this deploy asks for), reusing it
|
||||
# hands out addresses from the old pool and we race the real LAN.
|
||||
# Compare and rebuild on mismatch.
|
||||
pools = (net.attrs.get("IPAM") or {}).get("Config") or []
|
||||
cur = pools[0] if pools else {}
|
||||
if (
|
||||
cur.get("Subnet") == subnet
|
||||
and cur.get("Gateway") == gateway
|
||||
and cur.get("IPRange") == ip_range
|
||||
):
|
||||
return # right driver AND matching pool, leave it alone
|
||||
# Driver mismatch OR IPAM drift — tear it down. Disconnect any live
|
||||
# containers first so `remove()` doesn't refuse with ErrNetworkInUse.
|
||||
for cid in (net.attrs.get("Containers") or {}):
|
||||
try:
|
||||
net.disconnect(cid, force=True)
|
||||
except docker.errors.APIError:
|
||||
pass
|
||||
net.remove()
|
||||
|
||||
client.networks.create(
|
||||
name=MACVLAN_NETWORK_NAME,
|
||||
driver="macvlan",
|
||||
options={"parent": interface},
|
||||
driver=driver,
|
||||
options=options,
|
||||
ipam=docker.types.IPAMConfig(
|
||||
driver="default",
|
||||
pool_configs=[
|
||||
@@ -155,6 +190,21 @@ def create_macvlan_network(
|
||||
)
|
||||
|
||||
|
||||
def create_macvlan_network(
|
||||
client: docker.DockerClient,
|
||||
interface: str,
|
||||
subnet: str,
|
||||
gateway: str,
|
||||
ip_range: str,
|
||||
) -> None:
|
||||
"""Create the MACVLAN Docker network, replacing an ipvlan-driver one of
|
||||
the same name if necessary (parent-NIC can't host both drivers)."""
|
||||
_ensure_network(
|
||||
client, driver="macvlan", interface=interface,
|
||||
subnet=subnet, gateway=gateway, ip_range=ip_range,
|
||||
)
|
||||
|
||||
|
||||
def create_ipvlan_network(
|
||||
client: docker.DockerClient,
|
||||
interface: str,
|
||||
@@ -162,25 +212,12 @@ def create_ipvlan_network(
|
||||
gateway: str,
|
||||
ip_range: str,
|
||||
) -> None:
|
||||
"""Create an IPvlan L2 Docker network. No-op if it already exists."""
|
||||
existing = [n.name for n in client.networks.list()]
|
||||
if MACVLAN_NETWORK_NAME in existing:
|
||||
return
|
||||
|
||||
client.networks.create(
|
||||
name=MACVLAN_NETWORK_NAME,
|
||||
driver="ipvlan",
|
||||
options={"parent": interface, "ipvlan_mode": "l2"},
|
||||
ipam=docker.types.IPAMConfig(
|
||||
driver="default",
|
||||
pool_configs=[
|
||||
docker.types.IPAMPool(
|
||||
subnet=subnet,
|
||||
gateway=gateway,
|
||||
iprange=ip_range,
|
||||
)
|
||||
],
|
||||
),
|
||||
"""Create an IPvlan L2 Docker network, replacing a macvlan-driver one of
|
||||
the same name if necessary (parent-NIC can't host both drivers)."""
|
||||
_ensure_network(
|
||||
client, driver="ipvlan", interface=interface,
|
||||
subnet=subnet, gateway=gateway, ip_range=ip_range,
|
||||
extra_options={"ipvlan_mode": "l2"},
|
||||
)
|
||||
|
||||
|
||||
@@ -204,10 +241,14 @@ def _require_root() -> None:
|
||||
def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str) -> None:
|
||||
"""
|
||||
Create a macvlan interface on the host so the deployer can reach deckies.
|
||||
Idempotent — skips steps that are already done.
|
||||
Idempotent — skips steps that are already done. Drops a stale ipvlan
|
||||
host-helper first: the two drivers can share a parent NIC on paper but
|
||||
leaving the opposite helper in place is just cruft after a driver swap.
|
||||
"""
|
||||
_require_root()
|
||||
|
||||
_run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
|
||||
|
||||
# Check if interface already exists
|
||||
result = _run(["ip", "link", "show", HOST_MACVLAN_IFACE], check=False)
|
||||
if result.returncode != 0:
|
||||
@@ -227,10 +268,14 @@ def teardown_host_macvlan(decky_ip_range: str) -> None:
|
||||
def setup_host_ipvlan(interface: str, host_ipvlan_ip: str, decky_ip_range: str) -> None:
|
||||
"""
|
||||
Create an IPvlan interface on the host so the deployer can reach deckies.
|
||||
Idempotent — skips steps that are already done.
|
||||
Idempotent — skips steps that are already done. Drops a stale macvlan
|
||||
host-helper first so a prior macvlan deploy doesn't leave its slave
|
||||
dangling on the parent NIC after the driver swap.
|
||||
"""
|
||||
_require_root()
|
||||
|
||||
_run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
|
||||
|
||||
result = _run(["ip", "link", "show", HOST_IPVLAN_IFACE], check=False)
|
||||
if result.returncode != 0:
|
||||
_run(["ip", "link", "add", HOST_IPVLAN_IFACE, "link", interface, "type", "ipvlan", "mode", "l2"])
|
||||
|
||||
67
decnet/privdrop.py
Normal file
67
decnet/privdrop.py
Normal file
@@ -0,0 +1,67 @@
|
||||
"""
|
||||
Helpers for dropping root ownership on files created during privileged
|
||||
operations (e.g. `sudo decnet deploy` needs root for MACVLAN, but its log
|
||||
files should be owned by the invoking user so a subsequent non-root
|
||||
`decnet api` can append to them).
|
||||
|
||||
When sudo invokes a process, it sets SUDO_UID / SUDO_GID in the
|
||||
environment to the original user's IDs. We use those to chown files
|
||||
back after creation.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def _sudo_ids() -> Optional[tuple[int, int]]:
|
||||
"""Return (uid, gid) of the sudo-invoking user, or None when the
|
||||
process was not launched via sudo / the env vars are missing."""
|
||||
raw_uid = os.environ.get("SUDO_UID")
|
||||
raw_gid = os.environ.get("SUDO_GID")
|
||||
if not raw_uid or not raw_gid:
|
||||
return None
|
||||
try:
|
||||
return int(raw_uid), int(raw_gid)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def chown_to_invoking_user(path: str | os.PathLike[str]) -> None:
|
||||
"""Best-effort chown of *path* to the sudo-invoking user.
|
||||
|
||||
No-op when:
|
||||
* not running as root (nothing to drop),
|
||||
* not launched via sudo (no SUDO_UID/SUDO_GID),
|
||||
* the path does not exist,
|
||||
* chown fails (logged-only — never raises).
|
||||
"""
|
||||
if os.geteuid() != 0:
|
||||
return
|
||||
ids = _sudo_ids()
|
||||
if ids is None:
|
||||
return
|
||||
uid, gid = ids
|
||||
p = Path(path)
|
||||
if not p.exists():
|
||||
return
|
||||
try:
|
||||
os.chown(p, uid, gid)
|
||||
except OSError:
|
||||
# Best-effort; a failed chown is not fatal to logging.
|
||||
pass
|
||||
|
||||
|
||||
def chown_tree_to_invoking_user(root: str | os.PathLike[str]) -> None:
|
||||
"""Apply :func:`chown_to_invoking_user` to *root* and every file/dir
|
||||
beneath it. Used for parent directories that we just created with
|
||||
``mkdir(parents=True)`` as root."""
|
||||
if os.geteuid() != 0 or _sudo_ids() is None:
|
||||
return
|
||||
root_path = Path(root)
|
||||
if not root_path.exists():
|
||||
return
|
||||
chown_to_invoking_user(root_path)
|
||||
for entry in root_path.rglob("*"):
|
||||
chown_to_invoking_user(entry)
|
||||
13
decnet/prober/__init__.py
Normal file
13
decnet/prober/__init__.py
Normal file
@@ -0,0 +1,13 @@
|
||||
"""
|
||||
DECNET-PROBER — standalone active network probing service.
|
||||
|
||||
Runs as a detached host-level process (no container). Sends crafted TLS
|
||||
probes to discover C2 frameworks and other attacker infrastructure via
|
||||
JARM fingerprinting. Results are written as RFC 5424 syslog + JSON to the
|
||||
same log file the collector uses, so the existing ingestion pipeline picks
|
||||
them up automatically.
|
||||
"""
|
||||
|
||||
from decnet.prober.worker import prober_worker
|
||||
|
||||
__all__ = ["prober_worker"]
|
||||
252
decnet/prober/hassh.py
Normal file
252
decnet/prober/hassh.py
Normal file
@@ -0,0 +1,252 @@
|
||||
"""
|
||||
HASSHServer — SSH server fingerprinting via KEX_INIT algorithm ordering.
|
||||
|
||||
Connects to an SSH server, completes the version exchange, captures the
|
||||
server's SSH_MSG_KEXINIT message, and hashes the server-to-client algorithm
|
||||
fields (kex, encryption, MAC, compression) into a 32-character MD5 digest.
|
||||
|
||||
This is the *server* variant of HASSH (HASSHServer). It fingerprints what
|
||||
the server *offers*, which identifies the SSH implementation (OpenSSH,
|
||||
Paramiko, libssh, Cobalt Strike SSH, etc.).
|
||||
|
||||
Stdlib only (socket, struct, hashlib) plus decnet.telemetry for tracing (zero-cost when disabled).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import socket
|
||||
import struct
|
||||
from typing import Any
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
# SSH protocol constants
|
||||
_SSH_MSG_KEXINIT = 20
|
||||
_KEX_INIT_COOKIE_LEN = 16
|
||||
_KEX_INIT_NAME_LISTS = 10 # 10 name-list fields in KEX_INIT
|
||||
|
||||
# Blend in as a normal OpenSSH client
|
||||
_CLIENT_BANNER = b"SSH-2.0-OpenSSH_9.6\r\n"
|
||||
|
||||
# Max bytes to read for server banner
|
||||
_MAX_BANNER_LEN = 256
|
||||
|
||||
# Max bytes for a single SSH packet (KEX_INIT is typically < 2KB)
|
||||
_MAX_PACKET_LEN = 35000
|
||||
|
||||
|
||||
# ─── SSH connection + KEX_INIT capture ──────────────────────────────────────
|
||||
|
||||
@_traced("prober.hassh_ssh_connect")
|
||||
def _ssh_connect(
|
||||
host: str,
|
||||
port: int,
|
||||
timeout: float,
|
||||
) -> tuple[str, bytes] | None:
|
||||
"""
|
||||
TCP connect, exchange version strings, read server's KEX_INIT.
|
||||
|
||||
Returns (server_banner, kex_init_payload) or None on failure.
|
||||
The kex_init_payload starts at the SSH_MSG_KEXINIT type byte.
|
||||
"""
|
||||
sock = None
|
||||
try:
|
||||
sock = socket.create_connection((host, port), timeout=timeout)
|
||||
sock.settimeout(timeout)
|
||||
|
||||
# 1. Read server banner (line ending \r\n or \n)
|
||||
banner = _read_banner(sock)
|
||||
if banner is None or not banner.startswith("SSH-"):
|
||||
return None
|
||||
|
||||
# 2. Send our client version string
|
||||
sock.sendall(_CLIENT_BANNER)
|
||||
|
||||
# 3. Read the server's first binary packet (should be KEX_INIT)
|
||||
payload = _read_ssh_packet(sock)
|
||||
if payload is None or len(payload) < 1:
|
||||
return None
|
||||
|
||||
if payload[0] != _SSH_MSG_KEXINIT:
|
||||
return None
|
||||
|
||||
return (banner, payload)
|
||||
|
||||
except (OSError, socket.timeout, TimeoutError, ConnectionError):
|
||||
return None
|
||||
finally:
|
||||
if sock is not None:
|
||||
try:
|
||||
sock.close()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def _read_banner(sock: socket.socket) -> str | None:
|
||||
"""Read the SSH version banner line from the socket."""
|
||||
buf = b""
|
||||
while len(buf) < _MAX_BANNER_LEN:
|
||||
try:
|
||||
byte = sock.recv(1)
|
||||
except (OSError, socket.timeout, TimeoutError):
|
||||
return None
|
||||
if not byte:
|
||||
return None
|
||||
buf += byte
|
||||
if buf.endswith(b"\n"):
|
||||
break
|
||||
|
||||
try:
|
||||
return buf.decode("utf-8", errors="replace").rstrip("\r\n")
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _read_ssh_packet(sock: socket.socket) -> bytes | None:
|
||||
"""
|
||||
Read a single SSH binary packet and return its payload.
|
||||
|
||||
SSH binary packet format:
|
||||
uint32 packet_length (not including itself or MAC)
|
||||
byte padding_length
|
||||
byte[] payload (packet_length - padding_length - 1)
|
||||
byte[] padding
|
||||
"""
|
||||
header = _recv_exact(sock, 4)
|
||||
if header is None:
|
||||
return None
|
||||
|
||||
packet_length = struct.unpack("!I", header)[0]
|
||||
if packet_length < 2 or packet_length > _MAX_PACKET_LEN:
|
||||
return None
|
||||
|
||||
rest = _recv_exact(sock, packet_length)
|
||||
if rest is None:
|
||||
return None
|
||||
|
||||
padding_length = rest[0]
|
||||
payload_length = packet_length - padding_length - 1
|
||||
if payload_length < 1 or payload_length > len(rest) - 1:
|
||||
return None
|
||||
|
||||
return rest[1 : 1 + payload_length]
|
||||
|
||||
|
||||
def _recv_exact(sock: socket.socket, n: int) -> bytes | None:
|
||||
"""Read exactly n bytes from socket, or None on failure."""
|
||||
buf = b""
|
||||
while len(buf) < n:
|
||||
try:
|
||||
chunk = sock.recv(n - len(buf))
|
||||
except (OSError, socket.timeout, TimeoutError):
|
||||
return None
|
||||
if not chunk:
|
||||
return None
|
||||
buf += chunk
|
||||
return buf
|
||||
|
||||
|
||||
# ─── KEX_INIT parsing ──────────────────────────────────────────────────────
|
||||
|
||||
def _parse_kex_init(payload: bytes) -> dict[str, str] | None:
|
||||
"""
|
||||
Parse SSH_MSG_KEXINIT payload and extract the 10 name-list fields.
|
||||
|
||||
Payload layout:
|
||||
byte SSH_MSG_KEXINIT (20)
|
||||
byte[16] cookie
|
||||
10 × name-list:
|
||||
uint32 length
|
||||
byte[] utf-8 string (comma-separated algorithm names)
|
||||
bool first_kex_packet_follows
|
||||
uint32 reserved
|
||||
|
||||
Returns dict with keys: kex_algorithms, server_host_key_algorithms,
|
||||
encryption_client_to_server, encryption_server_to_client,
|
||||
mac_client_to_server, mac_server_to_client,
|
||||
compression_client_to_server, compression_server_to_client,
|
||||
languages_client_to_server, languages_server_to_client.
|
||||
"""
|
||||
if len(payload) < 1 + _KEX_INIT_COOKIE_LEN + 4:
|
||||
return None
|
||||
|
||||
offset = 1 + _KEX_INIT_COOKIE_LEN # skip type byte + cookie
|
||||
|
||||
field_names = [
|
||||
"kex_algorithms",
|
||||
"server_host_key_algorithms",
|
||||
"encryption_client_to_server",
|
||||
"encryption_server_to_client",
|
||||
"mac_client_to_server",
|
||||
"mac_server_to_client",
|
||||
"compression_client_to_server",
|
||||
"compression_server_to_client",
|
||||
"languages_client_to_server",
|
||||
"languages_server_to_client",
|
||||
]
|
||||
|
||||
fields: dict[str, str] = {}
|
||||
for name in field_names:
|
||||
if offset + 4 > len(payload):
|
||||
return None
|
||||
length = struct.unpack("!I", payload[offset : offset + 4])[0]
|
||||
offset += 4
|
||||
if offset + length > len(payload):
|
||||
return None
|
||||
fields[name] = payload[offset : offset + length].decode(
|
||||
"utf-8", errors="replace"
|
||||
)
|
||||
offset += length
|
||||
|
||||
return fields
|
||||
|
||||
|
||||
# ─── HASSH computation ──────────────────────────────────────────────────────
|
||||
|
||||
def _compute_hassh(kex: str, enc: str, mac: str, comp: str) -> str:
|
||||
"""
|
||||
Compute HASSHServer hash: MD5 of "kex;enc_s2c;mac_s2c;comp_s2c".
|
||||
|
||||
Returns 32-character lowercase hex digest.
|
||||
"""
|
||||
raw = f"{kex};{enc};{mac};{comp}"
|
||||
return hashlib.md5(raw.encode("utf-8"), usedforsecurity=False).hexdigest()
|
||||
|
||||
|
||||
# ─── Public API ─────────────────────────────────────────────────────────────
|
||||
|
||||
@_traced("prober.hassh_server")
|
||||
def hassh_server(
|
||||
host: str,
|
||||
port: int,
|
||||
timeout: float = 5.0,
|
||||
) -> dict[str, Any] | None:
|
||||
"""
|
||||
Connect to an SSH server and compute its HASSHServer fingerprint.
|
||||
|
||||
Returns a dict with the hash, banner, and raw algorithm fields,
|
||||
or None if the host is not running an SSH server on the given port.
|
||||
"""
|
||||
result = _ssh_connect(host, port, timeout)
|
||||
if result is None:
|
||||
return None
|
||||
|
||||
banner, payload = result
|
||||
fields = _parse_kex_init(payload)
|
||||
if fields is None:
|
||||
return None
|
||||
|
||||
kex = fields["kex_algorithms"]
|
||||
enc = fields["encryption_server_to_client"]
|
||||
mac = fields["mac_server_to_client"]
|
||||
comp = fields["compression_server_to_client"]
|
||||
|
||||
return {
|
||||
"hassh_server": _compute_hassh(kex, enc, mac, comp),
|
||||
"banner": banner,
|
||||
"kex_algorithms": kex,
|
||||
"encryption_s2c": enc,
|
||||
"mac_s2c": mac,
|
||||
"compression_s2c": comp,
|
||||
}
|
||||
506
decnet/prober/jarm.py
Normal file
506
decnet/prober/jarm.py
Normal file
@@ -0,0 +1,506 @@
|
||||
"""
|
||||
JARM TLS fingerprinting — pure stdlib implementation.
|
||||
|
||||
JARM sends 10 crafted TLS ClientHello packets to a target, each varying
|
||||
TLS version, cipher suite order, extensions, and ALPN values. The
|
||||
ServerHello responses are parsed and hashed to produce a 62-character
|
||||
fingerprint that identifies the TLS server implementation.
|
||||
|
||||
Reference: https://github.com/salesforce/jarm
|
||||
|
||||
Only DECNET import is decnet.telemetry for tracing (zero-cost when disabled).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import socket
|
||||
import struct
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
# ─── Constants ────────────────────────────────────────────────────────────────
|
||||
|
||||
JARM_EMPTY_HASH = "0" * 62
|
||||
|
||||
_INTER_PROBE_DELAY = 0.1 # seconds between probes to avoid IDS triggers
|
||||
|
||||
# TLS version bytes
|
||||
_TLS_1_0 = b"\x03\x01"
|
||||
_TLS_1_1 = b"\x03\x02"
|
||||
_TLS_1_2 = b"\x03\x03"
|
||||
_TLS_1_3 = b"\x03\x03" # TLS 1.3 uses 0x0303 in record layer
|
||||
|
||||
# TLS record types
|
||||
_CONTENT_HANDSHAKE = 0x16
|
||||
_HANDSHAKE_CLIENT_HELLO = 0x01
|
||||
_HANDSHAKE_SERVER_HELLO = 0x02
|
||||
|
||||
# Extension types
|
||||
_EXT_SERVER_NAME = 0x0000
|
||||
_EXT_EC_POINT_FORMATS = 0x000B
|
||||
_EXT_SUPPORTED_GROUPS = 0x000A
|
||||
_EXT_SESSION_TICKET = 0x0023
|
||||
_EXT_ENCRYPT_THEN_MAC = 0x0016
|
||||
_EXT_EXTENDED_MASTER_SECRET = 0x0017
|
||||
_EXT_SIGNATURE_ALGORITHMS = 0x000D
|
||||
_EXT_SUPPORTED_VERSIONS = 0x002B
|
||||
_EXT_PSK_KEY_EXCHANGE_MODES = 0x002D
|
||||
_EXT_KEY_SHARE = 0x0033
|
||||
_EXT_ALPN = 0x0010
|
||||
_EXT_PADDING = 0x0015
|
||||
|
||||
# ─── Cipher suite lists per JARM spec ────────────────────────────────────────
|
||||
|
||||
# Forward cipher order (standard)
|
||||
_CIPHERS_FORWARD = [
|
||||
0x0016, 0x0033, 0x0067, 0xC09E, 0xC0A2, 0x009E, 0x0039, 0x006B,
|
||||
0xC09F, 0xC0A3, 0x009F, 0x0045, 0x00BE, 0x0088, 0x00C4, 0x009A,
|
||||
0xC008, 0xC009, 0xC023, 0xC0AC, 0xC0AE, 0xC02B, 0xC00A, 0xC024,
|
||||
0xC0AD, 0xC0AF, 0xC02C, 0xC072, 0xC073, 0xCCA8, 0x1301, 0x1302,
|
||||
0x1303, 0xC013, 0xC014, 0xC02F, 0x009C, 0xC02E, 0x002F, 0x0035,
|
||||
0x000A, 0x0005, 0x0004,
|
||||
]
|
||||
|
||||
# Reverse cipher order
|
||||
_CIPHERS_REVERSE = list(reversed(_CIPHERS_FORWARD))
|
||||
|
||||
# TLS 1.3-only ciphers
|
||||
_CIPHERS_TLS13 = [0x1301, 0x1302, 0x1303]
|
||||
|
||||
# Middle-out cipher order (interleaved from center)
|
||||
def _middle_out(lst: list[int]) -> list[int]:
|
||||
result: list[int] = []
|
||||
mid = len(lst) // 2
|
||||
for i in range(mid + 1):
|
||||
if mid + i < len(lst):
|
||||
result.append(lst[mid + i])
|
||||
if mid - i >= 0 and mid - i != mid + i:
|
||||
result.append(lst[mid - i])
|
||||
return result
|
||||
|
||||
_CIPHERS_MIDDLE_OUT = _middle_out(_CIPHERS_FORWARD)
|
||||
|
||||
# Rare/uncommon extensions cipher list
|
||||
_CIPHERS_RARE = [
|
||||
0x0016, 0x0033, 0xC011, 0xC012, 0x0067, 0xC09E, 0xC0A2, 0x009E,
|
||||
0x0039, 0x006B, 0xC09F, 0xC0A3, 0x009F, 0x0045, 0x00BE, 0x0088,
|
||||
0x00C4, 0x009A, 0xC008, 0xC009, 0xC023, 0xC0AC, 0xC0AE, 0xC02B,
|
||||
0xC00A, 0xC024, 0xC0AD, 0xC0AF, 0xC02C, 0xC072, 0xC073, 0xCCA8,
|
||||
0x1301, 0x1302, 0x1303, 0xC013, 0xC014, 0xC02F, 0x009C, 0xC02E,
|
||||
0x002F, 0x0035, 0x000A, 0x0005, 0x0004,
|
||||
]
|
||||
|
||||
|
||||
# ─── Probe definitions ────────────────────────────────────────────────────────
|
||||
|
||||
# Each probe: (tls_version, cipher_list, tls13_support, alpn, extensions_style)
|
||||
# tls_version: record-layer version bytes
|
||||
# cipher_list: which cipher suite ordering to use
|
||||
# tls13_support: whether to include TLS 1.3 extensions (supported_versions, key_share, psk)
|
||||
# alpn: ALPN protocol string or None
|
||||
# extensions_style: "standard", "rare", or "no_extensions"
|
||||
|
||||
_PROBE_CONFIGS: list[dict[str, Any]] = [
|
||||
# 0: TLS 1.2 forward
|
||||
{"version": _TLS_1_2, "ciphers": _CIPHERS_FORWARD, "tls13": False, "alpn": None, "style": "standard"},
|
||||
# 1: TLS 1.2 reverse
|
||||
{"version": _TLS_1_2, "ciphers": _CIPHERS_REVERSE, "tls13": False, "alpn": None, "style": "standard"},
|
||||
# 2: TLS 1.1 forward
|
||||
{"version": _TLS_1_1, "ciphers": _CIPHERS_FORWARD, "tls13": False, "alpn": None, "style": "standard"},
|
||||
# 3: TLS 1.3 forward
|
||||
{"version": _TLS_1_2, "ciphers": _CIPHERS_FORWARD, "tls13": True, "alpn": "h2", "style": "standard"},
|
||||
# 4: TLS 1.3 reverse
|
||||
{"version": _TLS_1_2, "ciphers": _CIPHERS_REVERSE, "tls13": True, "alpn": "h2", "style": "standard"},
|
||||
# 5: TLS 1.3 invalid (advertise 1.3 support but no key_share)
|
||||
{"version": _TLS_1_2, "ciphers": _CIPHERS_FORWARD, "tls13": "no_key_share", "alpn": None, "style": "standard"},
|
||||
# 6: TLS 1.3 middle-out
|
||||
{"version": _TLS_1_2, "ciphers": _CIPHERS_MIDDLE_OUT, "tls13": True, "alpn": None, "style": "standard"},
|
||||
# 7: TLS 1.0 forward
|
||||
{"version": _TLS_1_0, "ciphers": _CIPHERS_FORWARD, "tls13": False, "alpn": None, "style": "standard"},
|
||||
# 8: TLS 1.2 middle-out
|
||||
{"version": _TLS_1_2, "ciphers": _CIPHERS_MIDDLE_OUT, "tls13": False, "alpn": None, "style": "standard"},
|
||||
# 9: TLS 1.2 with rare extensions
|
||||
{"version": _TLS_1_2, "ciphers": _CIPHERS_RARE, "tls13": False, "alpn": "http/1.1", "style": "rare"},
|
||||
]
|
||||
|
||||
|
||||
# ─── Extension builders ──────────────────────────────────────────────────────
|
||||
|
||||
def _ext(ext_type: int, data: bytes) -> bytes:
|
||||
return struct.pack("!HH", ext_type, len(data)) + data
|
||||
|
||||
|
||||
def _ext_sni(host: str) -> bytes:
|
||||
host_bytes = host.encode("ascii")
|
||||
# ServerNameList: length(2) + ServerName: type(1) + length(2) + name
|
||||
sni_data = struct.pack("!HBH", len(host_bytes) + 3, 0, len(host_bytes)) + host_bytes
|
||||
return _ext(_EXT_SERVER_NAME, sni_data)
|
||||
|
||||
|
||||
def _ext_supported_groups() -> bytes:
|
||||
groups = [0x0017, 0x0018, 0x0019, 0x001D, 0x0100, 0x0101] # secp256r1, secp384r1, secp521r1, x25519, ffdhe2048, ffdhe3072
|
||||
data = struct.pack("!H", len(groups) * 2) + b"".join(struct.pack("!H", g) for g in groups)
|
||||
return _ext(_EXT_SUPPORTED_GROUPS, data)
|
||||
|
||||
|
||||
def _ext_ec_point_formats() -> bytes:
|
||||
formats = b"\x00" # uncompressed only
|
||||
return _ext(_EXT_EC_POINT_FORMATS, struct.pack("B", len(formats)) + formats)
|
||||
|
||||
|
||||
def _ext_signature_algorithms() -> bytes:
|
||||
algos = [
|
||||
0x0401, 0x0501, 0x0601, # RSA PKCS1 SHA256/384/512
|
||||
0x0201, # RSA PKCS1 SHA1
|
||||
0x0403, 0x0503, 0x0603, # ECDSA SHA256/384/512
|
||||
0x0203, # ECDSA SHA1
|
||||
0x0804, 0x0805, 0x0806, # RSA-PSS SHA256/384/512
|
||||
]
|
||||
data = struct.pack("!H", len(algos) * 2) + b"".join(struct.pack("!H", a) for a in algos)
|
||||
return _ext(_EXT_SIGNATURE_ALGORITHMS, data)
|
||||
|
||||
|
||||
def _ext_supported_versions_13() -> bytes:
|
||||
versions = [0x0304, 0x0303] # TLS 1.3, 1.2
|
||||
data = struct.pack("B", len(versions) * 2) + b"".join(struct.pack("!H", v) for v in versions)
|
||||
return _ext(_EXT_SUPPORTED_VERSIONS, data)
|
||||
|
||||
|
||||
def _ext_psk_key_exchange_modes() -> bytes:
|
||||
return _ext(_EXT_PSK_KEY_EXCHANGE_MODES, b"\x01\x01") # psk_dhe_ke
|
||||
|
||||
|
||||
def _ext_key_share() -> bytes:
|
||||
# x25519 key share with 32 random-looking bytes
|
||||
key_data = b"\x00" * 32
|
||||
entry = struct.pack("!HH", 0x001D, 32) + key_data # x25519 group
|
||||
data = struct.pack("!H", len(entry)) + entry
|
||||
return _ext(_EXT_KEY_SHARE, data)
|
||||
|
||||
|
||||
def _ext_alpn(protocol: str) -> bytes:
|
||||
proto_bytes = protocol.encode("ascii")
|
||||
proto_entry = struct.pack("B", len(proto_bytes)) + proto_bytes
|
||||
data = struct.pack("!H", len(proto_entry)) + proto_entry
|
||||
return _ext(_EXT_ALPN, data)
|
||||
|
||||
|
||||
def _ext_session_ticket() -> bytes:
|
||||
return _ext(_EXT_SESSION_TICKET, b"")
|
||||
|
||||
|
||||
def _ext_encrypt_then_mac() -> bytes:
|
||||
return _ext(_EXT_ENCRYPT_THEN_MAC, b"")
|
||||
|
||||
|
||||
def _ext_extended_master_secret() -> bytes:
|
||||
return _ext(_EXT_EXTENDED_MASTER_SECRET, b"")
|
||||
|
||||
|
||||
def _ext_padding(target_length: int, current_length: int) -> bytes:
|
||||
pad_needed = target_length - current_length - 4 # 4 bytes for ext type + length
|
||||
if pad_needed < 0:
|
||||
return b""
|
||||
return _ext(_EXT_PADDING, b"\x00" * pad_needed)
|
||||
|
||||
|
||||
# ─── ClientHello builder ─────────────────────────────────────────────────────
|
||||
|
||||
def _build_client_hello(probe_index: int, host: str = "localhost") -> bytes:
    """
    Construct one of 10 JARM-specified ClientHello packets.

    Args:
        probe_index: 0-9, selects the probe configuration
        host: target hostname for SNI extension

    Returns:
        Complete TLS record bytes ready to send on the wire.
    """
    cfg = _PROBE_CONFIGS[probe_index]
    version: bytes = cfg["version"]
    ciphers: list[int] = cfg["ciphers"]
    # tls13 is tri-state: True (full 1.3), False (no 1.3 extensions), or the
    # string "no_key_share" (1.3 advertised but key_share deliberately absent).
    tls13 = cfg["tls13"]
    alpn: str | None = cfg["alpn"]

    # Random and session ID are fixed all-zeros so probes are deterministic.
    random_bytes = b"\x00" * 32
    session_id = b"\x00" * 32

    # Cipher suites: 2-byte byte-length prefix + 2 bytes per suite.
    cipher_bytes = b"".join(struct.pack("!H", c) for c in ciphers)
    cipher_data = struct.pack("!H", len(cipher_bytes)) + cipher_bytes

    # Compression methods: single entry, null compression.
    compression = b"\x01\x00"

    # Extensions common to every probe, in fixed order (order matters for
    # fingerprint stability).
    extensions = b""
    extensions += _ext_sni(host)
    extensions += _ext_supported_groups()
    extensions += _ext_ec_point_formats()
    extensions += _ext_session_ticket()
    extensions += _ext_encrypt_then_mac()
    extensions += _ext_extended_master_secret()
    extensions += _ext_signature_algorithms()

    # Identity check ("is True") because tls13 may also be the string
    # "no_key_share"; this replaces the previous `== True  # noqa: E712`.
    if tls13 is True:
        extensions += _ext_supported_versions_13()
        extensions += _ext_psk_key_exchange_modes()
        extensions += _ext_key_share()
    elif tls13 == "no_key_share":
        extensions += _ext_supported_versions_13()
        extensions += _ext_psk_key_exchange_modes()
        # Intentionally omit key_share

    if alpn:
        extensions += _ext_alpn(alpn)

    ext_data = struct.pack("!H", len(extensions)) + extensions

    # ClientHello body
    body = (
        version  # client_version (2)
        + random_bytes  # random (32)
        + struct.pack("B", len(session_id)) + session_id  # session_id
        + cipher_data  # cipher_suites
        + compression  # compression_methods
        + ext_data  # extensions
    )

    # Handshake header: type(1) + 24-bit big-endian length (drop the high
    # byte of a packed 32-bit int).
    handshake = struct.pack("B", _HANDSHAKE_CLIENT_HELLO) + struct.pack("!I", len(body))[1:] + body

    # TLS record header: type(1) + version(2) + length(2). The record layer
    # always claims TLS 1.0 for maximum middlebox compatibility.
    record = struct.pack("B", _CONTENT_HANDSHAKE) + _TLS_1_0 + struct.pack("!H", len(handshake)) + handshake

    return record
|
||||
|
||||
|
||||
# ─── ServerHello parser ──────────────────────────────────────────────────────
|
||||
|
||||
def _parse_server_hello(data: bytes) -> str:
    """
    Extract cipher suite and TLS version from a ServerHello response.

    Returns a pipe-delimited string "cipher|version|extensions" that forms
    one component of the JARM hash, or "|||" on parse failure.

    Note: the failure sentinel "|||" splits into four empty fields, which
    downstream code (_compute_jarm) detects via an empty first field.
    """
    try:
        if len(data) < 6:
            return "|||"

        # TLS record header: content type must be handshake (22).
        if data[0] != _CONTENT_HANDSHAKE:
            return "|||"

        struct.unpack_from("!H", data, 1)[0]  # record_version (unused)
        record_len = struct.unpack_from("!H", data, 3)[0]
        # Slice out the handshake message; a short read yields a short slice.
        hs = data[5: 5 + record_len]

        if len(hs) < 4:
            return "|||"

        # Handshake header: must be ServerHello (type 2).
        if hs[0] != _HANDSHAKE_SERVER_HELLO:
            return "|||"

        # Handshake length is a 24-bit field; pad to 32 bits for unpacking.
        hs_len = struct.unpack_from("!I", b"\x00" + hs[1:4])[0]
        body = hs[4: 4 + hs_len]

        # Minimum ServerHello body: version(2) + random(32).
        if len(body) < 34:
            return "|||"

        pos = 0
        # Server version (legacy_version in TLS 1.3 — see supported_versions
        # handling below for the real negotiated version).
        server_version = struct.unpack_from("!H", body, pos)[0]
        pos += 2

        # Random (32 bytes) — skipped, not part of the fingerprint.
        pos += 32

        # Session ID: 1-byte length-prefixed, variable size.
        if pos >= len(body):
            return "|||"
        sid_len = body[pos]
        pos += 1 + sid_len

        # Cipher suite chosen by the server.
        if pos + 2 > len(body):
            return "|||"
        cipher = struct.unpack_from("!H", body, pos)[0]
        pos += 2

        # Compression method (1 byte) — skipped.
        if pos >= len(body):
            return "|||"
        pos += 1

        # Parse extensions for supported_versions (to detect actual TLS 1.3)
        actual_version = server_version
        extensions_str = ""
        if pos + 2 <= len(body):
            ext_total = struct.unpack_from("!H", body, pos)[0]
            pos += 2
            ext_end = pos + ext_total
            ext_types: list[str] = []
            # Walk type(2)+len(2)+payload triples; the second bound guards
            # against ext_total overstating the actual buffer.
            while pos + 4 <= ext_end and pos + 4 <= len(body):
                ext_type = struct.unpack_from("!H", body, pos)[0]
                ext_len = struct.unpack_from("!H", body, pos + 2)[0]
                ext_types.append(f"{ext_type:04x}")

                # TLS 1.3 servers put the true version in supported_versions;
                # the legacy_version field above stays 0x0303.
                if ext_type == _EXT_SUPPORTED_VERSIONS and ext_len >= 2:
                    actual_version = struct.unpack_from("!H", body, pos + 4)[0]

                pos += 4 + ext_len
            # Extension-type ordering itself is part of the fingerprint.
            extensions_str = "-".join(ext_types)

        version_str = _version_to_str(actual_version)
        cipher_str = f"{cipher:04x}"

        return f"{cipher_str}|{version_str}|{extensions_str}"

    except Exception:
        # Any malformed/truncated response maps to the failure sentinel
        # rather than raising — probe failures are expected and non-fatal.
        return "|||"
|
||||
|
||||
|
||||
def _version_to_str(version: int) -> str:
|
||||
return {
|
||||
0x0304: "tls13",
|
||||
0x0303: "tls12",
|
||||
0x0302: "tls11",
|
||||
0x0301: "tls10",
|
||||
0x0300: "ssl30",
|
||||
}.get(version, f"{version:04x}")
|
||||
|
||||
|
||||
# ─── Probe sender ────────────────────────────────────────────────────────────
|
||||
|
||||
@_traced("prober.jarm_send_probe")
def _send_probe(host: str, port: int, hello: bytes, timeout: float = 5.0) -> bytes | None:
    """
    Open a TCP connection, send the ClientHello, and read the ServerHello.

    Reads until the first complete TLS record has arrived, the peer closes,
    or the timeout fires.

    Returns raw response bytes or None on any failure.
    """
    try:
        # socket objects are context managers; this replaces the manual
        # try/finally close of the original.
        with socket.create_connection((host, port), timeout=timeout) as sock:
            sock.sendall(hello)
            sock.settimeout(timeout)
            response = b""
            while True:
                chunk = sock.recv(1484)
                if not chunk:
                    break
                response += chunk
                # Stop once the first TLS record (ServerHello) is complete:
                # 5-byte record header + record_len payload bytes.
                if len(response) >= 5:
                    record_len = struct.unpack_from("!H", response, 3)[0]
                    if len(response) >= 5 + record_len:
                        break
            return response if response else None
    except OSError:
        # socket.error is an alias of OSError and socket.timeout is a
        # subclass, so one clause covers every network failure here.
        return None
|
||||
|
||||
|
||||
# ─── JARM hash computation ───────────────────────────────────────────────────
|
||||
|
||||
def _compute_jarm(responses: list[str]) -> str:
|
||||
"""
|
||||
Compute the final 62-character JARM hash from 10 probe response strings.
|
||||
|
||||
The first 30 characters are the raw cipher/version concatenation.
|
||||
The remaining 32 characters are a truncated SHA256 of the extensions.
|
||||
"""
|
||||
if all(r == "|||" for r in responses):
|
||||
return JARM_EMPTY_HASH
|
||||
|
||||
# Build the fuzzy hash
|
||||
raw_parts: list[str] = []
|
||||
ext_parts: list[str] = []
|
||||
|
||||
for r in responses:
|
||||
parts = r.split("|")
|
||||
if len(parts) >= 3 and parts[0] != "":
|
||||
cipher = parts[0]
|
||||
version = parts[1]
|
||||
extensions = parts[2] if len(parts) > 2 else ""
|
||||
|
||||
# Map version to single char
|
||||
ver_char = {
|
||||
"tls13": "d", "tls12": "c", "tls11": "b",
|
||||
"tls10": "a", "ssl30": "0",
|
||||
}.get(version, "0")
|
||||
|
||||
raw_parts.append(f"{cipher}{ver_char}")
|
||||
ext_parts.append(extensions)
|
||||
else:
|
||||
raw_parts.append("000")
|
||||
ext_parts.append("")
|
||||
|
||||
# First 30 chars: cipher(4) + version(1) = 5 chars * 10 probes = 50... no
|
||||
# JARM spec: first part is c|v per probe joined, then SHA256 of extensions
|
||||
# Actual format: each response contributes 3 chars (cipher_first2 + ver_char)
|
||||
# to the first 30, then all extensions hashed for the remaining 32.
|
||||
|
||||
fuzzy_raw = ""
|
||||
for r in responses:
|
||||
parts = r.split("|")
|
||||
if len(parts) >= 3 and parts[0] != "":
|
||||
cipher = parts[0] # 4-char hex
|
||||
version = parts[1]
|
||||
ver_char = {
|
||||
"tls13": "d", "tls12": "c", "tls11": "b",
|
||||
"tls10": "a", "ssl30": "0",
|
||||
}.get(version, "0")
|
||||
fuzzy_raw += f"{cipher[0:2]}{ver_char}"
|
||||
else:
|
||||
fuzzy_raw += "000"
|
||||
|
||||
# fuzzy_raw is 30 chars (3 * 10)
|
||||
ext_str = ",".join(ext_parts)
|
||||
ext_hash = hashlib.sha256(ext_str.encode()).hexdigest()[:32]
|
||||
|
||||
return fuzzy_raw + ext_hash
|
||||
|
||||
|
||||
# ─── Public API ──────────────────────────────────────────────────────────────
|
||||
|
||||
@_traced("prober.jarm_hash")
def jarm_hash(host: str, port: int, timeout: float = 5.0) -> str:
    """
    Compute the JARM fingerprint for a TLS server.

    Sends 10 crafted ClientHello packets and hashes the responses.

    Args:
        host: target IP or hostname
        port: target port
        timeout: per-probe TCP timeout in seconds

    Returns:
        62-character JARM hash string, or all-zeros on total failure.
    """
    responses: list[str] = []

    for probe_no in range(10):
        hello = _build_client_hello(probe_no, host=host)
        raw = _send_probe(host, port, hello, timeout=timeout)
        responses.append(_parse_server_hello(raw) if raw is not None else "|||")

        # Pace the probes; no delay needed after the final one.
        if probe_no < 9:
            time.sleep(_INTER_PROBE_DELAY)

    return _compute_jarm(responses)
|
||||
227
decnet/prober/tcpfp.py
Normal file
227
decnet/prober/tcpfp.py
Normal file
@@ -0,0 +1,227 @@
|
||||
"""
|
||||
TCP/IP stack fingerprinting via SYN-ACK analysis.
|
||||
|
||||
Sends a crafted TCP SYN packet to a target host:port, captures the
|
||||
SYN-ACK response, and extracts OS/tool-identifying characteristics:
|
||||
TTL, window size, DF bit, MSS, window scale, SACK support, timestamps,
|
||||
and TCP options ordering.
|
||||
|
||||
Uses scapy for packet crafting and parsing. Requires root/CAP_NET_RAW.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import random
|
||||
from typing import Any
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
# Lazy-import scapy to avoid breaking non-root usage of HASSH/JARM.
|
||||
# The actual import happens inside functions that need it.
|
||||
|
||||
# ─── TCP option short codes ─────────────────────────────────────────────────
|
||||
|
||||
# Mapping from scapy TCP option names to single-letter codes used in the
# options-order component of the fingerprint (p0f-style signature letters).
# Options not listed here map to "?" at the call site
# (_extract_options_order).
_OPT_CODES: dict[str, str] = {
    "MSS": "M",
    "WScale": "W",
    "SAckOK": "S",
    "SAck": "S",
    "Timestamp": "T",
    "NOP": "N",
    "EOL": "E",
    "AltChkSum": "A",
    "AltChkSumOpt": "A",
    "UTO": "U",
}
|
||||
|
||||
|
||||
# ─── Packet construction ───────────────────────────────────────────────────
|
||||
|
||||
@_traced("prober.tcpfp_send_syn")
def _send_syn(
    host: str,
    port: int,
    timeout: float,
) -> Any | None:
    """
    Craft a TCP SYN with common options and send it. Returns the
    SYN-ACK response packet or None on timeout/failure.

    Requires raw-socket privileges (root / CAP_NET_RAW).
    """
    from scapy.all import IP, TCP, conf, sr1

    # Suppress scapy's noisy output
    conf.verb = 0

    src_port = random.randint(49152, 65535)  # nosec B311 — ephemeral port, not crypto

    # The option list mimics a typical Linux client SYN so responses are
    # comparable across targets.
    pkt = (
        IP(dst=host)
        / TCP(
            sport=src_port,
            dport=port,
            flags="S",
            options=[
                ("MSS", 1460),
                ("NOP", None),
                ("WScale", 7),
                ("NOP", None),
                ("NOP", None),
                ("Timestamp", (0, 0)),
                ("SAckOK", b""),
                ("EOL", None),
            ],
        )
    )

    try:
        resp = sr1(pkt, timeout=timeout, verbose=0)
    except (OSError, PermissionError):
        return None

    if resp is None:
        return None

    # Verify it's a SYN-ACK (flags == 0x12). TCP is already imported above;
    # the original re-imported it here as TCPLayer redundantly.
    if not resp.haslayer(TCP):
        return None
    if resp[TCP].flags != 0x12:  # SYN-ACK
        return None

    # Send RST to clean up half-open connection
    _send_rst(host, port, src_port, resp)

    return resp
|
||||
|
||||
|
||||
def _send_rst(
    host: str,
    dport: int,
    sport: int,
    resp: Any,
) -> None:
    """Send RST to clean up the half-open connection.

    Best-effort: every failure (no privileges, scapy missing, send error)
    is deliberately swallowed — cleanup must never break the probe.
    """
    try:
        from scapy.all import IP, TCP, send

        teardown = IP(dst=host) / TCP(
            sport=sport,
            dport=dport,
            flags="R",
            # Continue from the sequence number the peer acknowledged.
            seq=resp.ack,
        )
        send(teardown, verbose=0)
    except Exception:  # nosec B110 — best-effort RST cleanup
        pass
|
||||
|
||||
|
||||
# ─── Response parsing ───────────────────────────────────────────────────────
|
||||
|
||||
def _parse_synack(resp: Any) -> dict[str, Any]:
    """
    Extract fingerprint fields from a scapy SYN-ACK response packet.

    Returns a dict with keys: ttl, window_size, df_bit, ip_id, mss,
    window_scale, sack_ok, timestamp, options_order. Missing options yield
    the defaults below (mss=0, window_scale=-1, sack_ok=0, timestamp=0).
    """
    from scapy.all import IP, TCP

    ip_layer = resp[IP]
    tcp_layer = resp[TCP]

    # IP fields
    ttl = ip_layer.ttl
    df_bit = 1 if (ip_layer.flags & 0x2) else 0  # DF = bit 1
    ip_id = ip_layer.id

    # TCP fields
    window_size = tcp_layer.window

    # Parse TCP options. NOTE(review): assumes scapy yields options as
    # (name, value) 2-tuples — confirm for exotic/unknown options, which
    # scapy may represent differently.
    mss = 0
    window_scale = -1  # -1 marks "option absent" (0 is a legal scale value)
    sack_ok = 0
    timestamp = 0
    options_order = _extract_options_order(tcp_layer.options)

    for opt_name, opt_value in tcp_layer.options:
        if opt_name == "MSS":
            mss = opt_value
        elif opt_name == "WScale":
            window_scale = opt_value
        elif opt_name in ("SAckOK", "SAck"):
            sack_ok = 1
        elif opt_name == "Timestamp":
            # Only presence is recorded, not the timestamp values.
            timestamp = 1

    return {
        "ttl": ttl,
        "window_size": window_size,
        "df_bit": df_bit,
        "ip_id": ip_id,
        "mss": mss,
        "window_scale": window_scale,
        "sack_ok": sack_ok,
        "timestamp": timestamp,
        "options_order": options_order,
    }
|
||||
|
||||
|
||||
def _extract_options_order(options: list[tuple[str, Any]]) -> str:
    """
    Map scapy TCP option tuples to a short-code string.

    E.g. [("MSS", 1460), ("NOP", None), ("WScale", 7)] → "M,N,W"

    Unknown option names become "?".
    """
    return ",".join(_OPT_CODES.get(name, "?") for name, _ in options)
|
||||
|
||||
|
||||
# ─── Fingerprint computation ───────────────────────────────────────────────
|
||||
|
||||
def _compute_fingerprint(fields: dict[str, Any]) -> tuple[str, str]:
|
||||
"""
|
||||
Compute fingerprint raw string and SHA256 hash from parsed fields.
|
||||
|
||||
Returns (raw_string, hash_hex_32).
|
||||
"""
|
||||
raw = (
|
||||
f"{fields['ttl']}:{fields['window_size']}:{fields['df_bit']}:"
|
||||
f"{fields['mss']}:{fields['window_scale']}:{fields['sack_ok']}:"
|
||||
f"{fields['timestamp']}:{fields['options_order']}"
|
||||
)
|
||||
h = hashlib.sha256(raw.encode("utf-8")).hexdigest()[:32]
|
||||
return raw, h
|
||||
|
||||
|
||||
# ─── Public API ─────────────────────────────────────────────────────────────
|
||||
|
||||
@_traced("prober.tcp_fingerprint")
def tcp_fingerprint(
    host: str,
    port: int,
    timeout: float = 5.0,
) -> dict[str, Any] | None:
    """
    Send a TCP SYN to host:port and fingerprint the SYN-ACK response.

    Returns a dict with the hash, raw fingerprint string, and individual
    fields, or None if no SYN-ACK was received.

    Requires root/CAP_NET_RAW.
    """
    synack = _send_syn(host, port, timeout)
    if synack is None:
        return None

    fields = _parse_synack(synack)
    raw, digest = _compute_fingerprint(fields)

    result: dict[str, Any] = {"tcpfp_hash": digest, "tcpfp_raw": raw}
    result.update(fields)
    return result
|
||||
478
decnet/prober/worker.py
Normal file
478
decnet/prober/worker.py
Normal file
@@ -0,0 +1,478 @@
|
||||
"""
|
||||
DECNET-PROBER standalone worker.
|
||||
|
||||
Runs as a detached host-level process. Discovers attacker IPs by tailing the
|
||||
collector's JSON log file, then fingerprints them via multiple active probes:
|
||||
- JARM (TLS server fingerprinting)
|
||||
- HASSHServer (SSH server fingerprinting)
|
||||
- TCP/IP stack fingerprinting (OS/tool identification)
|
||||
|
||||
Results are written as RFC 5424 syslog + JSON to the same log files.
|
||||
|
||||
Target discovery is fully automatic — every unique attacker IP seen in the
|
||||
log stream gets probed. No manual target list required.
|
||||
|
||||
Tech debt: writing directly to the collector's log files couples the
|
||||
prober to the collector's file format. A future refactor should introduce
|
||||
a shared log-sink abstraction.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.prober.hassh import hassh_server
|
||||
from decnet.prober.jarm import JARM_EMPTY_HASH, jarm_hash
|
||||
from decnet.prober.tcpfp import tcp_fingerprint
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
logger = get_logger("prober")
|
||||
|
||||
# ─── Default ports per probe type ───────────────────────────────────────────
|
||||
|
||||
# JARM: common C2 callback / TLS server ports
|
||||
DEFAULT_PROBE_PORTS: list[int] = [
|
||||
443, 8443, 8080, 4443, 50050, 2222, 993, 995, 8888, 9001,
|
||||
]
|
||||
|
||||
# HASSHServer: common SSH server ports
|
||||
DEFAULT_SSH_PORTS: list[int] = [22, 2222, 22222, 2022]
|
||||
|
||||
# TCP/IP stack: probe on ports commonly open on attacker machines.
|
||||
# Wide spread gives the best chance of a SYN-ACK for TTL/fingerprint extraction.
|
||||
DEFAULT_TCPFP_PORTS: list[int] = [22, 80, 443, 8080, 8443, 445, 3389]
|
||||
|
||||
# ─── RFC 5424 formatting (inline, mirrors templates/*/decnet_logging.py) ─────
|
||||
|
||||
_FACILITY_LOCAL0 = 16
|
||||
_SD_ID = "relay@55555"
|
||||
_SEVERITY_INFO = 6
|
||||
_SEVERITY_WARNING = 4
|
||||
|
||||
_MAX_HOSTNAME = 255
|
||||
_MAX_APPNAME = 48
|
||||
_MAX_MSGID = 32
|
||||
|
||||
|
||||
def _sd_escape(value: str) -> str:
|
||||
return value.replace("\\", "\\\\").replace('"', '\\"').replace("]", "\\]")
|
||||
|
||||
|
||||
def _sd_element(fields: dict[str, Any]) -> str:
    """Render fields as one RFC 5424 SD-ELEMENT, or NILVALUE "-" when empty."""
    if not fields:
        return "-"
    rendered = " ".join(
        f'{key}="{_sd_escape(str(val))}"' for key, val in fields.items()
    )
    return f"[{_SD_ID} {rendered}]"
|
||||
|
||||
|
||||
def _syslog_line(
    event_type: str,
    severity: int = _SEVERITY_INFO,
    msg: str | None = None,
    **fields: Any,
) -> str:
    """
    Format one RFC 5424 syslog line (without trailing newline).

    Args:
        event_type: becomes MSGID (truncated to _MAX_MSGID)
        severity: syslog severity 0-7; facility is fixed at local0
        msg: optional free-text MSG appended after the structured data
        **fields: SD-PARAMs for the single relay@55555 SD-ELEMENT
    """
    # PRI = facility * 8 + severity (RFC 5424 §6.2.1).
    pri = f"<{_FACILITY_LOCAL0 * 8 + severity}>"
    ts = datetime.now(timezone.utc).isoformat()
    # Apply the declared RFC 5424 length limits. _MAX_HOSTNAME/_MAX_APPNAME
    # were defined but previously never enforced; the fixed literals below
    # are well under the limits, so output is unchanged — this just keeps
    # the limits honest if the values ever become dynamic.
    hostname = "decnet-prober"[:_MAX_HOSTNAME]
    appname = "prober"[:_MAX_APPNAME]
    msgid = (event_type or "-")[:_MAX_MSGID]
    sd = _sd_element(fields)
    message = f" {msg}" if msg else ""
    return f"{pri}1 {ts} {hostname} {appname} - {msgid} {sd}{message}"
|
||||
|
||||
|
||||
# ─── RFC 5424 parser (subset of collector's, for JSON generation) ─────────────
|
||||
|
||||
_RFC5424_RE = re.compile(
|
||||
r"^<\d+>1 "
|
||||
r"(\S+) " # 1: TIMESTAMP
|
||||
r"(\S+) " # 2: HOSTNAME
|
||||
r"(\S+) " # 3: APP-NAME
|
||||
r"- " # PROCID
|
||||
r"(\S+) " # 4: MSGID (event_type)
|
||||
r"(.+)$", # 5: SD + MSG
|
||||
)
|
||||
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
|
||||
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
||||
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "ip", "target_ip")
|
||||
|
||||
|
||||
def _parse_to_json(line: str) -> dict[str, Any] | None:
    """Parse one RFC 5424 line back into the collector's JSON record shape.

    Returns None when the line does not match the RFC 5424 frame. The
    attacker IP is pulled from the first matching field name in _IP_FIELDS,
    defaulting to "Unknown".
    """
    m = _RFC5424_RE.match(line)
    if not m:
        return None
    ts_raw, decky, service, event_type, sd_rest = m.groups()

    fields: dict[str, str] = {}
    msg = ""

    # Structured data is optional; "-" (NILVALUE) simply skips this branch.
    if sd_rest.startswith("["):
        block = _SD_BLOCK_RE.search(sd_rest)
        if block:
            for k, v in _PARAM_RE.findall(block.group(1)):
                # NOTE(review): unescaping \" before \\ can mis-decode a
                # value that originally ended in a literal backslash before
                # a quote; acceptable for log display, but verify if exact
                # round-tripping ever matters.
                fields[k] = v.replace('\\"', '"').replace("\\\\", "\\").replace("\\]", "]")
        # Free-text MSG, if any, follows the closing bracket.
        msg_match = re.search(r'\]\s+(.+)$', sd_rest)
        if msg_match:
            msg = msg_match.group(1).strip()

    attacker_ip = "Unknown"
    # First matching field name wins (order of _IP_FIELDS is the priority).
    for fname in _IP_FIELDS:
        if fname in fields:
            attacker_ip = fields[fname]
            break

    try:
        ts_formatted = datetime.fromisoformat(ts_raw).strftime("%Y-%m-%d %H:%M:%S")
    except ValueError:
        # Keep the raw timestamp rather than dropping the record.
        ts_formatted = ts_raw

    return {
        "timestamp": ts_formatted,
        "decky": decky,
        "service": service,
        "event_type": event_type,
        "attacker_ip": attacker_ip,
        "fields": fields,
        "msg": msg,
        "raw_line": line,
    }
|
||||
|
||||
|
||||
# ─── Log writer ──────────────────────────────────────────────────────────────
|
||||
|
||||
def _write_event(
    log_path: Path,
    json_path: Path,
    event_type: str,
    severity: int = _SEVERITY_INFO,
    msg: str | None = None,
    **fields: Any,
) -> None:
    """Append one event to both the RFC 5424 log and the JSON log."""
    line = _syslog_line(event_type, severity=severity, msg=msg, **fields)

    with open(log_path, "a", encoding="utf-8") as log_file:
        log_file.write(line + "\n")
        log_file.flush()

    # Round-trip the formatted line through the parser so the JSON file
    # stays consistent with what the collector would derive from it.
    record = _parse_to_json(line)
    if record:
        with open(json_path, "a", encoding="utf-8") as json_file:
            json_file.write(json.dumps(record) + "\n")
            json_file.flush()
|
||||
|
||||
|
||||
# ─── Target discovery from log stream ────────────────────────────────────────
|
||||
|
||||
@_traced("prober.discover_attackers")
def _discover_attackers(json_path: Path, position: int) -> tuple[set[str], int]:
    """
    Read new JSON log lines from the given position and extract unique
    attacker IPs. Returns (new_ips, new_position).

    Only considers IPs that are not "Unknown" and come from events that
    indicate real attacker interaction (not prober's own events).
    """
    new_ips: set[str] = set()

    if not json_path.exists():
        return new_ips, position

    # A file smaller than the remembered position means it was rotated;
    # restart from the beginning.
    size = json_path.stat().st_size
    if size < position:
        position = 0  # file rotated

    if size == position:
        return new_ips, position

    with open(json_path, "r", encoding="utf-8", errors="replace") as f:
        f.seek(position)
        while True:
            line = f.readline()
            if not line:
                break
            # A line without a trailing newline is still being written;
            # leave position before it so the next cycle re-reads it whole.
            if not line.endswith("\n"):
                break  # partial line

            try:
                record = json.loads(line.strip())
            except json.JSONDecodeError:
                # Malformed line: advance past it so it is not re-parsed.
                position = f.tell()
                continue

            # Skip our own events
            if record.get("service") == "prober":
                position = f.tell()
                continue

            ip = record.get("attacker_ip", "Unknown")
            if ip != "Unknown" and ip:
                new_ips.add(ip)

            # Advance only after fully consuming the line, so a crash here
            # re-processes at most one record.
            position = f.tell()

    return new_ips, position
|
||||
|
||||
|
||||
# ─── Probe cycle ─────────────────────────────────────────────────────────────
|
||||
|
||||
@_traced("prober.probe_cycle")
def _probe_cycle(
    targets: set[str],
    probed: dict[str, dict[str, set[int]]],
    jarm_ports: list[int],
    ssh_ports: list[int],
    tcpfp_ports: list[int],
    log_path: Path,
    json_path: Path,
    timeout: float = 5.0,
) -> None:
    """
    Probe all known attacker IPs with JARM, HASSH, and TCP/IP fingerprinting.

    Args:
        targets: set of attacker IPs to probe
        probed: dict mapping IP -> {probe_type -> set of ports already probed}
        jarm_ports: TLS ports for JARM fingerprinting
        ssh_ports: SSH ports for HASSHServer fingerprinting
        tcpfp_ports: ports for TCP/IP stack fingerprinting
        log_path: RFC 5424 log file
        json_path: JSON log file
        timeout: per-probe TCP timeout
    """
    # Deterministic ordering makes log output and probing behavior stable.
    for target_ip in sorted(targets):
        history = probed.setdefault(target_ip, {})

        # The three phases each track their own per-port history in `history`.
        _jarm_phase(target_ip, history, jarm_ports, log_path, json_path, timeout)
        _hassh_phase(target_ip, history, ssh_ports, log_path, json_path, timeout)
        _tcpfp_phase(target_ip, history, tcpfp_ports, log_path, json_path, timeout)
|
||||
|
||||
|
||||
@_traced("prober.jarm_phase")
def _jarm_phase(
    ip: str,
    ip_probed: dict[str, set[int]],
    ports: list[int],
    log_path: Path,
    json_path: Path,
    timeout: float,
) -> None:
    """JARM-fingerprint an IP on the given TLS ports."""
    done = ip_probed.setdefault("jarm", set())
    for port in ports:
        if port in done:
            continue  # already attempted in an earlier cycle
        try:
            fingerprint = jarm_hash(ip, port, timeout=timeout)
            done.add(port)
            # All-zeros means no TLS server answered; nothing worth logging.
            if fingerprint == JARM_EMPTY_HASH:
                continue
            _write_event(
                log_path, json_path,
                "jarm_fingerprint",
                target_ip=ip,
                target_port=str(port),
                jarm_hash=fingerprint,
                msg=f"JARM {ip}:{port} = {fingerprint}",
            )
            logger.info("prober: JARM %s:%d = %s", ip, port, fingerprint)
        except Exception as exc:
            # Mark the port as attempted so persistent failures are not
            # retried forever, then record the error in both log sinks.
            done.add(port)
            _write_event(
                log_path, json_path,
                "prober_error",
                severity=_SEVERITY_WARNING,
                target_ip=ip,
                target_port=str(port),
                error=str(exc),
                msg=f"JARM probe failed for {ip}:{port}: {exc}",
            )
            logger.warning("prober: JARM probe failed %s:%d: %s", ip, port, exc)
|
||||
|
||||
|
||||
@_traced("prober.hassh_phase")
def _hassh_phase(
    ip: str,
    ip_probed: dict[str, set[int]],
    ports: list[int],
    log_path: Path,
    json_path: Path,
    timeout: float,
) -> None:
    """HASSHServer-fingerprint an IP on the given SSH ports."""
    done = ip_probed.setdefault("hassh", set())
    for port in ports:
        if port in done:
            continue  # already attempted in an earlier cycle
        try:
            info = hassh_server(ip, port, timeout=timeout)
            done.add(port)
            if info is None:
                # No SSH server answered on this port.
                continue
            _write_event(
                log_path, json_path,
                "hassh_fingerprint",
                target_ip=ip,
                target_port=str(port),
                hassh_server_hash=info["hassh_server"],
                ssh_banner=info["banner"],
                kex_algorithms=info["kex_algorithms"],
                encryption_s2c=info["encryption_s2c"],
                mac_s2c=info["mac_s2c"],
                compression_s2c=info["compression_s2c"],
                msg=f"HASSH {ip}:{port} = {info['hassh_server']}",
            )
            logger.info("prober: HASSH %s:%d = %s", ip, port, info["hassh_server"])
        except Exception as exc:
            # Mark the port as attempted so persistent failures are not
            # retried forever, then record the error in both log sinks.
            done.add(port)
            _write_event(
                log_path, json_path,
                "prober_error",
                severity=_SEVERITY_WARNING,
                target_ip=ip,
                target_port=str(port),
                error=str(exc),
                msg=f"HASSH probe failed for {ip}:{port}: {exc}",
            )
            logger.warning("prober: HASSH probe failed %s:%d: %s", ip, port, exc)
|
||||
|
||||
|
||||
@_traced("prober.tcpfp_phase")
def _tcpfp_phase(
    ip: str,
    ip_probed: dict[str, set[int]],
    ports: list[int],
    log_path: Path,
    json_path: Path,
    timeout: float,
) -> None:
    """TCP/IP stack fingerprint an IP on the given ports."""
    done = ip_probed.setdefault("tcpfp", set())
    for port in ports:
        if port in done:
            continue  # already attempted in an earlier cycle
        try:
            info = tcp_fingerprint(ip, port, timeout=timeout)
            done.add(port)
            if info is None:
                # No SYN-ACK received; nothing to record.
                continue
            _write_event(
                log_path, json_path,
                "tcpfp_fingerprint",
                target_ip=ip,
                target_port=str(port),
                tcpfp_hash=info["tcpfp_hash"],
                tcpfp_raw=info["tcpfp_raw"],
                ttl=str(info["ttl"]),
                window_size=str(info["window_size"]),
                df_bit=str(info["df_bit"]),
                mss=str(info["mss"]),
                window_scale=str(info["window_scale"]),
                sack_ok=str(info["sack_ok"]),
                timestamp=str(info["timestamp"]),
                options_order=info["options_order"],
                msg=f"TCPFP {ip}:{port} = {info['tcpfp_hash']}",
            )
            logger.info("prober: TCPFP %s:%d = %s", ip, port, info["tcpfp_hash"])
        except Exception as exc:
            # Mark the port as attempted so persistent failures are not
            # retried forever, then record the error in both log sinks.
            done.add(port)
            _write_event(
                log_path, json_path,
                "prober_error",
                severity=_SEVERITY_WARNING,
                target_ip=ip,
                target_port=str(port),
                error=str(exc),
                msg=f"TCPFP probe failed for {ip}:{port}: {exc}",
            )
            logger.warning("prober: TCPFP probe failed %s:%d: %s", ip, port, exc)
|
||||
|
||||
|
||||
# ─── Main worker ─────────────────────────────────────────────────────────────
|
||||
|
||||
@_traced("prober.worker")
async def prober_worker(
    log_file: str,
    interval: int = 300,
    timeout: float = 5.0,
    ports: list[int] | None = None,
    ssh_ports: list[int] | None = None,
    tcpfp_ports: list[int] | None = None,
) -> None:
    """
    Main entry point for the standalone prober process.

    Discovers attacker IPs automatically by tailing the JSON log file,
    then fingerprints each IP via JARM, HASSH, and TCP/IP stack probes.

    Args:
        log_file: base path for log files (RFC 5424 to .log, JSON to .json)
        interval: seconds between probe cycles
        timeout: per-probe TCP timeout
        ports: JARM TLS ports (defaults to DEFAULT_PROBE_PORTS)
        ssh_ports: HASSH SSH ports (defaults to DEFAULT_SSH_PORTS)
        tcpfp_ports: TCP fingerprint ports (defaults to DEFAULT_TCPFP_PORTS)
    """
    jarm_ports = ports or DEFAULT_PROBE_PORTS
    hassh_ports = ssh_ports or DEFAULT_SSH_PORTS
    tcp_ports = tcpfp_ports or DEFAULT_TCPFP_PORTS

    all_ports_str = (
        f"jarm={','.join(str(p) for p in jarm_ports)} "
        f"ssh={','.join(str(p) for p in hassh_ports)} "
        f"tcpfp={','.join(str(p) for p in tcp_ports)}"
    )

    log_path = Path(log_file)
    json_path = log_path.with_suffix(".json")
    log_path.parent.mkdir(parents=True, exist_ok=True)

    logger.info(
        "prober started interval=%ds %s log=%s",
        interval, all_ports_str, log_path,
    )

    _write_event(
        log_path, json_path,
        "prober_startup",
        interval=str(interval),
        probe_ports=all_ports_str,
        msg=f"DECNET-PROBER started, interval {interval}s, {all_ports_str}",
    )

    known_attackers: set[str] = set()
    probed: dict[str, dict[str, set[int]]] = {}  # IP -> {type -> ports}
    log_position: int = 0

    while True:
        # Discover new attacker IPs from the log stream. Blocking file I/O
        # runs in a worker thread so the event loop stays responsive.
        new_ips, log_position = await asyncio.to_thread(
            _discover_attackers, json_path, log_position,
        )

        # Compute the set difference once (the original evaluated
        # `new_ips - known_attackers` twice).
        fresh = new_ips - known_attackers
        if fresh:
            known_attackers.update(fresh)
            logger.info(
                "prober: discovered %d new attacker(s), total=%d",
                len(fresh), len(known_attackers),
            )

        if known_attackers:
            # Probing is blocking socket work; keep it off the event loop.
            await asyncio.to_thread(
                _probe_cycle, known_attackers, probed,
                jarm_ports, hassh_ports, tcp_ports,
                log_path, json_path, timeout,
            )

        await asyncio.sleep(interval)
|
||||
5
decnet/profiler/__init__.py
Normal file
5
decnet/profiler/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""DECNET profiler — standalone attacker profile builder worker."""
|
||||
|
||||
from decnet.profiler.worker import attacker_profile_worker
|
||||
|
||||
__all__ = ["attacker_profile_worker"]
|
||||
602
decnet/profiler/behavioral.py
Normal file
602
decnet/profiler/behavioral.py
Normal file
@@ -0,0 +1,602 @@
|
||||
"""
|
||||
Behavioral and timing analysis for DECNET attacker profiles.
|
||||
|
||||
Consumes the chronological `LogEvent` stream already built by
|
||||
`decnet.correlation.engine.CorrelationEngine` and derives per-IP metrics:
|
||||
|
||||
- Inter-event timing statistics (mean / median / stdev / min / max)
|
||||
- Coefficient-of-variation (jitter metric)
|
||||
- Beaconing vs. interactive vs. scanning vs. brute_force vs. slow_scan
|
||||
classification
|
||||
- Tool attribution against known C2 frameworks (Cobalt Strike, Sliver,
|
||||
Havoc, Mythic) using default beacon/jitter profiles — returns a list,
|
||||
since multiple tools can be in use simultaneously
|
||||
- Header-based tool detection (Nmap NSE, Gophish, Nikto, sqlmap, etc.)
|
||||
from HTTP request events
|
||||
- Recon → exfil phase sequencing (latency between the last recon event
|
||||
and the first exfil-like event)
|
||||
- OS / TCP fingerprint + retransmit rollup from sniffer-emitted events,
|
||||
with TTL-based fallback when p0f returns no match
|
||||
|
||||
Pure-Python; no external dependencies. All functions are safe to call from
|
||||
both sync and async contexts.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import statistics
|
||||
from collections import Counter
|
||||
from typing import Any
|
||||
|
||||
from decnet.correlation.parser import LogEvent
|
||||
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer
|
||||
|
||||
# ─── Event-type taxonomy ────────────────────────────────────────────────────
|
||||
|
||||
# Sniffer-emitted packet events that feed into fingerprint rollup.
|
||||
_SNIFFER_SYN_EVENT: str = "tcp_syn_fingerprint"
|
||||
_SNIFFER_FLOW_EVENT: str = "tcp_flow_timing"
|
||||
# Prober-emitted active-probe result (SYN-ACK fingerprint of attacker machine).
|
||||
_PROBER_TCPFP_EVENT: str = "tcpfp_fingerprint"
|
||||
|
||||
# Canonical initial TTL for each coarse OS bucket. Used to derive hop
|
||||
# distance when only the observed TTL is available (prober path).
|
||||
_INITIAL_TTL: dict[str, int] = {
|
||||
"linux": 64,
|
||||
"windows": 128,
|
||||
"embedded": 255,
|
||||
}
|
||||
|
||||
# Events that signal "recon" phase (scans, probes, auth attempts).
|
||||
_RECON_EVENT_TYPES: frozenset[str] = frozenset({
|
||||
"scan", "connection", "banner", "probe",
|
||||
"login_attempt", "auth", "auth_failure",
|
||||
})
|
||||
|
||||
# Events that signal "exfil" / action-on-objective phase.
|
||||
_EXFIL_EVENT_TYPES: frozenset[str] = frozenset({
|
||||
"download", "upload", "file_transfer", "data_exfil",
|
||||
"command", "exec", "query", "shell_input",
|
||||
})
|
||||
|
||||
# Fields carrying payload byte counts (for "large payload" detection).
|
||||
_PAYLOAD_SIZE_FIELDS: tuple[str, ...] = ("bytes", "size", "content_length")
|
||||
|
||||
# ─── C2 tool attribution signatures (beacon timing) ─────────────────────────
|
||||
#
|
||||
# Each entry lists the default beacon cadence profile of a popular C2.
|
||||
# A profile *matches* an attacker when:
|
||||
# - mean inter-event time is within ±`interval_tolerance` seconds, AND
|
||||
# - jitter (cv = stdev / mean) is within ±`jitter_tolerance`
|
||||
#
|
||||
# Multiple matches are all returned (attacker may run multiple implants).
|
||||
|
||||
_TOOL_SIGNATURES: tuple[dict[str, Any], ...] = (
|
||||
{
|
||||
"name": "cobalt_strike",
|
||||
"interval_s": 60.0,
|
||||
"interval_tolerance_s": 8.0,
|
||||
"jitter_cv": 0.20,
|
||||
"jitter_tolerance": 0.05,
|
||||
},
|
||||
{
|
||||
"name": "sliver",
|
||||
"interval_s": 60.0,
|
||||
"interval_tolerance_s": 10.0,
|
||||
"jitter_cv": 0.30,
|
||||
"jitter_tolerance": 0.08,
|
||||
},
|
||||
{
|
||||
"name": "havoc",
|
||||
"interval_s": 45.0,
|
||||
"interval_tolerance_s": 8.0,
|
||||
"jitter_cv": 0.10,
|
||||
"jitter_tolerance": 0.03,
|
||||
},
|
||||
{
|
||||
"name": "mythic",
|
||||
"interval_s": 30.0,
|
||||
"interval_tolerance_s": 6.0,
|
||||
"jitter_cv": 0.15,
|
||||
"jitter_tolerance": 0.03,
|
||||
},
|
||||
)
|
||||
|
||||
# ─── Header-based tool signatures ───────────────────────────────────────────
|
||||
#
|
||||
# Scanned against HTTP `request` events. `pattern` is a case-insensitive
|
||||
# substring (or a regex anchored with ^ if it starts with that character).
|
||||
# `header` is matched case-insensitively against the event's headers dict.
|
||||
|
||||
_HEADER_TOOL_SIGNATURES: tuple[dict[str, str], ...] = (
|
||||
{"name": "nmap", "header": "user-agent", "pattern": "Nmap Scripting Engine"},
|
||||
{"name": "gophish", "header": "x-mailer", "pattern": "gophish"},
|
||||
{"name": "nikto", "header": "user-agent", "pattern": "Nikto"},
|
||||
{"name": "sqlmap", "header": "user-agent", "pattern": "sqlmap"},
|
||||
{"name": "nuclei", "header": "user-agent", "pattern": "Nuclei"},
|
||||
{"name": "masscan", "header": "user-agent", "pattern": "masscan"},
|
||||
{"name": "zgrab", "header": "user-agent", "pattern": "zgrab"},
|
||||
{"name": "metasploit", "header": "user-agent", "pattern": "Metasploit"},
|
||||
{"name": "curl", "header": "user-agent", "pattern": "^curl/"},
|
||||
{"name": "python_requests", "header": "user-agent", "pattern": "python-requests"},
|
||||
{"name": "gobuster", "header": "user-agent", "pattern": "gobuster"},
|
||||
{"name": "dirbuster", "header": "user-agent", "pattern": "DirBuster"},
|
||||
{"name": "hydra", "header": "user-agent", "pattern": "hydra"},
|
||||
{"name": "wfuzz", "header": "user-agent", "pattern": "Wfuzz"},
|
||||
)
|
||||
|
||||
# ─── TTL → coarse OS bucket (fallback when p0f returns nothing) ─────────────
|
||||
|
||||
def _os_from_ttl(ttl_str: str | None) -> str | None:
|
||||
"""Derive a coarse OS guess from observed TTL when p0f has no match."""
|
||||
if not ttl_str:
|
||||
return None
|
||||
try:
|
||||
ttl = int(ttl_str)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
if 55 <= ttl <= 70:
|
||||
return "linux"
|
||||
if 115 <= ttl <= 135:
|
||||
return "windows"
|
||||
if 235 <= ttl <= 255:
|
||||
return "embedded"
|
||||
return None
|
||||
|
||||
|
||||
# ─── Timing stats ───────────────────────────────────────────────────────────
|
||||
|
||||
@_traced("profiler.timing_stats")
def timing_stats(events: list[LogEvent]) -> dict[str, Any]:
    """
    Compute inter-arrival-time statistics across *events* (sorted by ts).

    Args:
        events: LogEvent objects for a single attacker (any order).

    Returns a dict with:
        mean_iat_s, median_iat_s, stdev_iat_s, min_iat_s, max_iat_s, cv,
        event_count, duration_s

    For n < 2 events — or when every interval is negative due to clock
    skew — the interval-based fields are None and duration reflects the
    observed span only.
    """
    if not events:
        return _null_stats(0, 0.0)

    sorted_events = sorted(events, key=lambda e: e.timestamp)
    duration_s = (sorted_events[-1].timestamp - sorted_events[0].timestamp).total_seconds()

    if len(sorted_events) < 2:
        return _null_stats(len(sorted_events), duration_s)

    iats = [
        (sorted_events[i].timestamp - sorted_events[i - 1].timestamp).total_seconds()
        for i in range(1, len(sorted_events))
    ]
    # Exclude spuriously-negative (clock-skew) intervals.
    iats = [v for v in iats if v >= 0]
    if not iats:
        return _null_stats(len(sorted_events), duration_s)

    mean = statistics.fmean(iats)
    median = statistics.median(iats)
    # Population stdev: the observed intervals are the full window, not a sample.
    stdev = statistics.pstdev(iats) if len(iats) > 1 else 0.0
    # Coefficient of variation — the jitter metric used by classify_behavior.
    cv = (stdev / mean) if mean > 0 else None

    return {
        "event_count": len(sorted_events),
        "duration_s": round(duration_s, 3),
        "mean_iat_s": round(mean, 3),
        "median_iat_s": round(median, 3),
        "stdev_iat_s": round(stdev, 3),
        "min_iat_s": round(min(iats), 3),
        "max_iat_s": round(max(iats), 3),
        "cv": round(cv, 4) if cv is not None else None,
    }


def _null_stats(event_count: int, duration_s: float) -> dict[str, Any]:
    """Degenerate stats dict (no usable inter-arrival intervals).

    Replaces three previously-duplicated inline dict literals so the
    result schema is defined in exactly one place.
    """
    return {
        "event_count": event_count,
        "duration_s": round(duration_s, 3),
        "mean_iat_s": None,
        "median_iat_s": None,
        "stdev_iat_s": None,
        "min_iat_s": None,
        "max_iat_s": None,
        "cv": None,
    }
|
||||
|
||||
|
||||
# ─── Behavior classification ────────────────────────────────────────────────
|
||||
|
||||
@_traced("profiler.classify_behavior")
def classify_behavior(stats: dict[str, Any], services_count: int) -> str:
    """
    Coarse behavior bucket:
        beaconing | interactive | scanning | brute_force | slow_scan | mixed | unknown

    Args:
        stats: dict produced by `timing_stats()` (reads event_count,
            mean_iat_s, cv).
        services_count: number of distinct services this attacker touched.

    Heuristics (evaluated in priority order — first match wins):
      * `unknown`     — fewer than 3 events, or no mean IAT available
      * `slow_scan`   — ≥ 2 services, mean IAT ≥ 10 s, ≥ 4 events
      * `scanning`    — ≥ 3 events AND (≥ 3 services with mean IAT < 10 s,
                        OR ≥ 2 services with mean IAT < 2 s)
      * `brute_force` — exactly 1 service, ≥ 8 events, mean IAT < 5 s, CV < 0.6
      * `beaconing`   — CV < 0.35, mean IAT ≥ 5 s, ≥ 4 events
      * `interactive` — CV ≥ 0.5, mean IAT < 5 s, ≥ 6 events
      * `mixed`       — catch-all for sessions with enough data
    """
    n = stats.get("event_count") or 0
    mean = stats.get("mean_iat_s")
    cv = stats.get("cv")

    if n < 3 or mean is None:
        return "unknown"

    # Slow scan / low-and-slow: multiple services with long gaps.
    # Must be checked before generic scanning so slow multi-service sessions
    # don't get mis-bucketed as a fast sweep.
    if services_count >= 2 and mean >= 10.0 and n >= 4:
        return "slow_scan"

    # Scanning: broad service sweep (multi-service) or very rapid single-service bursts.
    if n >= 3 and (
        (services_count >= 3 and mean < 10.0)
        or (services_count >= 2 and mean < 2.0)
    ):
        return "scanning"

    # Brute force: hammering one service rapidly and repeatedly.
    if services_count == 1 and n >= 8 and mean < 5.0 and cv is not None and cv < 0.6:
        return "brute_force"

    # Beaconing: regular cadence (low jitter) over multiple events.
    if cv is not None and cv < 0.35 and mean >= 5.0 and n >= 4:
        return "beaconing"

    # Interactive: short but irregular bursts (human or tool with think time).
    if cv is not None and cv >= 0.5 and mean < 5.0 and n >= 6:
        return "interactive"

    return "mixed"
|
||||
|
||||
|
||||
# ─── C2 tool attribution (beacon timing) ────────────────────────────────────
|
||||
|
||||
def guess_tools(mean_iat_s: float | None, cv: float | None) -> list[str]:
    """
    Match (mean_iat, cv) against known C2 default beacon profiles.

    A signature matches when the mean inter-arrival time is within its
    interval tolerance AND the jitter (cv) is within its jitter tolerance.
    Returns every matching tool name (may be empty) — an attacker can run
    several implants at once, so all matches are kept.
    """
    if mean_iat_s is None or cv is None:
        return []

    return [
        sig["name"]
        for sig in _TOOL_SIGNATURES
        if abs(mean_iat_s - sig["interval_s"]) <= sig["interval_tolerance_s"]
        and abs(cv - sig["jitter_cv"]) <= sig["jitter_tolerance"]
    ]
|
||||
|
||||
|
||||
# Keep the old name as an alias so callers that expected a single string still
|
||||
# compile, but mark it deprecated. Returns the sole match, or None when zero
# or multiple tools match (ambiguous).
|
||||
def guess_tool(mean_iat_s: float | None, cv: float | None) -> str | None:
    """Deprecated: use guess_tools() instead.

    Returns the tool name only when exactly one beacon signature matches;
    zero or multiple matches yield None (ambiguous for single-string callers).
    """
    hits = guess_tools(mean_iat_s, cv)
    return hits[0] if len(hits) == 1 else None
|
||||
|
||||
|
||||
# ─── Header-based tool detection ────────────────────────────────────────────
|
||||
|
||||
@_traced("profiler.detect_tools_from_headers")
def detect_tools_from_headers(events: list[LogEvent]) -> list[str]:
    """
    Scan HTTP `request` events for tool-identifying headers.

    Checks User-Agent, X-Mailer, and other headers case-insensitively
    against `_HEADER_TOOL_SIGNATURES`. Returns a deduplicated list of
    matched tool names in detection order.
    """
    found: list[str] = []
    seen: set[str] = set()

    for e in events:
        if e.event_type != "request":
            continue

        raw_headers = e.fields.get("headers")
        if not raw_headers:
            continue

        headers = _coerce_headers(raw_headers)
        if headers is None:
            continue

        # Normalise header keys to lowercase for matching.
        lc_headers: dict[str, str] = {k.lower(): str(v) for k, v in headers.items()}

        for sig in _HEADER_TOOL_SIGNATURES:
            name = sig["name"]
            if name in seen:
                continue
            value = lc_headers.get(sig["header"])
            if value is None:
                continue
            pattern = sig["pattern"]
            if pattern.startswith("^"):
                # A leading ^ marks the pattern as an anchored regex.
                if re.match(pattern, value, re.IGNORECASE):
                    found.append(name)
                    seen.add(name)
            else:
                # Otherwise: case-insensitive substring match.
                if pattern.lower() in value.lower():
                    found.append(name)
                    seen.add(name)

        # Every known tool already detected — remaining events can't add more.
        if len(seen) == len(_HEADER_TOOL_SIGNATURES):
            break

    return found


def _coerce_headers(raw_headers: Any) -> dict[str, Any] | None:
    """
    Normalise a raw `headers` field into a dict, or None if unusable.

    Accepts: a dict already (in-memory / test paths), a JSON string, or —
    for backward compatibility with events written before the JSON-encode
    fix — a Python-repr string parsed via ast.literal_eval (safe: no
    arbitrary code execution).

    Fixes two defects of the inline version: `import ast` is no longer
    executed inside the per-event exception path, and valid JSON that is
    not an object (e.g. a list) is rejected instead of crashing the caller
    with AttributeError on .items().
    """
    if isinstance(raw_headers, dict):
        return raw_headers
    if not isinstance(raw_headers, str):
        return None
    try:
        parsed = json.loads(raw_headers)
    except (json.JSONDecodeError, ValueError):
        try:
            import ast
            parsed = ast.literal_eval(raw_headers)
        except Exception:  # nosec B112 — skip unparseable header values
            return None
    return parsed if isinstance(parsed, dict) else None
|
||||
|
||||
|
||||
# ─── Phase sequencing ───────────────────────────────────────────────────────
|
||||
|
||||
@_traced("profiler.phase_sequence")
def phase_sequence(events: list[LogEvent]) -> dict[str, Any]:
    """
    Derive recon→exfil phase transition info.

    Returns:
        recon_end_ts       : ISO timestamp of last recon-class event (or None)
        exfil_start_ts     : ISO timestamp of first exfil-class event (or None)
        exfil_latency_s    : seconds between them (None if not both present)
        large_payload_count: count of events whose *fields* report a payload
                             ≥ 1 MiB (heuristic for bulk data transfer)
    """
    recon_end = None
    exfil_start = None
    big_transfers = 0

    for ev in sorted(events, key=lambda item: item.timestamp):
        if ev.event_type in _RECON_EVENT_TYPES:
            recon_end = ev.timestamp  # keep overwriting: we want the *last* one
        elif exfil_start is None and ev.event_type in _EXFIL_EVENT_TYPES:
            exfil_start = ev.timestamp  # only the *first* exfil event counts

        # An event is counted at most once as "large", via whichever
        # size-carrying field first reports ≥ 1 MiB.
        for field_name in _PAYLOAD_SIZE_FIELDS:
            value = ev.fields.get(field_name)
            if value is None:
                continue
            try:
                size = int(value)
            except (TypeError, ValueError):
                continue
            if size >= 1_048_576:
                big_transfers += 1
                break

    latency: float | None = None
    if recon_end is not None and exfil_start is not None and exfil_start >= recon_end:
        latency = round((exfil_start - recon_end).total_seconds(), 3)

    return {
        "recon_end_ts": recon_end.isoformat() if recon_end else None,
        "exfil_start_ts": exfil_start.isoformat() if exfil_start else None,
        "exfil_latency_s": latency,
        "large_payload_count": big_transfers,
    }
|
||||
|
||||
|
||||
# ─── Sniffer rollup (OS fingerprint + retransmits) ──────────────────────────
|
||||
|
||||
@_traced("profiler.sniffer_rollup")
def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
    """
    Roll up sniffer-emitted `tcp_syn_fingerprint` and `tcp_flow_timing`
    events — plus prober-emitted `tcpfp_fingerprint` events — into a
    per-attacker summary.

    OS guess priority:
      1. Modal p0f label from os_guess field (if not "unknown"/empty).
      2. TTL-based coarse bucket (linux / windows / embedded) as fallback.
    Hop distance: median of non-zero reported values only.

    Returns a dict with keys: os_guess (str | None), hop_distance
    (int | None), tcp_fingerprint (dict, possibly empty), retransmit_count
    (int). Field values are read as strings — the events come from a log
    stream, so every numeric field is parsed defensively.
    """
    os_guesses: list[str] = []
    # TTLs are kept as raw strings so modal counting matches the exact
    # observed values; parsing happens only in _os_from_ttl.
    ttl_values: list[str] = []
    hops: list[int] = []
    tcp_fp: dict[str, Any] | None = None
    retransmits = 0

    for e in events:
        if e.event_type == _SNIFFER_SYN_EVENT:
            og = e.fields.get("os_guess")
            if og and og != "unknown":
                os_guesses.append(og)

            # Collect raw TTL for fallback OS derivation.
            ttl_raw = e.fields.get("ttl") or e.fields.get("initial_ttl")
            if ttl_raw:
                ttl_values.append(ttl_raw)

            # Only include hop distances that are valid and non-zero.
            hop_raw = e.fields.get("hop_distance")
            if hop_raw:
                try:
                    hop_val = int(hop_raw)
                    if hop_val > 0:
                        hops.append(hop_val)
                except (TypeError, ValueError):
                    pass

            # Keep the latest fingerprint snapshot (each new SYN event
            # overwrites the previous one).
            tcp_fp = {
                "window": _int_or_none(e.fields.get("window")),
                "wscale": _int_or_none(e.fields.get("wscale")),
                "mss": _int_or_none(e.fields.get("mss")),
                "options_sig": e.fields.get("options_sig", ""),
                # Sniffer serialises booleans as the literal strings "true"/"false".
                "has_sack": e.fields.get("has_sack") == "true",
                "has_timestamps": e.fields.get("has_timestamps") == "true",
            }

        elif e.event_type == _SNIFFER_FLOW_EVENT:
            # Flow events are additive: total retransmits across all flows.
            try:
                retransmits += int(e.fields.get("retransmits", "0"))
            except (TypeError, ValueError):
                pass

        elif e.event_type == _PROBER_TCPFP_EVENT:
            # Active-probe result: prober sent SYN to attacker, got SYN-ACK back.
            # Field names differ from the passive sniffer (different emitter).
            ttl_raw = e.fields.get("ttl")
            if ttl_raw:
                ttl_values.append(ttl_raw)

            # Derive hop distance from observed TTL vs canonical initial TTL.
            os_hint = _os_from_ttl(ttl_raw)
            if os_hint:
                initial = _INITIAL_TTL.get(os_hint)
                if initial:
                    try:
                        hop_val = initial - int(ttl_raw)
                        if hop_val > 0:
                            hops.append(hop_val)
                    except (TypeError, ValueError):
                        pass

            # Prober uses window_size/window_scale/options_order instead of
            # the sniffer's window/wscale/options_sig.
            tcp_fp = {
                "window": _int_or_none(e.fields.get("window_size")),
                "wscale": _int_or_none(e.fields.get("window_scale")),
                "mss": _int_or_none(e.fields.get("mss")),
                "options_sig": e.fields.get("options_order", ""),
                # Prober serialises booleans as "1"/"0" (unlike the sniffer).
                "has_sack": e.fields.get("sack_ok") == "1",
                "has_timestamps": e.fields.get("timestamp") == "1",
            }

    # Mode for the OS bucket — most frequently observed label.
    os_guess: str | None = None
    if os_guesses:
        os_guess = Counter(os_guesses).most_common(1)[0][0]
    else:
        # TTL-based fallback: use the most common observed TTL value.
        if ttl_values:
            modal_ttl = Counter(ttl_values).most_common(1)[0][0]
            os_guess = _os_from_ttl(modal_ttl)

    # Median hop distance (robust to the occasional weird TTL).
    hop_distance: int | None = None
    if hops:
        hop_distance = int(statistics.median(hops))

    return {
        "os_guess": os_guess,
        "hop_distance": hop_distance,
        "tcp_fingerprint": tcp_fp or {},
        "retransmit_count": retransmits,
    }
|
||||
|
||||
|
||||
def _int_or_none(v: Any) -> int | None:
|
||||
if v is None or v == "":
|
||||
return None
|
||||
try:
|
||||
return int(v)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
# ─── Composite: build the full AttackerBehavior record ──────────────────────
|
||||
|
||||
@_traced("profiler.build_behavior_record")
def build_behavior_record(events: list[LogEvent]) -> dict[str, Any]:
    """
    Build the dict to persist in the `attacker_behavior` table.

    Combines timing stats, behavior classification, sniffer/prober
    fingerprint rollup, phase sequencing, and tool attribution for one
    attacker's events into a single flat record. JSON-typed fields
    (tcp_fingerprint, tool_guesses, timing_stats, phase_sequence) are
    serialized here to keep the repo layer schema-agnostic.
    """
    # Timing stats are computed across *all* events (not filtered), because
    # a C2 beacon often reuses the same "connection" event_type on each
    # check-in. Filtering would throw that signal away.
    stats = timing_stats(events)
    services = {e.service for e in events}
    behavior = classify_behavior(stats, len(services))
    rollup = sniffer_rollup(events)
    phase = phase_sequence(events)

    # Combine beacon-timing tool matches with header-based detections.
    beacon_tools = guess_tools(stats.get("mean_iat_s"), stats.get("cv"))
    header_tools = detect_tools_from_headers(events)
    all_tools: list[str] = list(dict.fromkeys(beacon_tools + header_tools))  # dedup, preserve order

    # Promote TCP-level scanner identification to tool_guesses.
    # p0f fingerprints nmap from the TCP handshake alone — this fires even
    # when no HTTP service is present, making it far more reliable than the
    # header-based path for raw port scans.
    if rollup["os_guess"] == "nmap" and "nmap" not in all_tools:
        all_tools.insert(0, "nmap")

    # Beacon-specific projection: only surface interval/jitter when we've
    # classified the flow as beaconing (otherwise these numbers are noise).
    beacon_interval_s: float | None = None
    beacon_jitter_pct: float | None = None
    if behavior == "beaconing":
        beacon_interval_s = stats.get("mean_iat_s")
        cv = stats.get("cv")
        # cv is a ratio (stdev/mean); expose it as a percentage.
        beacon_jitter_pct = round(cv * 100, 2) if cv is not None else None

    # Emit a summary span so the classification outcome is observable in traces.
    _tracer = _get_tracer("profiler")
    with _tracer.start_as_current_span("profiler.behavior_summary") as _span:
        _span.set_attribute("behavior_class", behavior)
        _span.set_attribute("os_guess", rollup["os_guess"] or "unknown")
        _span.set_attribute("tool_count", len(all_tools))
        _span.set_attribute("event_count", stats.get("event_count", 0))
        if all_tools:
            _span.set_attribute("tools", ",".join(all_tools))

    return {
        "os_guess": rollup["os_guess"],
        "hop_distance": rollup["hop_distance"],
        "tcp_fingerprint": json.dumps(rollup["tcp_fingerprint"]),
        "retransmit_count": rollup["retransmit_count"],
        "behavior_class": behavior,
        "beacon_interval_s": beacon_interval_s,
        "beacon_jitter_pct": beacon_jitter_pct,
        "tool_guesses": json.dumps(all_tools),
        "timing_stats": json.dumps(stats),
        "phase_sequence": json.dumps(phase),
    }
|
||||
215
decnet/profiler/worker.py
Normal file
215
decnet/profiler/worker.py
Normal file
@@ -0,0 +1,215 @@
|
||||
"""
|
||||
Attacker profile builder — incremental background worker.
|
||||
|
||||
Maintains a persistent CorrelationEngine and a log-ID cursor across cycles.
|
||||
On cold start (first cycle or process restart), performs one full build from
|
||||
all stored logs. Subsequent cycles fetch only new logs via the cursor,
|
||||
ingest them into the existing engine, and rebuild profiles for affected IPs
|
||||
only.
|
||||
|
||||
Complexity per cycle: O(new_logs + affected_ips) instead of O(total_logs²).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from decnet.correlation.engine import CorrelationEngine
|
||||
from decnet.correlation.parser import LogEvent
|
||||
from decnet.logging import get_logger
|
||||
from decnet.profiler.behavioral import build_behavior_record
|
||||
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
|
||||
logger = get_logger("attacker_worker")
|
||||
|
||||
_BATCH_SIZE = 500
|
||||
_STATE_KEY = "attacker_worker_cursor"
|
||||
|
||||
# Event types that indicate active command/query execution (not just connection/scan)
|
||||
_COMMAND_EVENT_TYPES = frozenset({
|
||||
"command", "exec", "query", "input", "shell_input",
|
||||
"execute", "run", "sql_query", "redis_command",
|
||||
})
|
||||
|
||||
# Fields that carry the executed command/query text
|
||||
_COMMAND_FIELDS = ("command", "query", "input", "line", "sql", "cmd")
|
||||
|
||||
|
||||
@dataclass
class _WorkerState:
    # Long-lived correlation engine; retains ingested events across cycles.
    engine: CorrelationEngine = field(default_factory=CorrelationEngine)
    # Highest log row id already ingested (the incremental cursor).
    last_log_id: int = 0
    # False until the first (cold-start) full build completes, or until a
    # persisted cursor is restored on startup.
    initialized: bool = False
|
||||
|
||||
|
||||
async def attacker_profile_worker(repo: BaseRepository, *, interval: int = 30) -> None:
    """
    Periodically update the Attacker table incrementally.

    Designed to run as a long-lived asyncio Task. On startup, restores the
    log-ID cursor from persisted worker state (if present) so a process
    restart skips the full rebuild. Each cycle sleeps *interval* seconds,
    then ingests new logs and rebuilds profiles for affected IPs only.

    Args:
        repo: repository providing log/state/attacker persistence.
        interval: seconds between update cycles.
    """
    logger.info("attacker profile worker started interval=%ds", interval)
    state = _WorkerState()

    saved_cursor = await repo.get_state(_STATE_KEY)
    if saved_cursor:
        state.last_log_id = saved_cursor.get("last_log_id", 0)
        state.initialized = True
        logger.info("attacker worker: resumed from cursor last_log_id=%d", state.last_log_id)

    while True:
        await asyncio.sleep(interval)
        try:
            await _incremental_update(repo, state)
        except Exception:
            # logger.exception preserves the traceback (logger.error with
            # just `exc` did not); the worker keeps running — one failed
            # cycle must never kill the task.
            logger.exception("attacker worker: update failed")
|
||||
|
||||
|
||||
@_traced("profiler.incremental_update")
async def _incremental_update(repo: BaseRepository, state: _WorkerState) -> None:
    """Drain new log rows into the engine, then rebuild profiles for the IPs they touched."""
    cold_start = not state.initialized
    touched: set[str] = set()

    # Pull everything newer than the cursor, one batch at a time.
    while True:
        rows = await repo.get_logs_after_id(state.last_log_id, limit=_BATCH_SIZE)
        if not rows:
            break

        for row in rows:
            parsed = state.engine.ingest(row["raw_line"])
            if parsed and parsed.attacker_ip:
                touched.add(parsed.attacker_ip)
            state.last_log_id = row["id"]

        await asyncio.sleep(0)  # yield to event loop after each batch

        # A short batch means we've caught up with the log stream.
        if len(rows) < _BATCH_SIZE:
            break

    state.initialized = True

    if not touched:
        await repo.set_state(_STATE_KEY, {"last_log_id": state.last_log_id})
        return

    await _update_profiles(repo, state, touched)
    await repo.set_state(_STATE_KEY, {"last_log_id": state.last_log_id})

    if cold_start:
        logger.info("attacker worker: cold start rebuilt %d profiles", len(touched))
    else:
        logger.info("attacker worker: updated %d profiles (incremental)", len(touched))
|
||||
|
||||
|
||||
@_traced("profiler.update_profiles")
async def _update_profiles(
    repo: BaseRepository,
    state: _WorkerState,
    ips: set[str],
) -> None:
    """
    Rebuild and upsert the attacker profile (and behavior record) for each IP.

    Args:
        repo: repository providing bounty lookup and attacker upserts.
        state: worker state whose engine already holds the ingested events.
        ips: attacker IPs affected by the current batch of new logs.
    """
    # Traversal detection is fleet-wide; build the lookup once per cycle.
    traversal_map = {t.attacker_ip: t for t in state.engine.traversals(min_deckies=2)}
    bounties_map = await repo.get_bounties_for_ips(ips)

    _tracer = _get_tracer("profiler")
    for ip in ips:
        # NOTE(review): reaches into CorrelationEngine's private `_events`
        # store — consider a public accessor on the engine.
        events = state.engine._events.get(ip, [])
        if not events:
            continue

        with _tracer.start_as_current_span("profiler.process_ip") as _span:
            _span.set_attribute("attacker_ip", ip)
            _span.set_attribute("event_count", len(events))

            traversal = traversal_map.get(ip)
            bounties = bounties_map.get(ip, [])
            commands = _extract_commands_from_events(events)

            record = _build_record(ip, events, traversal, bounties, commands)
            attacker_uuid = await repo.upsert_attacker(record)

            _span.set_attribute("is_traversal", traversal is not None)
            _span.set_attribute("bounty_count", len(bounties))
            _span.set_attribute("command_count", len(commands))

            # Behavioral / fingerprint rollup lives in a sibling table so failures
            # here never block the core attacker profile upsert.
            try:
                behavior = build_behavior_record(events)
                await repo.upsert_attacker_behavior(attacker_uuid, behavior)
            except Exception as exc:
                _span.record_exception(exc)
                logger.error("attacker worker: behavior upsert failed for %s: %s", ip, exc)
|
||||
|
||||
|
||||
def _build_record(
    ip: str,
    events: list[LogEvent],
    traversal: Any,
    bounties: list[dict[str, Any]],
    commands: list[dict[str, Any]],
) -> dict[str, Any]:
    """Assemble the attacker-table row for one IP from its correlated data."""
    services = sorted({e.service for e in events})

    # Traversal attackers carry an ordered decky path from the engine;
    # everyone else gets deckies in first-contact order.
    if traversal:
        deckies = traversal.deckies
    else:
        deckies = _first_contact_deckies(events)

    fingerprints = [b for b in bounties if b.get("bounty_type") == "fingerprint"]
    credential_count = sum(1 for b in bounties if b.get("bounty_type") == "credential")

    timestamps = [e.timestamp for e in events]

    return {
        "ip": ip,
        "first_seen": min(timestamps),
        "last_seen": max(timestamps),
        "event_count": len(events),
        "service_count": len(services),
        "decky_count": len({e.decky for e in events}),
        "services": json.dumps(services),
        "deckies": json.dumps(deckies),
        "traversal_path": traversal.path if traversal else None,
        "is_traversal": traversal is not None,
        "bounty_count": len(bounties),
        "credential_count": credential_count,
        "fingerprints": json.dumps(fingerprints),
        "commands": json.dumps(commands),
        "updated_at": datetime.now(timezone.utc),
    }
|
||||
|
||||
|
||||
def _first_contact_deckies(events: list[LogEvent]) -> list[str]:
|
||||
"""Return unique deckies in first-contact order (for non-traversal attackers)."""
|
||||
seen: list[str] = []
|
||||
for e in sorted(events, key=lambda x: x.timestamp):
|
||||
if e.decky not in seen:
|
||||
seen.append(e.decky)
|
||||
return seen
|
||||
|
||||
|
||||
def _extract_commands_from_events(events: list[LogEvent]) -> list[dict[str, Any]]:
    """
    Extract executed commands from LogEvent objects.

    Works directly on LogEvent.fields (already a dict), so no JSON parsing
    is needed. Only events whose type is in _COMMAND_EVENT_TYPES contribute;
    the first truthy field from _COMMAND_FIELDS supplies the command text.
    """
    extracted: list[dict[str, Any]] = []

    for ev in events:
        if ev.event_type not in _COMMAND_EVENT_TYPES:
            continue

        # First non-empty command-carrying field wins.
        text = next(
            (str(ev.fields[key]) for key in _COMMAND_FIELDS if ev.fields.get(key)),
            None,
        )
        if text is None:
            continue

        extracted.append(
            {
                "service": ev.service,
                "decky": ev.decky,
                "command": text,
                "timestamp": ev.timestamp.isoformat(),
            }
        )

    return extracted
|
||||
@@ -13,6 +13,7 @@ class BaseService(ABC):
|
||||
name: str # unique slug, e.g. "ssh", "smb"
|
||||
ports: list[int] # ports this service listens on inside the container
|
||||
default_image: str # Docker image tag, or "build" if a Dockerfile is needed
|
||||
fleet_singleton: bool = False # True = runs once fleet-wide, not per-decky
|
||||
|
||||
@abstractmethod
|
||||
def compose_fragment(
|
||||
|
||||
@@ -32,4 +32,4 @@ class ConpotService(BaseService):
|
||||
}
|
||||
|
||||
def dockerfile_context(self):
|
||||
return Path(__file__).parent.parent.parent / "templates" / "conpot"
|
||||
return Path(__file__).parent.parent / "templates" / "conpot"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "docker_api"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "docker_api"
|
||||
|
||||
|
||||
class DockerAPIService(BaseService):
|
||||
|
||||
@@ -2,7 +2,7 @@ from pathlib import Path
|
||||
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "elasticsearch"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "elasticsearch"
|
||||
|
||||
|
||||
class ElasticsearchService(BaseService):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "ftp"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "ftp"
|
||||
|
||||
|
||||
class FTPService(BaseService):
|
||||
|
||||
@@ -2,7 +2,7 @@ import json
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "http"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "http"
|
||||
|
||||
|
||||
class HTTPService(BaseService):
|
||||
|
||||
59
decnet/services/https.py
Normal file
59
decnet/services/https.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import json
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "https"
|
||||
|
||||
|
||||
class HTTPSService(BaseService):
|
||||
name = "https"
|
||||
ports = [443]
|
||||
default_image = "build"
|
||||
|
||||
def compose_fragment(
|
||||
self,
|
||||
decky_name: str,
|
||||
log_target: str | None = None,
|
||||
service_cfg: dict | None = None,
|
||||
) -> dict:
|
||||
cfg = service_cfg or {}
|
||||
fragment: dict = {
|
||||
"build": {"context": str(TEMPLATES_DIR)},
|
||||
"container_name": f"{decky_name}-https",
|
||||
"restart": "unless-stopped",
|
||||
"environment": {
|
||||
"NODE_NAME": decky_name,
|
||||
},
|
||||
}
|
||||
if log_target:
|
||||
fragment["environment"]["LOG_TARGET"] = log_target
|
||||
|
||||
# Optional persona overrides — only injected when explicitly set
|
||||
if "server_header" in cfg:
|
||||
fragment["environment"]["SERVER_HEADER"] = cfg["server_header"]
|
||||
if "response_code" in cfg:
|
||||
fragment["environment"]["RESPONSE_CODE"] = str(cfg["response_code"])
|
||||
if "fake_app" in cfg:
|
||||
fragment["environment"]["FAKE_APP"] = cfg["fake_app"]
|
||||
if "extra_headers" in cfg:
|
||||
val = cfg["extra_headers"]
|
||||
fragment["environment"]["EXTRA_HEADERS"] = (
|
||||
json.dumps(val) if isinstance(val, dict) else val
|
||||
)
|
||||
if "custom_body" in cfg:
|
||||
fragment["environment"]["CUSTOM_BODY"] = cfg["custom_body"]
|
||||
if "files" in cfg:
|
||||
files_path = str(Path(cfg["files"]).resolve())
|
||||
fragment["environment"]["FILES_DIR"] = "/opt/html_files"
|
||||
fragment.setdefault("volumes", []).append(f"{files_path}:/opt/html_files:ro")
|
||||
if "tls_cert" in cfg:
|
||||
fragment["environment"]["TLS_CERT"] = cfg["tls_cert"]
|
||||
if "tls_key" in cfg:
|
||||
fragment["environment"]["TLS_KEY"] = cfg["tls_key"]
|
||||
if "tls_cn" in cfg:
|
||||
fragment["environment"]["TLS_CN"] = cfg["tls_cn"]
|
||||
|
||||
return fragment
|
||||
|
||||
def dockerfile_context(self) -> Path | None:
|
||||
return TEMPLATES_DIR
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "imap"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "imap"
|
||||
|
||||
|
||||
class IMAPService(BaseService):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "k8s"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "k8s"
|
||||
|
||||
|
||||
class KubernetesAPIService(BaseService):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "ldap"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "ldap"
|
||||
|
||||
|
||||
class LDAPService(BaseService):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "llmnr"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "llmnr"
|
||||
|
||||
|
||||
class LLMNRService(BaseService):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "mongodb"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mongodb"
|
||||
|
||||
|
||||
class MongoDBService(BaseService):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "mqtt"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mqtt"
|
||||
|
||||
|
||||
class MQTTService(BaseService):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "mssql"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mssql"
|
||||
|
||||
|
||||
class MSSQLService(BaseService):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "mysql"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mysql"
|
||||
|
||||
|
||||
class MySQLService(BaseService):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "pop3"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "pop3"
|
||||
|
||||
|
||||
class POP3Service(BaseService):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "postgres"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "postgres"
|
||||
|
||||
|
||||
class PostgresService(BaseService):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "rdp"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "rdp"
|
||||
|
||||
|
||||
class RDPService(BaseService):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "redis"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "redis"
|
||||
|
||||
|
||||
class RedisService(BaseService):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "sip"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "sip"
|
||||
|
||||
|
||||
class SIPService(BaseService):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "smb"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "smb"
|
||||
|
||||
|
||||
class SMBService(BaseService):
|
||||
|
||||
@@ -2,7 +2,7 @@ from pathlib import Path
|
||||
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "smtp"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "smtp"
|
||||
|
||||
|
||||
class SMTPService(BaseService):
|
||||
|
||||
@@ -4,7 +4,7 @@ from decnet.services.base import BaseService
|
||||
|
||||
# Reuses the same template as the smtp service — only difference is
|
||||
# SMTP_OPEN_RELAY=1 in the environment, which enables the open relay persona.
|
||||
_TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "smtp"
|
||||
_TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "smtp"
|
||||
|
||||
|
||||
class SMTPRelayService(BaseService):
|
||||
|
||||
41
decnet/services/sniffer.py
Normal file
41
decnet/services/sniffer.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "sniffer"
|
||||
|
||||
|
||||
class SnifferService(BaseService):
|
||||
"""
|
||||
Passive network sniffer deployed alongside deckies on the MACVLAN.
|
||||
|
||||
Captures TLS handshakes in promiscuous mode and extracts JA3/JA3S hashes
|
||||
plus connection metadata. Requires NET_RAW + NET_ADMIN capabilities.
|
||||
No inbound ports — purely passive.
|
||||
"""
|
||||
|
||||
name = "sniffer"
|
||||
ports: list[int] = []
|
||||
default_image = "build"
|
||||
fleet_singleton = True
|
||||
|
||||
def compose_fragment(
|
||||
self,
|
||||
decky_name: str,
|
||||
log_target: str | None = None,
|
||||
service_cfg: dict | None = None,
|
||||
) -> dict:
|
||||
fragment: dict = {
|
||||
"build": {"context": str(TEMPLATES_DIR)},
|
||||
"container_name": f"{decky_name}-sniffer",
|
||||
"restart": "unless-stopped",
|
||||
"cap_add": ["NET_RAW", "NET_ADMIN"],
|
||||
"environment": {
|
||||
"NODE_NAME": decky_name,
|
||||
},
|
||||
}
|
||||
if log_target:
|
||||
fragment["environment"]["LOG_TARGET"] = log_target
|
||||
return fragment
|
||||
|
||||
def dockerfile_context(self) -> Path | None:
|
||||
return TEMPLATES_DIR
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "snmp"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "snmp"
|
||||
|
||||
|
||||
class SNMPService(BaseService):
|
||||
|
||||
@@ -2,7 +2,7 @@ from pathlib import Path
|
||||
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "ssh"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "ssh"
|
||||
|
||||
|
||||
class SSHService(BaseService):
|
||||
@@ -32,16 +32,28 @@ class SSHService(BaseService):
|
||||
cfg = service_cfg or {}
|
||||
env: dict = {
|
||||
"SSH_ROOT_PASSWORD": cfg.get("password", "admin"),
|
||||
# NODE_NAME is the authoritative decky identifier for log
|
||||
# attribution — matches the host path used for the artifacts
|
||||
# bind mount below. The container hostname (optionally overridden
|
||||
# via SSH_HOSTNAME) is cosmetic and may differ to keep the
|
||||
# decoy looking heterogeneous.
|
||||
"NODE_NAME": decky_name,
|
||||
}
|
||||
if "hostname" in cfg:
|
||||
env["SSH_HOSTNAME"] = cfg["hostname"]
|
||||
|
||||
# File-catcher quarantine: bind-mount a per-decky host dir so attacker
|
||||
# drops (scp/sftp/wget) are mirrored out-of-band for forensic analysis.
|
||||
# The in-container path masquerades as systemd-coredump so `mount`/`df`
|
||||
# from inside the container looks benign.
|
||||
quarantine_host = f"/var/lib/decnet/artifacts/{decky_name}/ssh"
|
||||
return {
|
||||
"build": {"context": str(TEMPLATES_DIR)},
|
||||
"container_name": f"{decky_name}-ssh",
|
||||
"restart": "unless-stopped",
|
||||
"cap_add": ["NET_BIND_SERVICE"],
|
||||
"environment": env,
|
||||
"volumes": [f"{quarantine_host}:/var/lib/systemd/coredump:rw"],
|
||||
}
|
||||
|
||||
def dockerfile_context(self) -> Path:
|
||||
|
||||
@@ -2,7 +2,7 @@ from pathlib import Path
|
||||
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "telnet"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "telnet"
|
||||
|
||||
|
||||
class TelnetService(BaseService):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "tftp"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "tftp"
|
||||
|
||||
|
||||
class TFTPService(BaseService):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
from decnet.services.base import BaseService
|
||||
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "vnc"
|
||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "vnc"
|
||||
|
||||
|
||||
class VNCService(BaseService):
|
||||
|
||||
11
decnet/sniffer/__init__.py
Normal file
11
decnet/sniffer/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""
|
||||
Fleet-wide MACVLAN sniffer microservice.
|
||||
|
||||
Runs as a single host-side background task (not per-decky) that sniffs
|
||||
all TLS traffic on the MACVLAN interface, extracts fingerprints, and
|
||||
feeds events into the existing log pipeline.
|
||||
"""
|
||||
|
||||
from decnet.sniffer.worker import sniffer_worker
|
||||
|
||||
__all__ = ["sniffer_worker"]
|
||||
1166
decnet/sniffer/fingerprint.py
Normal file
1166
decnet/sniffer/fingerprint.py
Normal file
File diff suppressed because it is too large
Load Diff
238
decnet/sniffer/p0f.py
Normal file
238
decnet/sniffer/p0f.py
Normal file
@@ -0,0 +1,238 @@
|
||||
"""
|
||||
Passive OS fingerprinting (p0f-lite) for the DECNET sniffer.
|
||||
|
||||
Pure-Python lookup module. Given the values of an incoming TCP SYN packet
|
||||
(TTL, window, MSS, window-scale, and TCP option ordering), returns a coarse
|
||||
OS bucket (linux / windows / macos_ios / freebsd / openbsd / nmap / unknown)
|
||||
plus derived hop distance and inferred initial TTL.
|
||||
|
||||
Rationale
|
||||
---------
|
||||
Full p0f v3 distinguishes several dozen OS/tool profiles by combining dozens
|
||||
of low-level quirks (OLEN, WSIZE, EOL padding, PCLASS, quirks, payload class).
|
||||
For DECNET we only need a coarse bucket — enough to tag an attacker as
|
||||
"linux beacon" vs "windows interactive" vs "active scan". The curated
|
||||
table below covers default stacks that dominate real-world attacker traffic.
|
||||
|
||||
References (public p0f v3 DB, nmap-os-db, and Mozilla OS Fingerprint table):
|
||||
https://github.com/p0f/p0f/blob/master/p0f.fp
|
||||
|
||||
No external dependencies.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
# ─── TTL → initial TTL bucket ───────────────────────────────────────────────
|
||||
|
||||
# Common "hop 0" TTLs. Packets decrement TTL once per hop, so we round up
|
||||
# the observed TTL to the nearest known starting value.
|
||||
_TTL_BUCKETS: tuple[int, ...] = (32, 64, 128, 255)
|
||||
|
||||
|
||||
def initial_ttl(ttl: int) -> int:
|
||||
"""
|
||||
Round *ttl* up to the nearest known initial-TTL bucket.
|
||||
|
||||
A SYN with TTL=59 was almost certainly emitted by a Linux/BSD host
|
||||
(initial 64) five hops away; TTL=120 by a Windows host (initial 128)
|
||||
eight hops away.
|
||||
"""
|
||||
for bucket in _TTL_BUCKETS:
|
||||
if ttl <= bucket:
|
||||
return bucket
|
||||
return 255
|
||||
|
||||
|
||||
def hop_distance(ttl: int) -> int:
|
||||
"""
|
||||
Estimate hops between the attacker and the sniffer based on TTL.
|
||||
|
||||
Upper-bounded at 64 (anything further has most likely been mangled
|
||||
by a misconfigured firewall or a TTL-spoofing NAT).
|
||||
"""
|
||||
dist = initial_ttl(ttl) - ttl
|
||||
if dist < 0:
|
||||
return 0
|
||||
if dist > 64:
|
||||
return 64
|
||||
return dist
|
||||
|
||||
|
||||
# ─── OS signature table (TTL bucket, window, MSS, wscale, option-order) ─────
|
||||
|
||||
# Each entry is a set of loose predicates. If all predicates match, the
|
||||
# OS label is returned. First-match wins. `None` means "don't care".
|
||||
#
|
||||
# The option signatures use the short-code alphabet from
|
||||
# decnet/prober/tcpfp.py :: _OPT_CODES (M=MSS, N=NOP, W=WScale,
|
||||
# T=Timestamp, S=SAckOK, E=EOL).
|
||||
|
||||
_SIGNATURES: tuple[tuple[dict, str], ...] = (
|
||||
# ── nmap -sS / -sT default probe ───────────────────────────────────────
|
||||
# nmap crafts very distinctive SYNs: tiny window (1024/4096/etc.), full
|
||||
# option set including WScale=10 and SAckOK. Match these first so they
|
||||
# don't get misclassified as Linux.
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 64,
|
||||
"window_in": {1024, 2048, 3072, 4096, 31337, 32768, 65535},
|
||||
"mss": 1460,
|
||||
"wscale": 10,
|
||||
"options": "M,W,T,S,S",
|
||||
},
|
||||
"nmap",
|
||||
),
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 64,
|
||||
"window_in": {1024, 2048, 3072, 4096, 31337, 32768, 65535},
|
||||
"options_starts_with": "M,W,T,S",
|
||||
},
|
||||
"nmap",
|
||||
),
|
||||
# ── macOS / iOS default SYN (match before Linux — shares TTL 64) ──────
|
||||
# TTL 64, window 65535, MSS 1460, WScale 6, specific option order
|
||||
# M,N,W,N,N,T,S,E (Darwin signature with EOL padding).
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 64,
|
||||
"window": 65535,
|
||||
"wscale": 6,
|
||||
"options": "M,N,W,N,N,T,S,E",
|
||||
},
|
||||
"macos_ios",
|
||||
),
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 64,
|
||||
"window_in": {65535},
|
||||
"wscale_in": {5, 6},
|
||||
"has_timestamps": True,
|
||||
"options_ends_with": "E",
|
||||
},
|
||||
"macos_ios",
|
||||
),
|
||||
# ── FreeBSD default SYN (TTL 64, no EOL) ───────────────────────────────
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 64,
|
||||
"window": 65535,
|
||||
"wscale": 6,
|
||||
"has_sack": True,
|
||||
"has_timestamps": True,
|
||||
"options_no_eol": True,
|
||||
},
|
||||
"freebsd",
|
||||
),
|
||||
# ── Linux (kernel 3.x – 6.x) default SYN ───────────────────────────────
|
||||
# TTL 64, window 29200 / 64240 / 65535, MSS 1460, WScale 7, full options.
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 64,
|
||||
"window_min": 5000,
|
||||
"wscale_in": {6, 7, 8, 9, 10, 11, 12, 13, 14},
|
||||
"has_sack": True,
|
||||
"has_timestamps": True,
|
||||
},
|
||||
"linux",
|
||||
),
|
||||
# ── OpenBSD default SYN ─────────────────────────────────────────────────
|
||||
# TTL 64, window 16384, WScale 3-6, MSS 1460
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 64,
|
||||
"window_in": {16384, 16960},
|
||||
"wscale_in": {3, 4, 5, 6},
|
||||
},
|
||||
"openbsd",
|
||||
),
|
||||
# ── Windows 10/11/Server default SYN ────────────────────────────────────
|
||||
# TTL 128, window 64240/65535, MSS 1460, WScale 8, SACK+TS
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 128,
|
||||
"window_min": 8192,
|
||||
"wscale_in": {2, 6, 7, 8},
|
||||
"has_sack": True,
|
||||
},
|
||||
"windows",
|
||||
),
|
||||
# ── Windows 7/XP (legacy) ───────────────────────────────────────────────
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 128,
|
||||
"window_in": {8192, 16384, 65535},
|
||||
},
|
||||
"windows",
|
||||
),
|
||||
# ── Embedded / Cisco / network gear ─────────────────────────────────────
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 255,
|
||||
},
|
||||
"embedded",
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _match_signature(
|
||||
sig: dict,
|
||||
ttl: int,
|
||||
window: int,
|
||||
mss: int,
|
||||
wscale: int | None,
|
||||
options_sig: str,
|
||||
) -> bool:
|
||||
"""Evaluate every predicate in *sig* against the observed values."""
|
||||
tb = initial_ttl(ttl)
|
||||
if "ttl_bucket" in sig and sig["ttl_bucket"] != tb:
|
||||
return False
|
||||
if "window" in sig and sig["window"] != window:
|
||||
return False
|
||||
if "window_in" in sig and window not in sig["window_in"]:
|
||||
return False
|
||||
if "window_min" in sig and window < sig["window_min"]:
|
||||
return False
|
||||
if "mss" in sig and sig["mss"] != mss:
|
||||
return False
|
||||
if "wscale" in sig and sig["wscale"] != wscale:
|
||||
return False
|
||||
if "wscale_in" in sig and wscale not in sig["wscale_in"]:
|
||||
return False
|
||||
if "has_sack" in sig:
|
||||
if sig["has_sack"] != ("S" in options_sig):
|
||||
return False
|
||||
if "has_timestamps" in sig:
|
||||
if sig["has_timestamps"] != ("T" in options_sig):
|
||||
return False
|
||||
if "options" in sig and sig["options"] != options_sig:
|
||||
return False
|
||||
if "options_starts_with" in sig and not options_sig.startswith(sig["options_starts_with"]):
|
||||
return False
|
||||
if "options_ends_with" in sig and not options_sig.endswith(sig["options_ends_with"]):
|
||||
return False
|
||||
if "options_no_eol" in sig and sig["options_no_eol"] and "E" in options_sig:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
@_traced("sniffer.p0f_guess_os")
|
||||
def guess_os(
|
||||
ttl: int,
|
||||
window: int,
|
||||
mss: int = 0,
|
||||
wscale: int | None = None,
|
||||
options_sig: str = "",
|
||||
) -> str:
|
||||
"""
|
||||
Return a coarse OS bucket for the given SYN characteristics.
|
||||
|
||||
One of: "linux", "windows", "macos_ios", "freebsd", "openbsd",
|
||||
"embedded", "nmap", "unknown".
|
||||
"""
|
||||
for sig, label in _SIGNATURES:
|
||||
if _match_signature(sig, ttl, window, mss, wscale, options_sig):
|
||||
return label
|
||||
return "unknown"
|
||||
71
decnet/sniffer/syslog.py
Normal file
71
decnet/sniffer/syslog.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""
|
||||
RFC 5424 syslog formatting and log-file writing for the fleet sniffer.
|
||||
|
||||
Reuses the same wire format as templates/sniffer/decnet_logging.py so the
|
||||
existing collector parser and ingester can consume events without changes.
|
||||
"""
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from decnet.collector.worker import parse_rfc5424
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
# ─── Constants (must match templates/sniffer/decnet_logging.py) ──────────────
|
||||
|
||||
_FACILITY_LOCAL0 = 16
|
||||
_SD_ID = "relay@55555"
|
||||
_NILVALUE = "-"
|
||||
|
||||
SEVERITY_INFO = 6
|
||||
SEVERITY_WARNING = 4
|
||||
|
||||
_MAX_HOSTNAME = 255
|
||||
_MAX_APPNAME = 48
|
||||
_MAX_MSGID = 32
|
||||
|
||||
|
||||
# ─── Formatter ───────────────────────────────────────────────────────────────
|
||||
|
||||
def _sd_escape(value: str) -> str:
|
||||
return value.replace("\\", "\\\\").replace('"', '\\"').replace("]", "\\]")
|
||||
|
||||
|
||||
def _sd_element(fields: dict[str, Any]) -> str:
|
||||
if not fields:
|
||||
return _NILVALUE
|
||||
params = " ".join(f'{k}="{_sd_escape(str(v))}"' for k, v in fields.items())
|
||||
return f"[{_SD_ID} {params}]"
|
||||
|
||||
|
||||
def syslog_line(
|
||||
service: str,
|
||||
hostname: str,
|
||||
event_type: str,
|
||||
severity: int = SEVERITY_INFO,
|
||||
msg: str | None = None,
|
||||
**fields: Any,
|
||||
) -> str:
|
||||
pri = f"<{_FACILITY_LOCAL0 * 8 + severity}>"
|
||||
ts = datetime.now(timezone.utc).isoformat()
|
||||
host = (hostname or _NILVALUE)[:_MAX_HOSTNAME]
|
||||
appname = (service or _NILVALUE)[:_MAX_APPNAME]
|
||||
msgid = (event_type or _NILVALUE)[:_MAX_MSGID]
|
||||
sd = _sd_element(fields)
|
||||
message = f" {msg}" if msg else ""
|
||||
return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}"
|
||||
|
||||
|
||||
@_traced("sniffer.write_event")
|
||||
def write_event(line: str, log_path: Path, json_path: Path) -> None:
|
||||
"""Append a syslog line to the raw log and its parsed JSON to the json log."""
|
||||
with open(log_path, "a", encoding="utf-8") as lf:
|
||||
lf.write(line + "\n")
|
||||
lf.flush()
|
||||
parsed = parse_rfc5424(line)
|
||||
if parsed:
|
||||
with open(json_path, "a", encoding="utf-8") as jf:
|
||||
jf.write(json.dumps(parsed) + "\n")
|
||||
jf.flush()
|
||||
176
decnet/sniffer/worker.py
Normal file
176
decnet/sniffer/worker.py
Normal file
@@ -0,0 +1,176 @@
|
||||
"""
|
||||
Fleet-wide MACVLAN sniffer worker.
|
||||
|
||||
Runs as a single host-side async background task that sniffs all TLS
|
||||
traffic on the MACVLAN host interface. Maps packets to deckies by IP
|
||||
and feeds fingerprint events into the existing log pipeline.
|
||||
|
||||
Modeled on decnet.collector.worker — same lifecycle pattern.
|
||||
Fault-isolated: any exception is logged and the worker exits cleanly.
|
||||
The API never depends on this worker being alive.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import subprocess # nosec B404 — needed for interface checks
|
||||
import threading
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from pathlib import Path
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.network import HOST_IPVLAN_IFACE, HOST_MACVLAN_IFACE
|
||||
from decnet.sniffer.fingerprint import SnifferEngine
|
||||
from decnet.sniffer.syslog import write_event
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
logger = get_logger("sniffer")
|
||||
|
||||
_IP_MAP_REFRESH_INTERVAL: float = 60.0
|
||||
|
||||
|
||||
def _load_ip_to_decky() -> dict[str, str]:
|
||||
"""Build IP → decky-name mapping from decnet-state.json."""
|
||||
from decnet.config import load_state
|
||||
state = load_state()
|
||||
if state is None:
|
||||
return {}
|
||||
config, _ = state
|
||||
mapping: dict[str, str] = {}
|
||||
for decky in config.deckies:
|
||||
mapping[decky.ip] = decky.name
|
||||
return mapping
|
||||
|
||||
|
||||
def _interface_exists(iface: str) -> bool:
|
||||
"""Check if a network interface exists on this host."""
|
||||
try:
|
||||
result = subprocess.run( # nosec B603 B607 — hardcoded args
|
||||
["ip", "link", "show", iface],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
return result.returncode == 0
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
@_traced("sniffer.sniff_loop")
|
||||
def _sniff_loop(
|
||||
interface: str,
|
||||
log_path: Path,
|
||||
json_path: Path,
|
||||
stop_event: threading.Event,
|
||||
) -> None:
|
||||
"""Blocking sniff loop. Runs in a dedicated thread via asyncio.to_thread."""
|
||||
try:
|
||||
from scapy.sendrecv import sniff
|
||||
except ImportError:
|
||||
logger.error("scapy not installed — sniffer cannot start")
|
||||
return
|
||||
|
||||
ip_map = _load_ip_to_decky()
|
||||
if not ip_map:
|
||||
logger.warning("sniffer: no deckies in state — nothing to sniff")
|
||||
return
|
||||
|
||||
def _write_fn(line: str) -> None:
|
||||
write_event(line, log_path, json_path)
|
||||
|
||||
engine = SnifferEngine(ip_to_decky=ip_map, write_fn=_write_fn)
|
||||
|
||||
# Periodically refresh IP map in a background daemon thread
|
||||
def _refresh_loop() -> None:
|
||||
while not stop_event.is_set():
|
||||
stop_event.wait(_IP_MAP_REFRESH_INTERVAL)
|
||||
if stop_event.is_set():
|
||||
break
|
||||
try:
|
||||
new_map = _load_ip_to_decky()
|
||||
if new_map:
|
||||
engine.update_ip_map(new_map)
|
||||
except Exception as exc:
|
||||
logger.debug("sniffer: ip map refresh failed: %s", exc)
|
||||
|
||||
refresh_thread = threading.Thread(target=_refresh_loop, daemon=True)
|
||||
refresh_thread.start()
|
||||
|
||||
logger.info("sniffer: sniffing on interface=%s deckies=%d", interface, len(ip_map))
|
||||
|
||||
try:
|
||||
sniff(
|
||||
iface=interface,
|
||||
filter="tcp",
|
||||
prn=engine.on_packet,
|
||||
store=False,
|
||||
stop_filter=lambda pkt: stop_event.is_set(),
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("sniffer: scapy sniff exited: %s", exc)
|
||||
finally:
|
||||
stop_event.set()
|
||||
logger.info("sniffer: sniff loop ended")
|
||||
|
||||
|
||||
@_traced("sniffer.worker")
|
||||
async def sniffer_worker(log_file: str) -> None:
|
||||
"""
|
||||
Async entry point — started as asyncio.create_task in the API lifespan.
|
||||
|
||||
Fully fault-isolated: catches all exceptions, logs them, and returns
|
||||
cleanly. The API continues running regardless of sniffer state.
|
||||
"""
|
||||
try:
|
||||
# Interface selection: explicit env override wins, otherwise probe
|
||||
# both the MACVLAN and IPvlan host-side names since the driver
|
||||
# choice is per-deploy (--ipvlan flag).
|
||||
env_iface = os.environ.get("DECNET_SNIFFER_IFACE")
|
||||
if env_iface:
|
||||
interface = env_iface
|
||||
elif _interface_exists(HOST_MACVLAN_IFACE):
|
||||
interface = HOST_MACVLAN_IFACE
|
||||
elif _interface_exists(HOST_IPVLAN_IFACE):
|
||||
interface = HOST_IPVLAN_IFACE
|
||||
else:
|
||||
logger.warning(
|
||||
"sniffer: neither %s nor %s found — sniffer disabled "
|
||||
"(fleet may not be deployed yet)",
|
||||
HOST_MACVLAN_IFACE, HOST_IPVLAN_IFACE,
|
||||
)
|
||||
return
|
||||
|
||||
if not _interface_exists(interface):
|
||||
logger.warning(
|
||||
"sniffer: interface %s not found — sniffer disabled "
|
||||
"(fleet may not be deployed yet)", interface,
|
||||
)
|
||||
return
|
||||
|
||||
log_path = Path(log_file)
|
||||
json_path = log_path.with_suffix(".json")
|
||||
log_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
stop_event = threading.Event()
|
||||
|
||||
# Dedicated thread pool so the long-running sniff loop doesn't
|
||||
# occupy a slot in the default asyncio executor.
|
||||
sniffer_pool = ThreadPoolExecutor(
|
||||
max_workers=2, thread_name_prefix="decnet-sniffer",
|
||||
)
|
||||
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
await loop.run_in_executor(
|
||||
sniffer_pool, _sniff_loop,
|
||||
interface, log_path, json_path, stop_event,
|
||||
)
|
||||
except asyncio.CancelledError:
|
||||
logger.info("sniffer: shutdown requested")
|
||||
stop_event.set()
|
||||
sniffer_pool.shutdown(wait=False)
|
||||
raise
|
||||
finally:
|
||||
sniffer_pool.shutdown(wait=False)
|
||||
|
||||
except asyncio.CancelledError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.error("sniffer: worker failed — API continues without sniffing: %s", exc)
|
||||
7
decnet/swarm/__init__.py
Normal file
7
decnet/swarm/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""DECNET SWARM — multihost deployment subsystem.
|
||||
|
||||
Components:
|
||||
* ``pki`` — X.509 CA + CSR signing used by all swarm mTLS channels
|
||||
* ``client`` — master-side HTTP client that talks to remote workers
|
||||
* ``log_forwarder``— worker-side syslog-over-TLS (RFC 5425) forwarder
|
||||
"""
|
||||
200
decnet/swarm/client.py
Normal file
200
decnet/swarm/client.py
Normal file
@@ -0,0 +1,200 @@
|
||||
"""Master-side HTTP client that talks to a worker's DECNET agent.
|
||||
|
||||
All traffic is mTLS: the master presents a cert issued by its own CA (which
|
||||
workers trust) and the master validates the worker's cert against the same
|
||||
CA. In practice the "client cert" the master shows is just another cert
|
||||
signed by itself — the master is both the CA and the sole control-plane
|
||||
client.
|
||||
|
||||
Usage:
|
||||
|
||||
async with AgentClient(host) as agent:
|
||||
await agent.deploy(config)
|
||||
status = await agent.status()
|
||||
|
||||
The ``host`` is a SwarmHost dict returned by the repository.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pathlib
|
||||
import ssl
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from decnet.config import DecnetConfig
|
||||
from decnet.logging import get_logger
|
||||
from decnet.swarm import pki
|
||||
|
||||
log = get_logger("swarm.client")
|
||||
|
||||
# How long a single HTTP operation can take. Deploy is the long pole —
|
||||
# docker compose up pulls images, builds contexts, etc. Tune via env in a
|
||||
# later iteration if the default proves too short.
|
||||
_TIMEOUT_DEPLOY = httpx.Timeout(connect=10.0, read=600.0, write=30.0, pool=5.0)
|
||||
_TIMEOUT_CONTROL = httpx.Timeout(connect=5.0, read=15.0, write=5.0, pool=5.0)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MasterIdentity:
    """Paths to the master's own mTLS client bundle.

    The master uses ONE master-client cert to talk to every worker. It is
    signed by the DECNET CA (same CA that signs worker certs). Stored
    under ``~/.decnet/ca/master/`` by ``ensure_master_identity``.
    """
    # Private key backing the master's client certificate.
    key_path: pathlib.Path
    # Certificate the master presents to workers during the mTLS handshake.
    cert_path: pathlib.Path
    # CA certificate used to validate the worker's peer certificate.
    ca_cert_path: pathlib.Path
|
||||
|
||||
|
||||
def ensure_master_identity(
    ca_dir: pathlib.Path = pki.DEFAULT_CA_DIR,
) -> MasterIdentity:
    """Create (or load) the master's own client cert.

    Called once by the swarm controller on startup and by the CLI before
    any master→worker call. Idempotent: an existing bundle under
    ``<ca_dir>/master`` is reused as-is.
    """
    authority = pki.ensure_ca(ca_dir)
    identity_dir = ca_dir / "master"
    if pki.load_worker_bundle(identity_dir) is None:
        # First run on this master: mint a client cert signed by our own CA.
        fresh = pki.issue_worker_cert(
            authority, "decnet-master", ["127.0.0.1", "decnet-master"]
        )
        pki.write_worker_bundle(fresh, identity_dir)
    return MasterIdentity(
        key_path=identity_dir / "worker.key",
        cert_path=identity_dir / "worker.crt",
        ca_cert_path=identity_dir / "ca.crt",
    )
|
||||
|
||||
|
||||
class AgentClient:
    """Thin async wrapper around the worker agent's HTTP API.

    Use as an async context manager::

        async with AgentClient(host) as agent:
            await agent.deploy(config)

    All traffic is mTLS against the private DECNET CA; see ``_build_client``
    for why the SSL context is assembled by hand.
    """

    def __init__(
        self,
        host: dict[str, Any] | None = None,
        *,
        address: Optional[str] = None,
        agent_port: Optional[int] = None,
        identity: Optional[MasterIdentity] = None,
        verify_hostname: bool = False,
    ):
        """Either pass a SwarmHost dict, or explicit address/port.

        ``verify_hostname`` stays False by default because the worker's
        cert SAN is populated from the operator-supplied address list, not
        from modern TLS hostname-verification semantics. The mTLS client
        cert + CA pinning are what authenticate the peer.

        Raises:
            ValueError: if neither a host dict nor both address and
                agent_port are supplied.
        """
        if host is not None:
            self._address = host["address"]
            # Falsy/absent port falls back to the default agent port.
            self._port = int(host.get("agent_port") or 8765)
            self._host_uuid = host.get("uuid")
            self._host_name = host.get("name")
        else:
            if address is None or agent_port is None:
                raise ValueError(
                    "AgentClient requires either a host dict or address+agent_port"
                )
            self._address = address
            self._port = int(agent_port)
            self._host_uuid = None
            self._host_name = None

        # Lazily creating the identity here keeps CLI call sites simple.
        self._identity = identity or ensure_master_identity()
        self._verify_hostname = verify_hostname
        self._client: Optional[httpx.AsyncClient] = None

    # --------------------------------------------------------------- lifecycle

    def _build_client(self, timeout: httpx.Timeout) -> httpx.AsyncClient:
        """Return a fresh AsyncClient wired for private-CA mTLS."""
        # Build the SSL context manually — httpx.create_ssl_context layers on
        # purpose/ALPN/default-CA logic that doesn't compose with private-CA
        # mTLS in all combinations. A bare SSLContext is predictable.
        ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ctx.load_cert_chain(
            str(self._identity.cert_path), str(self._identity.key_path)
        )
        ctx.load_verify_locations(cafile=str(self._identity.ca_cert_path))
        ctx.verify_mode = ssl.CERT_REQUIRED
        # Pin by CA + cert chain, not by DNS — workers enroll with arbitrary
        # SANs (IPs, hostnames) and we don't want to force operators to keep
        # those in sync with whatever URL the master happens to use.
        ctx.check_hostname = self._verify_hostname
        return httpx.AsyncClient(
            base_url=f"https://{self._address}:{self._port}",
            verify=ctx,
            timeout=timeout,
        )

    async def __aenter__(self) -> "AgentClient":
        self._client = self._build_client(_TIMEOUT_CONTROL)
        return self

    async def __aexit__(self, *exc: Any) -> None:
        if self._client:
            await self._client.aclose()
            self._client = None

    def _require_client(self) -> httpx.AsyncClient:
        """Return the live client or fail fast when used outside ``async with``."""
        if self._client is None:
            raise RuntimeError("AgentClient used outside `async with` block")
        return self._client

    # ----------------------------------------------------------------- RPCs

    async def health(self) -> dict[str, Any]:
        """GET /health — liveness probe; raises on non-2xx."""
        resp = await self._require_client().get("/health")
        resp.raise_for_status()
        return resp.json()

    async def status(self) -> dict[str, Any]:
        """GET /status — worker deployment status; raises on non-2xx."""
        resp = await self._require_client().get("/status")
        resp.raise_for_status()
        return resp.json()

    async def deploy(
        self,
        config: DecnetConfig,
        *,
        dry_run: bool = False,
        no_cache: bool = False,
    ) -> dict[str, Any]:
        """POST /deploy — push a config to the worker and apply it.

        Uses the long deploy timeout, passed per-request. The previous
        implementation temporarily mutated the shared client's ``timeout``
        attribute and restored it in ``finally`` — unsafe if any other
        coroutine issues a request through the same client concurrently,
        since the restore clobbers the other call's view of the timeout.
        httpx accepts ``timeout=`` on individual requests, which sidesteps
        the shared-state mutation entirely.
        """
        body = {
            "config": config.model_dump(mode="json"),
            "dry_run": dry_run,
            "no_cache": no_cache,
        }
        resp = await self._require_client().post(
            "/deploy", json=body, timeout=_TIMEOUT_DEPLOY
        )
        resp.raise_for_status()
        return resp.json()

    async def teardown(self, decky_id: Optional[str] = None) -> dict[str, Any]:
        """POST /teardown — stop a deployment (all of them when id is None)."""
        resp = await self._require_client().post(
            "/teardown", json={"decky_id": decky_id}
        )
        resp.raise_for_status()
        return resp.json()

    async def self_destruct(self) -> dict[str, Any]:
        """Trigger the worker to stop services and wipe its install."""
        resp = await self._require_client().post("/self-destruct")
        resp.raise_for_status()
        return resp.json()

    # -------------------------------------------------------------- diagnostics

    def __repr__(self) -> str:
        return (
            f"AgentClient(name={self._host_name!r}, "
            f"address={self._address!r}, port={self._port})"
        )
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user