Compare commits
2 Commits
v1.2.0
...
2ce076cd37
| Author | SHA1 | Date | |
|---|---|---|---|
| | 2ce076cd37 | | |
| | e8d97281f7 | | |
@@ -2,7 +2,7 @@ name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [dev, testing]
|
||||
branches: [dev, testing, "temp/merge-*"]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
@@ -11,7 +11,6 @@ jobs:
|
||||
lint:
|
||||
name: Lint (ruff)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.ref == 'refs/heads/dev'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
@@ -23,7 +22,6 @@ jobs:
|
||||
bandit:
|
||||
name: SAST (bandit)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.ref == 'refs/heads/dev'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
@@ -35,7 +33,6 @@ jobs:
|
||||
pip-audit:
|
||||
name: Dependency audit (pip-audit)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.ref == 'refs/heads/dev'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
@@ -45,31 +42,10 @@ jobs:
|
||||
- run: pip install -e .[dev]
|
||||
- run: pip-audit --skip-editable --ignore-vuln CVE-2025-65896 --ignore-vuln CVE-2026-3219
|
||||
|
||||
merge-to-testing:
|
||||
name: Merge dev → testing
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint, bandit, pip-audit]
|
||||
if: github.ref == 'refs/heads/dev'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.DECNET_PR_TOKEN }}
|
||||
- name: Configure git
|
||||
run: |
|
||||
git config user.name "DECNET CI"
|
||||
git config user.email "ci@decnet.local"
|
||||
- name: Merge dev into testing
|
||||
run: |
|
||||
git fetch origin testing
|
||||
git checkout testing
|
||||
git merge origin/dev --no-ff -m "ci: auto-merge dev → testing"
|
||||
git push origin testing
|
||||
|
||||
test-standard:
|
||||
name: Test (Standard)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.ref == 'refs/heads/testing'
|
||||
needs: [lint, bandit, pip-audit]
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.11"]
|
||||
@@ -84,7 +60,6 @@ jobs:
|
||||
test-live:
|
||||
name: Test (Live)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.ref == 'refs/heads/testing'
|
||||
needs: [test-standard]
|
||||
services:
|
||||
mysql:
|
||||
@@ -116,10 +91,31 @@ jobs:
|
||||
DECNET_MYSQL_PASSWORD: root
|
||||
DECNET_MYSQL_DATABASE: decnet_test
|
||||
|
||||
merge-to-main:
|
||||
name: Merge testing → main
|
||||
merge-to-testing:
|
||||
name: Merge dev → testing
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-standard, test-live]
|
||||
if: github.ref == 'refs/heads/dev'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.DECNET_PR_TOKEN }}
|
||||
- name: Configure git
|
||||
run: |
|
||||
git config user.name "DECNET CI"
|
||||
git config user.email "ci@decnet.local"
|
||||
- name: Merge dev into testing
|
||||
run: |
|
||||
git fetch origin testing
|
||||
git checkout testing
|
||||
git merge origin/dev --no-ff -m "ci: auto-merge dev → testing [skip ci]"
|
||||
git push origin testing
|
||||
|
||||
prepare-merge-to-main:
|
||||
name: Prepare Merge to Main
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-standard, test-live, test-fuzz]
|
||||
if: github.ref == 'refs/heads/testing'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -130,12 +126,33 @@ jobs:
|
||||
run: |
|
||||
git config user.name "DECNET CI"
|
||||
git config user.email "ci@decnet.local"
|
||||
- name: Merge testing into main
|
||||
- name: Create temp branch and sync with main
|
||||
run: |
|
||||
git fetch origin main
|
||||
git checkout -b temp/merge-testing-to-main
|
||||
echo "--- Switched to temp branch, merging main into it ---"
|
||||
git merge origin/main --no-edit || { echo "CONFLICT: Manual resolution required"; exit 1; }
|
||||
git push origin temp/merge-testing-to-main --force
|
||||
|
||||
finalize-merge-to-main:
|
||||
name: Finalize Merge to Main
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-standard, test-live, test-fuzz]
|
||||
if: startsWith(github.ref, 'refs/heads/temp/merge-')
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.DECNET_PR_TOKEN }}
|
||||
- name: Configure git
|
||||
run: |
|
||||
git config user.name "DECNET CI"
|
||||
git config user.email "ci@decnet.local"
|
||||
- name: Merge RC into main
|
||||
run: |
|
||||
git fetch origin main
|
||||
git checkout main
|
||||
git merge origin/testing --no-ff -m "ci: auto-merge testing → main" || {
|
||||
echo "CONFLICT: testing and main have diverged — manual resolution required"
|
||||
exit 1
|
||||
}
|
||||
git merge ${{ github.ref }} --no-ff -m "ci: auto-merge testing → main"
|
||||
git push origin main
|
||||
echo "--- Cleaning up temp branch ---"
|
||||
git push origin --delete ${{ github.ref_name }}
|
||||
|
||||
33
.gitignore
vendored
33
.gitignore
vendored
@@ -1,6 +1,5 @@
|
||||
.venv/
|
||||
.venv*/
|
||||
docker-compose.yaml
|
||||
.311/
|
||||
.3[0-9][0-9]/
|
||||
logs/
|
||||
@@ -19,8 +18,6 @@ decnet-topology-*-compose.yml
|
||||
.docker/
|
||||
decnet-state.json
|
||||
*.ini
|
||||
# tracked: Alembic CLI config (migrations live in decnet/web/db/migrations)
|
||||
!alembic.ini
|
||||
decnet.log*
|
||||
*.loggy
|
||||
*.nmap
|
||||
@@ -54,33 +51,3 @@ schem
|
||||
|
||||
# pydeps-style dependency graph dumps from local analysis runs.
|
||||
deps.txt
|
||||
|
||||
# Node modules vendored under decnet/canary/ for the obfuscator helper.
|
||||
# The package.json is the source of truth; modules are reinstalled at
|
||||
# build/deploy time.
|
||||
node_modules/
|
||||
package-lock.json
|
||||
|
||||
# TTP rule-precision corpus pulled from prod sqlite. Real attacker
|
||||
# payloads — operator-only artifact. The synthetic ``seed_*.jsonl``
|
||||
# files alongside ARE committed and exercise the harness in CI.
|
||||
tests/ttp/rule_precision/corpus/*.jsonl
|
||||
tests/ttp/rule_precision/corpus/seed_*.jsonl
|
||||
threatfox-api.json
|
||||
|
||||
# MITRE ATT&CK STIX bundle — 50 MB, fetched at runtime via attack_stix.py
|
||||
enterprise-attack-*.json
|
||||
|
||||
# pytest failure dump files
|
||||
testfail
|
||||
.phaseloop/
|
||||
|
||||
# Professional tier: proprietary, lives in a separate private repo
|
||||
# (github.com/DECNET-Foundation/decnet-professional), mounted at decnet/pro/.
|
||||
# Must NEVER be tracked by the open-core repo. src/pro-impl/ is where the pro
|
||||
# build copies the pro frontend so the JS toolchain resolves it.
|
||||
/decnet/pro/
|
||||
/decnet_web/src/pro-impl/
|
||||
|
||||
# Internal design/dev notes — not for publication
|
||||
/development/
|
||||
|
||||
109
CHANGELOG.md
109
CHANGELOG.md
@@ -1,109 +0,0 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to DECNET are documented here.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [1.2.0] - 2026-06-18
|
||||
|
||||
Prefork worker consolidation — share the import floor across *separate* processes
|
||||
(own GIL, full isolation) via copy-on-write, for the heavy/isolation-critical
|
||||
workers the in-process supervisor can't co-host.
|
||||
|
||||
### Added
|
||||
- `decnet.prefork` — prefork supervisor primitive: a master imports the base
|
||||
floor once, then forks one child per worker (own process/GIL, CoW-shared
|
||||
floor), reaps and restarts with backoff, and shuts down gracefully. CoW
|
||||
viability measured on CPython 3.14 (idle child ~1 MB private, ~71 MB shared;
|
||||
`gc.freeze()` unnecessary thanks to PEP 683 immortal objects).
|
||||
- `decnet fleet <name>` — prefork master that imports the shared base floor once
|
||||
then forks one child per worker. First fleet `heavy` = profiler + ttp (DB-only,
|
||||
process-isolated heavy tier); systemd unit `decnet-fleet-heavy.service`
|
||||
`Conflicts=` the units it replaces and carries no extra privilege.
|
||||
Verified live: fleet footprint ≈412 MB Pss (master 67 + profiler 81 + ttp 264)
|
||||
vs 661 MB standalone — profiler's RSS collapsed 353→110 MB (base floor now
|
||||
CoW-shared). ttp barely moved: its bulk is the privately-parsed ATT&CK bundle,
|
||||
which it alone consumes — so master-warming it was confirmed pointless and
|
||||
dropped. Lesson: prefork pays for base-floor-bound workers, not state-bound ones.
|
||||
|
||||
### Changed
|
||||
- MITRE ATT&CK Enterprise bundle pinned 19.0 → **19.1**. The bundle and its
|
||||
LICENSE now resolve from `decnet/data/` (hash-pinned in `attack_version.py`,
|
||||
fetched on demand via `python -m decnet.ttp.attack_stix fetch`, gitignored —
|
||||
not committed).
|
||||
|
||||
### Removed
|
||||
- Per-worker systemd unit templates superseded by consolidation:
|
||||
`decnet-{reconciler,enrich,orchestrator,mutator}` (→ `supervise batch`),
|
||||
`decnet-{clusterer,campaign-clusterer,attribution,reuse-correlator}`
|
||||
(→ `supervise cpu`), and `decnet-{profiler,ttp}` (→ `fleet heavy`).
|
||||
`decnet.target` now pulls in the 3 consolidated units. The underlying CLI
|
||||
commands remain for manual/standalone runs; a worker can be re-extracted to its
|
||||
own unit by editing the group/fleet spec.
|
||||
|
||||
## [1.1.1] - 2026-06-18
|
||||
|
||||
### Fixed
|
||||
- Test suite: corrected 4 lifter clip tests that encoded the pre-ASVS
|
||||
`confidence_max` semantics (treating it as a `base × ceiling` multiplier).
|
||||
`confidence_max` is a true ceiling — `min(base, ceiling)` — since the ASVS
|
||||
hardening pass (BUG-8); the tests now assert the ceiling. They were masked by
|
||||
the `make test-web` ATT&CK-bundle fail-fast. No production code change.
|
||||
- `test_topics_matches_documented_set`: added `attacker.fingerprinted` to the
|
||||
documented topic set — the TTP worker legitimately subscribes to it
|
||||
(JARM/HASSH/tcpfp/ipv6_leak fingerprint results feed TTP tagging).
|
||||
|
||||
## [1.1.0] - 2026-06-18
|
||||
|
||||
Worker consolidation: cut the long-running worker fleet's resident memory by
|
||||
hosting co-resident workers in shared supervisor processes instead of one OS
|
||||
process per worker. Behaviour-preserving — workers run the same code; only
|
||||
*where* they are hosted changes, and any worker remains extractable back to its
|
||||
own unit.
|
||||
|
||||
### Added
|
||||
- `decnet supervise <group>` — hosts a co-resident worker group in one process,
|
||||
paying the Python import floor and the DB connection pool once instead of once
|
||||
per worker. Groups: `batch` and `cpu`.
|
||||
- `decnet.supervisor` — in-process supervision primitive: each worker runs in its
|
||||
own restart loop with exponential backoff (in-process `Restart=on-failure`),
|
||||
run concurrently so one worker crashing never cancels its siblings.
|
||||
Deliberately not `asyncio.TaskGroup`, whose all-or-nothing cancellation would
|
||||
break worker isolation.
|
||||
- `decnet.offload` — shared-pool CPU-kernel offload. The `cpu` group runs its two
|
||||
O(n²) connected-components kernels (`cluster_observations`, `cluster_identities`)
|
||||
in one shared `ProcessPoolExecutor` (forkserver) so they run in parallel
|
||||
instead of serialising under the GIL. Inline when no pool is installed, so
|
||||
standalone workers and tests are unchanged.
|
||||
- systemd units `decnet-supervise-batch.service` and `decnet-supervise-cpu.service`
|
||||
(auto-rendered by `decnet init`); each `Conflicts=` the individual units it
|
||||
replaces, preventing accidental double-run.
|
||||
|
||||
### Changed
|
||||
- `decnet.topology` no longer eagerly imports the topology generator (and the
|
||||
SQLModel ORM behind it) at package import. `generate` is now a lazy PEP 562
|
||||
re-export; the public API is unchanged.
|
||||
|
||||
### Performance
|
||||
- **batch** group (`reconcile` + `enrich` + `orchestrate` + `mutate`):
|
||||
509 MB across 4 processes → **129 MB** in one. **−380 MB (75%)**, verified live.
|
||||
- **cpu** group (`clusterer` + `campaign-clusterer` + `attribution` +
|
||||
`reuse-correlate`): 502 MB → **~146 MB** (incl. forkserver). **−357 MB (71%)**,
|
||||
verified live.
|
||||
- Fleet total: **2.57 GB → ~1.83 GB (−737 MB)**.
|
||||
|
||||
### Notes
|
||||
- `webhook` (external-HTTP egress; needs hard timeouts) and `canary` (manages its
|
||||
own repo) intentionally remain standalone for now.
|
||||
- `bus`, `api`/`web`, `profiler`, and `ttp` remain separate by design (broker /
|
||||
multiprocess servers / heavy resident state + sustained CPU).
|
||||
|
||||
## [1.0.0] - 2026
|
||||
|
||||
Initial 1.0 release. See tag `v1.0.0`.
|
||||
|
||||
[1.2.0]: https://git.resacachile.cl/anti/DECNET/compare/v1.1.1...v1.2.0
|
||||
[1.1.1]: https://git.resacachile.cl/anti/DECNET/compare/v1.1.0...v1.1.1
|
||||
[1.1.0]: https://git.resacachile.cl/anti/DECNET/compare/v1.0.0...v1.1.0
|
||||
[1.0.0]: https://git.resacachile.cl/anti/DECNET/releases/tag/v1.0.0
|
||||
17
COPYRIGHT
17
COPYRIGHT
@@ -1,17 +0,0 @@
|
||||
DECNET - Deception Network
|
||||
Copyright (C) 2026 Samuel Paschuan <samsam70000@gmail.com>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public
|
||||
License along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
141
LICENSE
141
LICENSE
@@ -1,5 +1,5 @@
|
||||
GNU AFFERO GENERAL PUBLIC LICENSE
|
||||
Version 3, 19 November 2007
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
@@ -7,15 +7,17 @@
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU Affero General Public License is a free, copyleft license for
|
||||
software and other kinds of works, specifically designed to ensure
|
||||
cooperation with the community in the case of network server software.
|
||||
The GNU General Public License is a free, copyleft license for
|
||||
software and other kinds of works.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
our General Public Licenses are intended to guarantee your freedom to
|
||||
the GNU General Public License is intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users.
|
||||
software for all its users. We, the Free Software Foundation, use the
|
||||
GNU General Public License for most of our software; it applies also to
|
||||
any other work released this way by its authors. You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
@@ -24,34 +26,44 @@ them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
Developers that use our General Public Licenses protect your rights
|
||||
with two steps: (1) assert copyright on the software, and (2) offer
|
||||
you this License which gives you legal permission to copy, distribute
|
||||
and/or modify the software.
|
||||
To protect your rights, we need to prevent others from denying you
|
||||
these rights or asking you to surrender the rights. Therefore, you have
|
||||
certain responsibilities if you distribute copies of the software, or if
|
||||
you modify it: responsibilities to respect the freedom of others.
|
||||
|
||||
A secondary benefit of defending all users' freedom is that
|
||||
improvements made in alternate versions of the program, if they
|
||||
receive widespread use, become available for other developers to
|
||||
incorporate. Many developers of free software are heartened and
|
||||
encouraged by the resulting cooperation. However, in the case of
|
||||
software used on network servers, this result may fail to come about.
|
||||
The GNU General Public License permits making a modified version and
|
||||
letting the public access it on a server without ever releasing its
|
||||
source code to the public.
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must pass on to the recipients the same
|
||||
freedoms that you received. You must make sure that they, too, receive
|
||||
or can get the source code. And you must show them these terms so they
|
||||
know their rights.
|
||||
|
||||
The GNU Affero General Public License is designed specifically to
|
||||
ensure that, in such cases, the modified source code becomes available
|
||||
to the community. It requires the operator of a network server to
|
||||
provide the source code of the modified version running there to the
|
||||
users of that server. Therefore, public use of a modified version, on
|
||||
a publicly accessible server, gives the public access to the source
|
||||
code of the modified version.
|
||||
Developers that use the GNU GPL protect your rights with two steps:
|
||||
(1) assert copyright on the software, and (2) offer you this License
|
||||
giving you legal permission to copy, distribute and/or modify it.
|
||||
|
||||
An older license, called the Affero General Public License and
|
||||
published by Affero, was designed to accomplish similar goals. This is
|
||||
a different license, not a version of the Affero GPL, but Affero has
|
||||
released a new version of the Affero GPL which permits relicensing under
|
||||
this license.
|
||||
For the developers' and authors' protection, the GPL clearly explains
|
||||
that there is no warranty for this free software. For both users' and
|
||||
authors' sake, the GPL requires that modified versions be marked as
|
||||
changed, so that their problems will not be attributed erroneously to
|
||||
authors of previous versions.
|
||||
|
||||
Some devices are designed to deny users access to install or run
|
||||
modified versions of the software inside them, although the manufacturer
|
||||
can do so. This is fundamentally incompatible with the aim of
|
||||
protecting users' freedom to change the software. The systematic
|
||||
pattern of such abuse occurs in the area of products for individuals to
|
||||
use, which is precisely where it is most unacceptable. Therefore, we
|
||||
have designed this version of the GPL to prohibit the practice for those
|
||||
products. If such problems arise substantially in other domains, we
|
||||
stand ready to extend this provision to those domains in future versions
|
||||
of the GPL, as needed to protect the freedom of users.
|
||||
|
||||
Finally, every program is threatened constantly by software patents.
|
||||
States should not allow patents to restrict development and use of
|
||||
software on general-purpose computers, but in those that do, we wish to
|
||||
avoid the special danger that patents applied to a free program could
|
||||
make it effectively proprietary. To prevent this, the GPL assures that
|
||||
patents cannot be used to render the program non-free.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
@@ -60,7 +72,7 @@ modification follow.
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU Affero General Public License.
|
||||
"This License" refers to version 3 of the GNU General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
@@ -537,45 +549,35 @@ to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Remote Network Interaction; Use with the GNU General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, if you modify the
|
||||
Program, your modified version must prominently offer all users
|
||||
interacting with it remotely through a computer network (if your version
|
||||
supports such interaction) an opportunity to receive the Corresponding
|
||||
Source of your version by providing access to the Corresponding Source
|
||||
from a network server at no charge, through some standard or customary
|
||||
means of facilitating copying of software. This Corresponding Source
|
||||
shall include the Corresponding Source for any work covered by version 3
|
||||
of the GNU General Public License that is incorporated pursuant to the
|
||||
following paragraph.
|
||||
13. Use with the GNU Affero General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU General Public License into a single
|
||||
under version 3 of the GNU Affero General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the work with which it is combined will remain governed by version
|
||||
3 of the GNU General Public License.
|
||||
but the special requirements of the GNU Affero General Public License,
|
||||
section 13, concerning interaction through a network will apply to the
|
||||
combination as such.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU Affero General Public License from time to time. Such new versions
|
||||
will be similar in spirit to the present version, but may differ in detail to
|
||||
the GNU General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU Affero General
|
||||
Program specifies that a certain numbered version of the GNU General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU Affero General Public License, you may choose any version ever published
|
||||
GNU General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU Affero General Public License can be used, that proxy's
|
||||
versions of the GNU General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
@@ -633,29 +635,40 @@ the "copyright" line and a pointer to where the full notice is found.
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If your software can interact with users remotely through a computer
|
||||
network, you should also make sure that it provides a way for users to
|
||||
get its source. For example, if your program is a web application, its
|
||||
interface could display a "Source" link that leads users to an archive
|
||||
of the code. There are many ways you could offer source, and different
|
||||
solutions will be better for different programs; see section 13 for the
|
||||
specific requirements.
|
||||
If the program does terminal interaction, make it output a short
|
||||
notice like this when it starts in an interactive mode:
|
||||
|
||||
<program> Copyright (C) <year> <name of author>
|
||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, your program's commands
|
||||
might be different; for a GUI interface, you would use an "about box".
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU AGPL, see
|
||||
For more information on this, and how to apply and follow the GNU GPL, see
|
||||
<https://www.gnu.org/licenses/>.
|
||||
|
||||
The GNU General Public License does not permit incorporating your program
|
||||
into proprietary programs. If your program is a subroutine library, you
|
||||
may consider it more useful to permit linking proprietary applications with
|
||||
the library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License. But first, please read
|
||||
<https://www.gnu.org/licenses/why-not-lgpl.html>.
|
||||
|
||||
40
LICENSING.md
40
LICENSING.md
@@ -1,40 +0,0 @@
|
||||
# Licensing
|
||||
|
||||
DECNET is **dual-licensed open core**.
|
||||
|
||||
## Community (this repository)
|
||||
|
||||
DECNET core — everything in this repository — is licensed under the **GNU Affero
|
||||
General Public License v3.0 or later (AGPL-3.0-or-later)**. See [LICENSE](./LICENSE).
|
||||
|
||||
AGPL (not GPL) is deliberate: DECNET is a network-deployed honeypot platform, so
|
||||
the AGPL §13 network-use clause matters — anyone who offers DECNET to others over
|
||||
a network must make their source available. GPLv3 would leave that loophole open.
|
||||
|
||||
## Commercial / Professional
|
||||
|
||||
Because the DECNET Foundation holds copyright in the core, the core is **also
|
||||
available under a commercial license**. A commercial core license is what lets
|
||||
the proprietary **DECNET Professional** add-on (advanced honeypots, distributed
|
||||
separately) be combined and shipped with the core without triggering the AGPL's
|
||||
copyleft obligations.
|
||||
|
||||
DECNET Professional itself is closed source, licensed under the
|
||||
[DECNET Commercial EULA](https://github.com/DECNET-Foundation/decnet-professional),
|
||||
and is **not** part of this repository. The open-core build neither contains nor
|
||||
depends on it.
|
||||
|
||||
| Tier | Code | License |
|
||||
|--------------|----------------------------------------|----------------------------|
|
||||
| Community | this repo | AGPL-3.0-or-later |
|
||||
| Professional | `decnet/pro/` (private repo) | DECNET Commercial EULA |
|
||||
|
||||
To use DECNET core under terms other than the AGPL, or to obtain DECNET
|
||||
Professional, contact **licensing@decnet.cl**.
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions to the core are accepted under the AGPL. Because the project is
|
||||
dual-licensed, contributors must agree that their contributions may also be
|
||||
distributed under the commercial license (a CLA / DCO sign-off). Relicensing
|
||||
requires that the Foundation hold or be granted rights to all contributed code.
|
||||
261
Makefile
261
Makefile
@@ -1,261 +0,0 @@
|
||||
PYTEST := .311/bin/pytest
|
||||
FAIL_FAST ?= 1
|
||||
NO_CACHE ?= 0
|
||||
ARGS :=
|
||||
|
||||
# addopts in pyproject.toml already provides -v -q -x -n 4 --dist load.
|
||||
# Unit suites inherit that; special suites clear it with --override-ini.
|
||||
UNIT_FLAGS := --timeout=30 --timeout-method=thread
|
||||
SEQ_FLAGS := --override-ini="addopts=-v -x" -n logical --timeout=120 --timeout-method=thread
|
||||
FUZZ_FLAGS := --override-ini="addopts=-v -x" -n logical -m fuzz \
|
||||
--ignore=tests/api/test_schemathesis.py \
|
||||
--ignore=tests/api/test_schemathesis_agent.py \
|
||||
--ignore=tests/api/test_schemathesis_swarm.py \
|
||||
--ignore=tests/api/test_schemathesis_ttp.py
|
||||
SCHEMA_QUICK ?= 0
|
||||
SCHEMA_FLAGS := --override-ini="addopts=-v -x" -n 4 -m fuzz --timeout=600 --timeout-method=thread
|
||||
BENCH_FLAGS := --override-ini="addopts=-v" -p no:xdist --benchmark-only -m bench
|
||||
|
||||
# ── Unit suites (xdist, 30s timeout) ─────────────────────────────────────────
|
||||
|
||||
.PHONY: test-core
|
||||
test-core:
|
||||
$(PYTEST) tests/core tests/config tests/factories tests/fixtures $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-web
|
||||
test-web:
|
||||
$(PYTEST) tests/web tests/services $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-db
|
||||
test-db:
|
||||
$(PYTEST) tests/db tests/vectorstore $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-bus
|
||||
test-bus:
|
||||
$(PYTEST) tests/bus tests/logging tests/telemetry $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-ttp
|
||||
test-ttp:
|
||||
$(PYTEST) tests/ttp $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-intel
|
||||
test-intel:
|
||||
$(PYTEST) tests/intel tests/asn tests/geoip $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-analysis
|
||||
test-analysis:
|
||||
$(PYTEST) tests/clustering tests/correlation $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-infra
|
||||
test-infra:
|
||||
$(PYTEST) tests/agent tests/collector tests/sniffer tests/profiler $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-fleet
|
||||
test-fleet:
|
||||
$(PYTEST) tests/fleet tests/swarm tests/topology tests/orchestrator tests/deploy tests/updater $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-cli
|
||||
test-cli:
|
||||
$(PYTEST) tests/cli tests/engine tests/mutator tests/realism $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-features
|
||||
test-features:
|
||||
$(PYTEST) tests/canary tests/artifacts tests/webhook tests/decky_io tests/prober $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
# ── Go and React suites ───────────────────────────────────────────────────────
|
||||
|
||||
_GO_MODULES := \
|
||||
decnet/templates/_caddy_modules/decnetfp \
|
||||
decnet/templates/http/_caddy_modules/decnetfp \
|
||||
decnet/templates/https/_caddy_modules/decnetfp
|
||||
|
||||
# test-go: run `go test ./...` inside every directory listed in _GO_MODULES.
# Prints a [PASS]/[FAIL] line per module and collects failing modules in the
# shell variable `failed`. With FAIL_FAST=1 the recipe exits on the first
# failing module; otherwise it keeps going and the final `[ -z "$$failed" ]`
# makes the target fail if any module failed.
.PHONY: test-go
|
||||
test-go:
|
||||
@failed=""; \
|
||||
for mod in $(_GO_MODULES); do \
|
||||
echo "=== go test: $$mod ==="; \
|
||||
if (cd "$$mod" && go test ./...); then \
|
||||
echo "[PASS] $$mod"; \
|
||||
else \
|
||||
echo "[FAIL] $$mod"; \
|
||||
failed="$$failed $$mod"; \
|
||||
if [ "$(FAIL_FAST)" = "1" ]; then exit 1; fi; \
|
||||
fi; \
|
||||
done; \
|
||||
[ -z "$$failed" ]
|
||||
|
||||
.PHONY: test-react
|
||||
test-react:
|
||||
cd decnet_web && npm run test:run $(ARGS)
|
||||
|
||||
# ── Special suites (sequential, longer timeout) ───────────────────────────────
|
||||
|
||||
.PHONY: test-live
|
||||
test-live:
|
||||
$(PYTEST) tests/live -m live $(SEQ_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-api
|
||||
test-api:
|
||||
$(PYTEST) tests/api $(SEQ_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-stress
|
||||
test-stress:
|
||||
$(PYTEST) tests/stress -m stress $(SEQ_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-service
|
||||
test-service:
|
||||
$(PYTEST) tests/service_testing $(SEQ_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-fuzz
|
||||
test-fuzz:
|
||||
$(PYTEST) $(FUZZ_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-schema
|
||||
test-schema:
|
||||
SCHEMA_QUICK=$(SCHEMA_QUICK) $(PYTEST) \
|
||||
tests/api/test_schemathesis.py \
|
||||
tests/api/test_schemathesis_agent.py \
|
||||
tests/api/test_schemathesis_swarm.py \
|
||||
tests/api/test_schemathesis_ttp.py \
|
||||
$(SCHEMA_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-bench
|
||||
test-bench:
|
||||
$(PYTEST) tests/perf $(BENCH_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-docker
|
||||
test-docker:
|
||||
DECNET_LIVE_DOCKER=1 $(PYTEST) tests/docker -m docker $(SEQ_FLAGS) $(ARGS)
|
||||
|
||||
# ── Static analysis ───────────────────────────────────────────────────────────
|
||||
|
||||
.PHONY: test-mypy
|
||||
test-mypy:
|
||||
.311/bin/mypy decnet --ignore-missing-imports --no-error-summary
|
||||
|
||||
.PHONY: test-bandit
|
||||
test-bandit:
|
||||
.311/bin/bandit -r decnet -c pyproject.toml
|
||||
|
||||
.PHONY: test-vulture
|
||||
test-vulture:
|
||||
.311/bin/vulture decnet --min-confidence 80
|
||||
|
||||
.PHONY: test-pip-audit
|
||||
test-pip-audit:
|
||||
.311/bin/pip-audit
|
||||
|
||||
# ── Composite: all suites ─────────────────────────────────────────────────────
|
||||
|
||||
_ALL_SUITES := core web db bus ttp intel analysis infra fleet cli features \
|
||||
go react \
|
||||
live api schema stress service fuzz bench docker \
|
||||
mypy bandit vulture pip-audit
|
||||
|
||||
# test-all / test: run every suite named in _ALL_SUITES by invoking
# `$(MAKE) test-<suite>` for each one, forwarding ARGS to the sub-make.
# Prints a banner and a [PASS]/[FAIL] line per suite and collects failing
# suite names in the shell variable `failed`. With FAIL_FAST=1 the recipe
# stops at the first failing suite; otherwise it runs all suites, prints the
# list of failures at the end and exits 1 if that list is non-empty.
.PHONY: test-all test
|
||||
test-all test:
|
||||
@failed=""; \
|
||||
for suite in $(_ALL_SUITES); do \
|
||||
echo ""; \
|
||||
echo "══════════════════════════ $$suite ══════════════════════════"; \
|
||||
if $(MAKE) --no-print-directory test-$$suite ARGS="$(ARGS)"; then \
|
||||
echo "[PASS] $$suite"; \
|
||||
else \
|
||||
echo "[FAIL] $$suite"; \
|
||||
failed="$$failed $$suite"; \
|
||||
if [ "$(FAIL_FAST)" = "1" ]; then \
|
||||
echo "Stopping at first failure. Use FAIL_FAST=0 to run all suites."; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
fi; \
|
||||
done; \
|
||||
if [ -n "$$failed" ]; then \
|
||||
echo ""; \
|
||||
echo "Failed:$$failed"; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
echo ""; \
|
||||
echo "All suites passed."
|
||||
|
||||
# ── Decky image pre-build ─────────────────────────────────────────────────────
|
||||
|
||||
_DECKY_TEMPLATES := \
|
||||
conpot docker_api elasticsearch ftp http https imap k8s ldap \
|
||||
llmnr mongodb mqtt mssql mysql pop3 postgres rdp redis sip smb smtp \
|
||||
sniffer snmp ssh telnet tftp vnc
|
||||
|
||||
# build-all: docker-build one image per entry in _DECKY_TEMPLATES, using
# decnet/templates/<svc> as the build context and tagging the result
# decnet/<svc>:latest (DOCKER_BUILDKIT=1 is set for each build).
# NO_CACHE=1 adds `--no-cache` to every build. Failing services are collected
# in the shell variable `failed`; with FAIL_FAST=1 the recipe stops at the
# first failure, otherwise it builds everything, prints the failures at the
# end and exits 1 if any build failed.
.PHONY: build-all
|
||||
build-all:
|
||||
@failed=""; \
|
||||
for svc in $(_DECKY_TEMPLATES); do \
|
||||
echo ""; \
|
||||
echo "══════════════════════════ $$svc ══════════════════════════"; \
|
||||
_nc=""; \
|
||||
if [ "$(NO_CACHE)" = "1" ]; then _nc="--no-cache"; fi; \
|
||||
if DOCKER_BUILDKIT=1 docker build $$_nc \
|
||||
-t decnet/$$svc:latest \
|
||||
decnet/templates/$$svc; then \
|
||||
echo "[BUILT] $$svc"; \
|
||||
else \
|
||||
echo "[FAIL] $$svc"; \
|
||||
failed="$$failed $$svc"; \
|
||||
if [ "$(FAIL_FAST)" = "1" ]; then \
|
||||
echo "Stopping at first failure. Use FAIL_FAST=0 to build all."; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
fi; \
|
||||
done; \
|
||||
if [ -n "$$failed" ]; then \
|
||||
echo ""; \
|
||||
echo "Failed:$$failed"; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
echo ""; \
|
||||
echo "All decky images built."
|
||||
|
||||
.PHONY: help
|
||||
help:
|
||||
@echo "Unit suites (xdist, 30s timeout):"
|
||||
@echo " make test-core tests/core + config + factories + fixtures"
|
||||
@echo " make test-web tests/web + services"
|
||||
@echo " make test-db tests/db + vectorstore"
|
||||
@echo " make test-bus tests/bus + logging + telemetry"
|
||||
@echo " make test-ttp tests/ttp"
|
||||
@echo " make test-intel tests/intel + asn + geoip"
|
||||
@echo " make test-analysis tests/clustering + correlation"
|
||||
@echo " make test-infra tests/agent + collector + sniffer + profiler"
|
||||
@echo " make test-fleet tests/fleet + swarm + topology + orchestrator + deploy + updater"
|
||||
@echo " make test-cli tests/cli + engine + mutator + realism"
|
||||
@echo " make test-features tests/canary + artifacts + webhook + decky_io + prober"
|
||||
@echo ""
|
||||
@echo "Go / React suites:"
|
||||
@echo " make test-go go test ./... in each Caddy module variant"
|
||||
@echo " make test-react vitest run in decnet_web"
|
||||
@echo ""
|
||||
@echo "Special suites (sequential, 120s timeout):"
|
||||
@echo " make test-live tests/live"
|
||||
@echo " make test-api tests/api (schemathesis)"
|
||||
@echo " make test-stress tests/stress"
|
||||
@echo " make test-service tests/service_testing"
|
||||
@echo " make test-schema schemathesis contract tests (-m fuzz, xdist logical)"
|
||||
@echo " make test-schema SCHEMA_QUICK=1 same, capped at 100 examples per test"
|
||||
@echo " make test-fuzz hypothesis fuzz (all normal dirs, -m fuzz, skips schemathesis files)"
|
||||
@echo " make test-bench tests/perf"
|
||||
@echo " make test-docker tests/docker (needs DECNET_LIVE_DOCKER=1)"
|
||||
@echo ""
|
||||
@echo "Static analysis:"
|
||||
@echo " make test-mypy mypy type check on decnet/"
|
||||
@echo " make test-bandit bandit security scan on decnet/"
|
||||
@echo " make test-vulture vulture dead code scan (>=80% confidence)"
|
||||
@echo " make test-pip-audit pip-audit dependency vulnerability scan"
|
||||
@echo ""
|
||||
@echo "Composites:"
|
||||
@echo " make test-all ALL suites (unit + go + react + live + api + schema + fuzz + bench + stress + docker + static analysis)"
|
||||
@echo " make test-all FAIL_FAST=0 same, report all failures instead of stopping"
|
||||
@echo ""
|
||||
@echo "Passthrough: make test-web ARGS='--lf -s'"
|
||||
@echo ""
|
||||
@echo "Decky images:"
|
||||
@echo " make build-all build decnet/<svc>:latest for all 27 decky templates"
|
||||
@echo " make build-all NO_CACHE=1 same, bypassing Docker layer cache"
|
||||
@echo " make build-all FAIL_FAST=0 same, continue past failures"
|
||||
566
SIGNAL_CAPTURE_AUDIT.md
Normal file
566
SIGNAL_CAPTURE_AUDIT.md
Normal file
@@ -0,0 +1,566 @@
|
||||
# DECNET Capture Pipeline — Attacker-Profiling Signal Audit
|
||||
|
||||
**Date**: 2026-04-22
|
||||
**Scope**: v1 capture readiness for post-v1 profiler extraction
|
||||
**Methodology**: End-to-end verification (emission → transport → storage) for each signal against active code paths.
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
**Capture Status by Category**:
|
||||
|
||||
| Category | Captured | Partial | Not Captured | n/a |
|
||||
|----------|----------|---------|--------------|-----|
|
||||
| Session Environment | 0 | 1 | 3 | 0 |
|
||||
| Keystroke/Human | 0 | 2 | 6 | 2 |
|
||||
| SSH Transport | 2 | 2 | 2 | 0 |
|
||||
| Network/TCP | 3 | 2 | 5 | 0 |
|
||||
| TLS/L7 | 2 | 2 | 1 | 0 |
|
||||
| Aggregated/Derived | 0 | 0 | 5 | 0 |
|
||||
| **TOTAL** | **7** | **9** | **22** | **2** |
|
||||
|
||||
**Critical Pre-v1 Gaps** (blockers if signals are roadmap-committed):
|
||||
|
||||
1. **KEX algorithm ordering** — HASSH hash is stored, but raw `kex_algorithms` string is only emitted to syslog, not persisted to DB. Future extractor must parse syslog archives.
|
||||
2. **Per-keystroke timing** — Asciinema v2 `"i"` events with `t` timestamps are written to day-shard files on disk, but no database ingestion. Requires filesystem polling + parsing path.
|
||||
3. **TCP options order** — Captured in PCAP + sniffer logs (`options_sig`), but `options_sig` is a rolled-up signature string, not the raw per-connection sequence.
|
||||
4. **Terminal size (COLS×ROWS)** — Not captured from pty-req at all; would require SSH protocol-level interception.
|
||||
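5. **SSH client version** — The client's RFC 4253 identification string (`SSH-2.0-…`) crosses the wire in plaintext before key exchange, but nothing records it today: the prober only captures the *server* banner, so client capture requires sniffer TCP-stream parsing.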
|
||||
|
||||
**Biggest ROI capture improvements** (cheap, high-value):
|
||||
|
||||
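1. Add `ssh_client_banner` column to Attacker table — capture the client's `SSH-2.0-*` identification string, which is sent in plaintext at the start of the TCP stream (before key exchange, not in pty-req).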
|
||||
2. Ingest asciinema keystroke timing into new `SessionProfile` table (v2 roadmap already designs this).
|
||||
3. Store raw KEX algorithm lists in `AttackerBehavior.kex_order_raw` (MEDIUMTEXT) instead of relying on syslog dedup.
|
||||
|
||||
---
|
||||
|
||||
## Per-Signal Classification
|
||||
|
||||
### Per-Session Environment (SessionProfile candidates)
|
||||
|
||||
#### TERM environment variable
|
||||
- **Status**: `partial`
|
||||
- **Where**: SSH server can read TERM from pty-req; emitted in syslog by `emit_capture.py` if implemented.
|
||||
- **Current path**: Not found in active code path. Check `decnet/templates/ssh/emit_capture.py` or syslog bridge.
|
||||
- **Missing**: Database column in a `SessionProfile` table; no structured ingestion.
|
||||
- **Cheap fix**: Modify SSH syslog bridge to emit `session_event` with `term=<value>`. Create `SessionProfile` table with `session_term` TEXT column.
|
||||
- **Priority**: V2 backlog (nice-to-have for human vs. automation, low discriminative power).
|
||||
|
||||
#### LANG / LC_ALL
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Server-side locale is baked into container image, not attacker-controlled. Attacker's client locale is not visible over SSH.
|
||||
- **Priority**: defer (non-capturable from server vantage point).
|
||||
|
||||
#### SSH client version string (full SSH-2.0-OpenSSH_9.2p1…)
|
||||
- **Status**: `partial`
|
||||
- **Where**: RFC 4253 banner string is transmitted in plaintext before encryption. Sniffer could capture it from TCP stream; prober `hassh.py` captures server banner (lines 58–101), not client.
|
||||
- **Missing**: Client-side banner capture. Sniffer would need TCP stream reconstruction to pluck the SSH banner from the raw payload.
|
||||
- **Cheap fix**: Extend sniffer to parse SSH banners from the TCP stream (they are sent in plaintext before key exchange and encryption begin); emit `ssh_client_banner` event. Store in Attacker.`ssh_client_banners` (JSON list).
|
||||
- **Priority**: v1 blocker if client-profiling is committed. Currently partial via TLS fingerprint fallback.
|
||||
|
||||
#### Terminal size (COLS × ROWS)
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: The SSH `pty-req` channel request carries the TERM value, the terminal dimensions (columns, rows, pixel width/height) and the encoded terminal modes (including speeds); server-side sshd parses this but does not log it by default. Would require patching sshd or intercepting at the protocol layer.
|
||||
- **Missing**: No access to pty-req payload without protocol-level instrumentation.
|
||||
- **Cheap fix**: Patch SSH entrypoint to log pty-req to syslog before accepting the request (requires custom OpenSSH build).
|
||||
- **Priority**: V2 backlog (interesting for typing-space reconstruction, but not a blocker).
|
||||
|
||||
---
|
||||
|
||||
### Per-Session, Keyboard/Human (SessionProfile candidates)
|
||||
|
||||
#### Per-keystroke timing (t in asciinema "i" events)
|
||||
- **Status**: `partial`
|
||||
- **Where**: Sessrec pipeline (`decnet/templates/ssh/sessrec/`) writes asciinema v2 day-shards with per-keystroke `"i"` (input) events carrying `t` (timestamp in seconds since session start). Files on disk: `/var/lib/decnet/session_recordings/<decky>/<date>.json` (or similar).
|
||||
- **Missing**: No ingestion into database. Extractors must read asciinema files from filesystem and parse the `"i"` event stream post-hoc.
|
||||
- **Cheap fix**: Ingest keystroke timing stream into new `SessionProfile` table (design already in DEVELOPMENT_V2.md). Add job to parse day-shard files on rotation and compute IKI moments, burst ratio, etc.
|
||||
- **Priority**: v1 blocker if keystroke dynamics is roadmap-committed. Data exists but not queryable.
|
||||
|
||||
#### Control-character stream (backspace, ^W, ^U, ^C, ^D, arrows, tab)
|
||||
- **Status**: `partial`
|
||||
- **Where**: Asciinema captures every keystroke as UTF-8/control byte in `"i"` events. Raw byte sequence is preserved.
|
||||
- **Missing**: Same as above — files on disk, no DB ingestion. Future extractor can parse control bytes from the `"data"` field of each `"i"` event.
|
||||
- **Cheap fix**: Same as keystroke timing — ingest asciinema events and compute `kd_ctrl_*` rates in SessionProfile.
|
||||
- **Priority**: v2 (depends on SessionProfile schema).
|
||||
|
||||
#### Inter-command think time (prompt-return to next-command-start gap)
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Requires prompt boundary detection in the asciinema stream (heuristic: line ending in `$` or `#` + pause > 100ms). No active code marks prompts.
|
||||
- **Missing**: Prompt-boundary markers in asciinema. Would require ML or regex-based post-processing.
|
||||
- **Cheap fix**: Add prompt-regex configuration + marker injection during sessrec playback, or post-hoc analysis over asciinema.
|
||||
- **Priority**: V2 (interesting but requires heuristic or attacker-side annotation).
|
||||
|
||||
#### Pause before sensitive commands
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Requires command-boundary detection (typing a full command, then detecting gap before Enter). Asciinema captures this timing, but no code marks command boundaries.
|
||||
- **Missing**: Command-line parsing + gap detection logic.
|
||||
- **Cheap fix**: Off-line analysis: parse `"i"` events, detect Enter (`\r`), measure gap before Enter. Correlate with command content from `"o"` (output) events.
|
||||
- **Priority**: V2 backlog (post-extraction analysis; interesting for psychological profiling).
|
||||
|
||||
#### Command n-grams
|
||||
- **Status**: `partial`
|
||||
- **Where**: SSH service logs individual commands to syslog when pty input is detected. Attacker.`commands` JSON array stores seen commands (but coarse-grained per service/decky, not per-session).
|
||||
- **Missing**: Per-session, per-command sequencing. No n-grams (bigrams/trigrams) are computed.
|
||||
- **Cheap fix**: Parse asciinema `"i"` + `"o"` stream to extract full command lines, store as JSON list in SessionProfile.`cmd_sequence` or new `SessionCommand` table.
|
||||
- **Priority**: V2 (foundation for command chaining fingerprint).
|
||||
|
||||
#### Flag preferences (ls -la vs ls -al, ps -ef vs ps aux)
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Asciinema records the **typed** command line exactly, but no code parses flag ordering or normalizes commands for pattern comparison.
|
||||
- **Missing**: Canonical command parsing + flag-order extraction.
|
||||
- **Cheap fix**: Off-line: regex-parse commands from asciinema, extract flag sequences, compute n-grams over flag positions.
|
||||
- **Priority**: V2 (cheap post-processing, good human-vs-tool separator).
|
||||
|
||||
#### Typo patterns (suod, sl)
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: The terminal output (`"o"` events) shows only the corrected command line after backspacing; the raw keystrokes, typos included, are present in the `"i"` event stream, but no code parses them.
|
||||
- **Example**: typing `suod`, pressing backspace twice, then typing `do` ends up as `sudo` in the `"o"` output; the intermediate typo is **visible** in the `"i"` event stream but requires careful keystroke-by-keystroke parsing.
|
||||
- **Missing**: Raw keystroke stream parsing to detect backspace/correction patterns.
|
||||
- **Cheap fix**: Parse `"i"` events, reconstruct line state keystroke-by-keystroke, log (typed_text, final_text) pairs to detect corrections.
|
||||
- **Priority**: V2 (unique human fingerprint, but requires manual asciinema parsing).
|
||||
|
||||
#### Editor choice (vi/vim/nano/ed)
|
||||
- **Status**: `partial`
|
||||
- **Where**: Command launch (`vi`, `nano`, `ed`) is visible in asciinema `"i"` + `"o"` stream and captured in Attacker.`commands`.
|
||||
- **Missing**: No aggregation of editor invocations or time-in-editor statistics.
|
||||
- **Cheap fix**: Post-process commands, count editor launches, extract editor type. Could add to AttackerBehavior.`preferred_editor` or new SessionProfile.`editor_used`.
|
||||
- **Priority**: V2 (behavioral signal, low priority).
|
||||
|
||||
#### Shell history usage (`!!`, `!$`, `^old^new`, `fc`)
|
||||
- **Status**: `partial`
|
||||
- **Where**: Command input stream captures the actual invocation (if attacker types `!!`, it's visible in `"i"`). Output `"o"` shows the expanded command.
|
||||
- **Missing**: No parsing of history expansion syntax; requires post-processing to identify `!` / `^` patterns.
|
||||
- **Cheap fix**: Regex-scan asciinema input for shell history operators; count occurrences.
|
||||
- **Priority**: V2 (interesting tool-chain signal, but low volume).
|
||||
|
||||
---
|
||||
|
||||
### Per-Attacker, SSH Transport (AttackerBehavior candidates)
|
||||
|
||||
#### HASSH / HASSHServer
|
||||
- **Status**: `captured`
|
||||
- **Where**: Prober (`decnet/prober/hassh.py`) computes HASSHServer fingerprint; stored as `Attacker.fingerprints` JSON list (generic bounty store). Also emitted to syslog by prober worker.
|
||||
- **Note**: Roadmap says `[x]` (captured); verified in code at lines 244–252 of `hassh.py`.
|
||||
- **Storage**: `Attacker.fingerprints` (JSON list of `{type, value, ...}` dicts); not per-attacker-behavior, but queryable.
|
||||
- **Priority**: ✓ captured; v2: consider normalizing to `AttackerBehavior.hassh_server` for faster lookup.
|
||||
|
||||
#### KEX algorithm preference ORDER (beyond HASSH hash)
|
||||
- **Status**: `partial`
|
||||
- **Where**: Sniffer logs raw `kex_algorithms`, `encryption_s2c`, `mac_s2c`, `compression_s2c` strings to syslog in `tls_session` and `tcp_syn_fingerprint` events (fingerprint.py lines 240–252).
|
||||
- **Missing**: Stored in **syslog only**, not in DB. Attacker table has `fingerprints` (bounty store) but no dedicated `kex_order_raw` column.
|
||||
- **Path to recovery**: Read syslog archives and parse `kex_algorithms` field. But this is not queryable at scale.
|
||||
- **Cheap fix**: Add `Attacker.kex_order_raw` (MEDIUMTEXT, JSON string list) and `kd_kex_order_hash` (similar to digraph simhash). Populate during sniffer event ingestion.
|
||||
- **Priority**: v1 blocker if KEX ordering is committed to roadmap (currently only hash stored, raw data must be re-parsed from syslog).
|
||||
|
||||
#### Public key comment field
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: SSH key comment is part of the OpenSSH wire format (only transmitted if key auth is used). Server-side sshd does not log it by default; would require PAM/auth hook instrumentation.
|
||||
- **Missing**: No interception of public key authentication payloads.
|
||||
- **Cheap fix**: Patch SSH server to emit auth_pubkey event with key comment extracted from wire format. Or use `net.ssh` library instrumentation.
|
||||
- **Priority**: V2 backlog (valuable for key reuse fingerprinting, but rare).
|
||||
|
||||
#### Private key type advertised (Ed25519 / RSA / ECDSA)
|
||||
- **Status**: `partial`
|
||||
- **Where**: SSH transport carries key type in the public key authentication message. Sniffer cannot decode this (traffic is encrypted once key exchange completes, i.e. after SSH_MSG_NEWKEYS). Server-side sshd doesn't log it.
|
||||
- **Missing**: Requires either passive PCAP of SSH-TRANSPORT (not available; encrypted) or server-side auth hook.
|
||||
- **Cheap fix**: Patch sshd to emit `auth_pubkey_type` event during authentication.
|
||||
- **Priority**: V2 (interesting but lower signal than key comment).
|
||||
|
||||
#### Agent forwarding requested?
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Agent forwarding is requested with an `auth-agent-req@openssh.com` channel request on the session channel. It is sent only after authentication, so it is always encrypted (post-KEX).
|
||||
- **Missing**: Would require decrypting SSH transport or instrumenting sshd auth hook.
|
||||
- **Cheap fix**: Sshd can detect `SSH_AUTH_SOCK` or SSH_AGENT_FWD service request; add to syslog.
|
||||
- **Priority**: V2 (useful for lateral-movement detection).
|
||||
|
||||
#### Channel multiplexing pattern
|
||||
- **Status**: `partial`
|
||||
- **Where**: SSH service logs each command separately. Channel open/close events could be tracked, but no code currently does.
|
||||
- **Missing**: Per-session channel state machine (open channels, their types, lifetime).
|
||||
- **Cheap fix**: Instrument sshd or use SSH_MSG_CHANNEL_OPEN events in syslog to track simultaneous channels.
|
||||
- **Priority**: V2 (rare; most attackers use sequential commands).
|
||||
|
||||
#### SSH_CLIENT / SSH_CONNECTION environment variables
|
||||
- **Status**: `captured`
|
||||
- **Where**: SSH server **always** sets `SSH_CLIENT` and `SSH_CONNECTION` in the child shell. Server-side user code (bashrc, commands) can read them. If attacker runs `echo $SSH_CLIENT`, it's visible in asciinema output.
|
||||
- **Missing**: No **automatic** logging of these vars. Requires parsing asciinema for intentional queries or patching sshd to emit them.
|
||||
- **Cheap fix**: Patch SSH PAM or auth hook to log `SSH_CLIENT` on successful auth. Or parse asciinema for `echo $SSH_*` commands.
|
||||
- **Priority**: V2 (low value; mostly redundant with src_ip already in logs).
|
||||
|
||||
---
|
||||
|
||||
### Per-Attacker, Network/Transport (AttackerBehavior candidates)
|
||||
|
||||
#### TCP timestamp clock skew (Kohno 2005)
|
||||
- **Status**: `partial`
|
||||
- **Where**: PCAP contains TCP timestamps (if present). Sniffer code extracts MSS, window size, options (fingerprint.py lines 77–94). TCP options include a timestamp flag (`has_timestamps`).
|
||||
- **Missing**: Raw timestamp values (`opt_value` for "Timestamp" in scapy) are NOT extracted. Only boolean `has_timestamps` flag is stored. To compute clock skew, need timestamp values across multiple packets.
|
||||
- **Path to recovery**: Raw PCAP analysis (if PCAPs are retained on disk). Each TCP packet has `[TCP option: Timestamp x, y]` which can be parsed post-hoc.
|
||||
- **Cheap fix**: Extend sniffer to extract the TCP timestamp option values (TSval/TSecr) together with each packet's capture time, plus RTT deltas. Store as per-flow timing summary in `tcp_flow_timing` event (which already captures flow metrics).
|
||||
- **Priority**: V2 (requires PCAP or extended sniffer capture; useful for OS fingerprinting).
|
||||
|
||||
#### TCP ISN generator characteristics
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: ISN is visible in PCAP (TCP seq number on SYN). Sniffer code tracks flow seqs for retransmit detection (line 850) but does not extract the initial SYN seq across multiple connections to analyze ISN patterns.
|
||||
- **Missing**: No per-connection ISN logging. Would need to roll up ISN sequences across multiple SYNs to the same port.
|
||||
- **Cheap fix**: On every SYN, log `syn_seq` in `tcp_syn_fingerprint` event. Post-hoc analysis can compute randomness metrics.
|
||||
- **Priority**: V2 backlog (weak signal; ISN randomization is standard on modern OS).
|
||||
|
||||
#### TCP options ordering in SYN
|
||||
- **Status**: `partial`
|
||||
- **Where**: Sniffer extracts `options_sig` (line 87) via `_extract_options_order()` from scapy TCP options. This is a **signature string** (e.g., `"MSS,WScale,SAckOK,Timestamp"`).
|
||||
- **Missing**: The signature is **aggregated**; we don't store the raw per-packet ordering. Also, `options_sig` is deduplicated in logs (only one event per unique signature per dedup window).
|
||||
- **Path to recovery**: Raw PCAP analysis or re-parsing sniffer logs to extract the signature. But the signature is a good enough feature for OS fingerprinting.
|
||||
- **Cheap fix**: Store `tcp_fingerprint` JSON in AttackerBehavior with raw options list (not just signature). Current schema (models.py lines 174–177) only stores aggregated `{window, wscale, mss, options_sig}`.
|
||||
- **Priority**: v1 improvement (low effort, already have options_sig; add raw list).
|
||||
|
||||
#### Initial congestion window ramp-up
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Requires detailed TCP state machine tracking (SYN, SYN-ACK, ACK sequence with packet sizes). Sniffer tracks `packets` count and `bytes` total per flow (line 844–868), but not per-packet sequence or ACK-clock dynamics.
|
||||
- **Missing**: Per-packet payload sizes and ACK timing.
|
||||
- **Cheap fix**: Extend `tcp_flow_timing` event to include per-packet sizes (as JSON list) or CWND estimation from ACK patterns.
|
||||
- **Priority**: V2 backlog (very niche; useful for Reno vs. Cubic vs. BBR detection, but rare in honeypot context).
|
||||
|
||||
#### Retransmit timing and backoff
|
||||
- **Status**: `captured`
|
||||
- **Where**: Sniffer tracks `retransmits` count per flow (lines 873–877, 922). Emitted in `tcp_flow_timing` event. No **timing** of retransmits, only count.
|
||||
- **Missing**: Timing deltas between retransmit pairs (RTO, exponential backoff pattern).
|
||||
- **Path to recovery**: Raw PCAP; sequence numbers in `tcp_flow_timing` are not logged.
|
||||
- **Cheap fix**: Extend event to include retransmit timing deltas (list of RTOs).
|
||||
- **Priority**: V2 (useful for network condition inference; low value on honeypots).
|
||||
|
||||
#### MTU / path-MTU discovery behavior
|
||||
- **Status**: `partial`
|
||||
- **Where**: Sniffer tracks per-flow byte counts (line 868); can infer effective MSS from packet sizes. TCP fingerprint includes extracted MSS (lines 77–94, emitted in `tcp_syn_fingerprint`).
|
||||
- **Missing**: No multi-flow MTU tracking or ICMP fragmentation-needed response detection. Would require ICMP processing.
|
||||
- **Cheap fix**: Log ICMP unreachable (frag needed) events separately; correlate with TCP flows to infer PMTUD behavior.
|
||||
- **Priority**: V2 backlog (VPN detection is interesting but niche).
|
||||
|
||||
#### Packet pacing (microsecond-resolution egress timing)
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Sniffer computes mean/min/max inter-arrival time in milliseconds (lines 904–906), not microseconds. Modern pacing requires sub-millisecond precision.
|
||||
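- **Missing**: Sniffer timestamps packets with `time.monotonic()` in user space. The clock itself has sub-microsecond resolution on Linux, but user-space arrival times include scheduling and buffering jitter; accurate pacing analysis would need kernel/PCAP packet timestamps or hardware timestamps.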
|
||||
- **Cheap fix**: Upgrade sniffer to use PCAP timestamps (pcap.ts_resolution) if available; log microsecond-resolution inter-packet gaps.
|
||||
- **Priority**: V2 backlog (requires infrastructure upgrade; marginal value on honeypots).
|
||||
|
||||
#### Window scaling multipliers
|
||||
- **Status**: `captured`
|
||||
- **Where**: Sniffer extracts `wscale` from TCP options (line 80); stored in `tcp_fingerprint` JSON and emitted in `tcp_syn_fingerprint` event.
|
||||
- **Storage**: AttackerBehavior.`tcp_fingerprint` (JSON: `{window, wscale, mss, ...}`); queryable.
|
||||
- **Priority**: ✓ captured (sufficient for OS fingerprinting and congestion algorithm inference).
|
||||
|
||||
#### ECN negotiation
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: ECN is negotiated through the ECE and CWR flag bits in the TCP header of the SYN/SYN-ACK, not through TCP options, so the current options-based fingerprint extraction never sees it.
|
||||
- **Missing**: No code to parse ECN negotiation from TCP header.
|
||||
- **Cheap fix**: Extend TCP fingerprint extraction to check for ECN flag bits.
|
||||
- **Priority**: V2 backlog (rarely used; low value).
|
||||
|
||||
---
|
||||
|
||||
### Per-Attacker, L7 (TLS/HTTP)
|
||||
|
||||
#### TLS fingerprint (JA3/JA4)
|
||||
- **Status**: `captured`
|
||||
- **Where**: Sniffer fingerprint engine computes JA3/JA3S/JA4/JA4S (lines 565–662); emitted in syslog and stored in `Attacker.fingerprints` (bounty store).
|
||||
- **Storage**: Logs are queryable; fingerprints stored as JSON in bounty table (generic).
|
||||
- **Roadmap**: `[x]` JA3/JA3S, `[x]` JA4+. Verified in code.
|
||||
- **Priority**: ✓ captured (good).
|
||||
|
||||
#### TLS session resumption behavior
|
||||
- **Status**: `captured`
|
||||
- **Where**: Sniffer extracts resumption mechanisms (session_ticket, PSK, early_data, session_id) in `_session_resumption_info()` (lines 675–689). Emitted in `tls_client_hello` event.
|
||||
- **Storage**: Logged to syslog; `Attacker.fingerprints` stores resumption=`[mechanism list]`.
|
||||
- **Priority**: ✓ captured (good).
|
||||
|
||||
#### HTTP/2 SETTINGS frame ordering + values
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: HTTP/2 is encrypted (after TLS handshake). Sniffer cannot see plaintext SETTINGS frames.
|
||||
- **Missing**: Would require decryption (not viable passively) or attacker-side TLS instrumentation.
|
||||
- **Cheap fix**: Instrument HTTP/2 services (h2c, HTTP/2 over plain TCP on rare deployments) or use TLS key log for offline analysis.
|
||||
- **Priority**: defer (not capturable from passive vantage point).
|
||||
|
||||
#### HTTP/2 stream prioritization
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Encrypted in TLS.
|
||||
- **Missing**: Same as above.
|
||||
- **Priority**: defer (not capturable).
|
||||
|
||||
#### HTTP header ordering
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Inside encrypted TLS. Sniffer cannot see plaintext HTTP headers.
|
||||
- **Missing**: Would require server-side HTTP request logging (not implemented).
|
||||
- **Cheap fix**: Instrument HTTP service to log raw header order in syslog.
|
||||
- **Priority**: V2 (useful for bot/tool detection, but requires service-level capture).
|
||||
|
||||
#### Cookie handling behavior (expiry, domain scope)
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Encrypted TLS + requires HTTP state machine tracking (Set-Cookie responses vs. Cookie requests).
|
||||
- **Missing**: Would need server-side HTTP middleware or browser instrumentation.
|
||||
- **Cheap fix**: Add cookie jar logging to HTTP service (track which attacker cookies were accepted, rejected, resent).
|
||||
- **Priority**: V2 (behavioral signal; interesting but niche).
|
||||
|
||||
---
|
||||
|
||||
### Per-Attacker, Aggregated/Derived (would live in new `AttackerAggregate` table)
|
||||
|
||||
#### Time-of-day activity distribution (chronotyping)
|
||||
- **Status**: `partial`
|
||||
- **Where**: Log entries have `timestamp` (datetime). All events are timestamped. Can compute hour-of-day histogram post-hoc.
|
||||
- **Missing**: No aggregation table or computed features. Would live in new AttackerAggregate.
|
||||
- **Cheap fix**: Batch job: group events by attacker + hour-of-day, compute distribution histogram. Store as JSON or new table.
|
||||
- **Priority**: V2 (simple aggregation; good for clustering).
|
||||
|
||||
#### Session duration distribution
|
||||
- **Status**: `partial`
|
||||
- **Where**: SessionProfile schema (DEVELOPMENT_V2.md) includes `session_duration_s`. Asciinema files are per-decky-per-day, so duration can be computed.
|
||||
- **Missing**: No SessionProfile table yet; no aggregation of durations across sessions.
|
||||
- **Cheap fix**: Implement SessionProfile table + compute per-attacker duration histogram in AttackerAggregate.
|
||||
- **Priority**: V2 (depends on SessionProfile; good for behavioral clustering).
|
||||
|
||||
#### Recon-to-action ratio
|
||||
- **Status**: `partial`
|
||||
- **Where**: Profiler already computes recon vs. exfil phase sequencing (behavioral.py lines 52–62, 188–191). Stored in `AttackerBehavior.phase_sequence` (JSON: `{recon_end, exfil_start, latency}`).
|
||||
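- **Missing**: No per-attacker ratio column in AttackerAggregate. Would be a simple division, `recon_events / exfil_events` (recon-to-action, matching the signal name), with a guard for attackers that have zero exfil events.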
|
||||
- **Cheap fix**: Compute ratio in profiler job; store in new AttackerAggregate or as extension to AttackerBehavior.
|
||||
- **Priority**: V2 (low effort; useful for threat level scoring).
|
||||
|
||||
#### Lateral movement style
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Requires graph traversal (attacker hopping between deckies). Correlation engine (correlation/engine.py) should track this, but no explicit "lateral movement style" feature (sequential vs. parallel, target selection heuristic).
|
||||
- **Missing**: No code analyzing lateral movement pattern (which deckies were touched, in what order, dwell time per decky).
|
||||
- **Cheap fix**: Extend CorrelationEngine to build per-attacker decky traversal graph; compute metrics (average dwell time, fan-out ratio, revisit frequency).
|
||||
- **Priority**: V2 (interesting; requires traversal graph extraction from correlation engine).
|
||||
|
||||
#### Persistence-first vs. exfil-first
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Requires semantic tagging of events (is this persistence activity? exfil activity?). Profiler has `EXFIL_EVENT_TYPES` (behavioral.py lines 59–62) but no persistence catalog.
|
||||
- **Missing**: No code to classify persistence attempts (cron jobs, reverse shells, privilege escalation).
|
||||
- **Cheap fix**: Add PERSISTENCE_EVENT_TYPES list; compute persistence_start vs. exfil_start timestamps; store in AttackerBehavior or AttackerAggregate.
|
||||
- **Priority**: V2 (requires event taxonomy; valuable for threat classification).
|
||||
|
||||
#### Tool-chain ordering
|
||||
- **Status**: `partial`
|
||||
- **Where**: Profiler logs tool guesses in AttackerBehavior.`tool_guesses` (line 183, behavioral.py lines 76–105). Tools are matched by beacon timing + header patterns.
|
||||
- **Missing**: No **ordering** — tools are listed but not sequenced by first-appearance time.
|
||||
- **Cheap fix**: Sort tool_guesses by first event timestamp; store as ordered list. Compute tool transition graph (tool A → tool B over time).
|
||||
- **Priority**: V2 (interesting; small extension to existing tool attribution).
|
||||
|
||||
#### Error-response psychology
|
||||
- **Status**: `not_captured`
|
||||
- **Why**: Requires analyzing how attacker reacts to failures (e.g., retry frequency after auth failure, command error recovery). Would need per-command success/failure tracking.
|
||||
- **Missing**: No error-categorization in logs; would need service-level event typing (auth_failure vs. auth_success, exec_error vs. exec_success).
|
||||
- **Cheap fix**: Extend service events to include success/failure indicators; compute attacker error-response metrics (retry rate, time-to-recovery, behavior change after error).
|
||||
- **Priority**: V2 backlog (niche; good for human vs. bot discrimination).
|
||||
|
||||
---
|
||||
|
||||
## Table Recommendations
|
||||
|
||||
### `AttackerBehavior` — Current & Recommended Additions
|
||||
|
||||
**Currently captured** (verified in models.py lines 161–194):
|
||||
- `tcp_fingerprint` (JSON) — window, wscale, mss, options_sig
|
||||
- `timing_stats` (JSON) — mean/median/stdev/min/max IAT
|
||||
- `phase_sequence` (JSON) — recon_end, exfil_start latency
|
||||
- `tool_guesses` (JSON list)
|
||||
- `beacon_interval_s`, `beacon_jitter_pct`
|
||||
- `behavior_class` (beaconing | interactive | scanning | …)
|
||||
|
||||
**Recommended additions for v1 (pre-v2, no schema bump)**:
|
||||
- `kex_order_raw` (MEDIUMTEXT, JSON list) — raw KEX algorithm strings from HASSH
|
||||
- `tls_fingerprints_full` (MEDIUMTEXT, JSON) — full JA3/JA4 raw strings, not just hashes
|
||||
- `ssh_client_banners` (MEDIUMTEXT, JSON list) — capture from TCP stream
|
||||
|
||||
**Reserved for v2**:
|
||||
- See SessionProfile below.
|
||||
|
||||
### `SessionProfile` — New Table (v2 roadmap in DEVELOPMENT_V2.md)
|
||||
|
||||
Design is already specified in DEVELOPMENT_V2.md (lines 71–104). Implement in v1 as an empty table + stubbed write path, ready for feature extraction post-v1.
|
||||
|
||||
**Columns** (from DEVELOPMENT_V2.md):
|
||||
- `sid` (TEXT PK)
|
||||
- `log_id` (FK to logs)
|
||||
- `schema_version` (INT, required for federation gossip)
|
||||
- Timing features: `kd_iki_mean`, `kd_iki_stdev`, `kd_iki_p50`, `kd_iki_p95`, `kd_enter_latency_p50`, `kd_enter_latency_p95`
|
||||
- Ratio features: `kd_burst_ratio`, `kd_think_ratio`
|
||||
- Control-char rates: `kd_ctrl_backspace`, `kd_ctrl_wkill`, `kd_ctrl_ukill`, `kd_ctrl_abort`, `kd_ctrl_eof`, `kd_arrow_rate`, `kd_tab_rate`
|
||||
- `kd_digraph_simhash` (BLOB, 8 bytes)
|
||||
- Derived: `total_keystrokes`, `session_duration_s`, `created_at`
|
||||
|
||||
**Note**: All keystroke-timing values are derivable from existing asciinema day-shard files on disk. Implement ingestion job in v2 (not v1 blocker).
|
||||
|
||||
### `AttackerAggregate` — New Table (v2+)
|
||||
|
||||
Columns (suggested):
|
||||
- `attacker_uuid` (PK, FK to attackers)
|
||||
- `activity_dist_by_hour` (JSON) — histogram of event counts by UTC hour
|
||||
- `session_duration_dist` (JSON) — percentiles of session durations
|
||||
- `recon_to_action_ratio` (REAL)
|
||||
- `lateral_movement_graph` (JSON) — decky traversal (src → dst edges with dwell times)
|
||||
- `tool_sequence` (JSON list) — tools in chronological order
|
||||
- `is_persistent` (BOOL) — persistence activity detected?
|
||||
- `updated_at` (TIMESTAMP)
|
||||
|
||||
---
|
||||
|
||||
## Full Per-Signal Capture Table
|
||||
|
||||
| Signal | Status | Where Captured | What's Missing | Cheap Fix | Priority |
|
||||
|--------|--------|-----------------|-----------------|-----------|----------|
|
||||
| **Session Environment** |
|
||||
| TERM | partial | SSH pty-req, server-readable | No syslog emission, no DB | Patch SSH syslog bridge to emit term= | V2 |
|
||||
| LANG/LC_ALL | n/a | Server locale, not attacker-controlled | Not visible from server vantage | Defer (not capturable) | defer |
|
||||
| SSH client version | partial | TCP stream (plaintext identification banner, sent before key exchange) | Sniffer doesn't parse SSH banners; only TLS fingerprints | Extend sniffer to extract SSH banner from TCP stream | v1 blocker |
|
||||
| Terminal size (COLS×ROWS) | not_captured | SSH pty-req extension | Requires protocol interception or sshd patch | Patch sshd to log pty-req | V2 |
|
||||
| **Keyboard/Human** |
|
||||
| Per-keystroke timing | partial | Asciinema "i" events with t timestamps | Files on disk, not ingested to DB | Implement SessionProfile table + ingest job | v1 blocker |
|
||||
| Control-character stream | partial | Asciinema keystroke bytes | Same as above (files only) | Same as above | v1 blocker |
|
||||
| Inter-command think time | not_captured | Requires prompt detection | Heuristic (line ending in $/#) not implemented | Post-hoc: regex + gap detection over asciinema | V2 |
|
||||
| Pause before sensitive cmd | not_captured | Would be in asciinema timing | Requires command-line parsing + gap detection | Off-line analysis of asciinema | V2 |
|
||||
| Command n-grams | partial | Attacker.commands (generic list) | Per-session structure missing | Parse asciinema I/O; store in SessionProfile | V2 |
|
||||
| Flag preferences | not_captured | Asciinema input has typed flags | No parsing or normalization | Regex-parse and canonicalize flags from asciinema | V2 |
|
||||
| Typo patterns | not_captured | Raw keystroke sequence in asciinema "i" | Requires keystroke-by-keystroke reconstruction | Parse "i" events with backspace markers; reconstruct line state | V2 |
|
||||
| Editor choice | partial | Attacker.commands shows editor launch | No aggregation or time-in-editor | Count editor invocations; store preference in SessionProfile | V2 |
|
||||
| Shell history usage | partial | Command input shows !, ^, !! | No parsing for history operators | Regex-scan for shell history syntax; count | V2 |
|
||||
| **SSH Transport** |
|
||||
| HASSH/HASSHServer | captured | Prober (hassh.py); Attacker.fingerprints | ✓ (hash + raw algorithm strings in syslog) | Already done | — |
|
||||
| KEX algorithm order | partial | Syslog event kex_algorithms= field | Not persisted to DB (only in syslog) | Add AttackerBehavior.kex_order_raw (MEDIUMTEXT, JSON) | v1 blocker |
|
||||
| Public key comment | not_captured | SSH wire format (auth_pubkey) | Requires server-side auth hook | Patch sshd to emit auth_pubkey_comment event | V2 |
|
||||
| Private key type | partial | SSH wire format (auth algorithm OID) | Encrypted after KEX; needs sshd hook | Patch sshd to emit auth_key_type event | V2 |
|
||||
| Agent forwarding? | not_captured | SSH extension negotiation (encrypted) | Requires sshd instrumentation | Patch sshd to detect the `auth-agent-req@openssh.com` channel request | V2 |
|
||||
| Channel multiplexing | partial | SSH service logs commands separately | No channel state machine | Instrument sshd SSH_MSG_CHANNEL_OPEN events | V2 |
|
||||
| SSH_CLIENT env vars | captured | Server sets automatically; queryable via shell | No automatic logging | Patch sshd PAM to emit SSH_CLIENT on auth | V2 |
|
||||
| **Network/Transport** |
|
||||
| TCP timestamp skew | partial | PCAP + sniffer has has_timestamps flag | Only boolean; not timestamp values | Extract timestamp seq numbers in sniffer | V2 |
|
||||
| TCP ISN generator | not_captured | PCAP SYN seq field | No per-connection ISN logging | Log syn_seq in tcp_syn_fingerprint event | V2 |
|
||||
| TCP options ordering | partial | Sniffer extracts options_sig signature | Aggregated string; no raw order per-packet | Extend tcp_fingerprint JSON with raw options list | v1 improvement |
|
||||
| Initial congestion window | not_captured | Would require per-packet ACK analysis | Not tracked in sniffer | Extend tcp_flow_timing to include payload sizes list | V2 |
|
||||
| Retransmit timing+backoff | partial | Sniffer counts retransmits; no timing | RTO/backoff timing not logged | Extend event to include RTO deltas | V2 |
|
||||
| MTU/path-MTU discovery | partial | MSS in TCP SYN; byte counts per flow | No ICMP fragmentation-needed events | Add ICMP processing; correlate with TCP flows | V2 |
|
||||
| Packet pacing (μs) | not_captured | Sniffer uses millisecond granularity | Needs PCAP hardware timestamps or OS hooks | Upgrade to sub-millisecond timing | V2+ |
|
||||
| Window scaling | captured | TCP fingerprint; wscale in AttackerBehavior | ✓ queryable | — | — |
|
||||
| ECN negotiation | not_captured | TCP SYN flags (CWR/ECE) + options | Not extracted from TCP header | Extend TCP fingerprint to parse ECN bits | V2 |
|
||||
| **L7 (TLS/HTTP)** |
|
||||
| TLS fingerprint (JA3/JA4) | captured | Sniffer fingerprint.py; Attacker.fingerprints | ✓ hashes stored + syslog | Already done | — |
|
||||
| HTTP/2 SETTINGS order | not_captured | Encrypted inside TLS | Passive inspection not viable | Defer (not capturable) | defer |
|
||||
| HTTP/2 prioritization | not_captured | Encrypted | Not capturable | defer | defer |
|
||||
| HTTP header ordering | not_captured | Encrypted; requires service logging | Service doesn't log raw headers | Patch HTTP service to log header order | V2 |
|
||||
| Cookie handling | not_captured | Requires HTTP state machine | Not tracked | Add cookie jar logging to HTTP service | V2 |
|
||||
| **Aggregated/Derived** |
|
||||
| Time-of-day distribution | partial | Timestamps on all events | No aggregation table | Batch job: hour-of-day histogram → AttackerAggregate | V2 |
|
||||
| Session duration dist | partial | SessionProfile would have duration | No SessionProfile table yet | Implement SessionProfile + duration stats | V2 |
|
||||
| Recon-to-action ratio | partial | AttackerBehavior.phase_sequence | No per-attacker ratio column | Compute ratio in profiler; store in AttackerAggregate | V2 |
|
||||
| Lateral movement style | not_captured | Correlation engine has traversal path | No traversal pattern analysis | Extend engine to compute dwell time + fan-out metrics | V2 |
|
||||
| Persistence-first vs. exfil | not_captured | No persistence event taxonomy | Needs event-type classification | Add PERSISTENCE_EVENT_TYPES; compute timings | V2 |
|
||||
| Tool-chain ordering | partial | tool_guesses list exists; unordered | No temporal ordering | Sort by first-event timestamp; build transition graph | V2 |
|
||||
| Error-response psych | not_captured | No success/failure event tagging | Requires per-command outcome tracking | Extend service events with status=success/failure | V2 |
|
||||
|
||||
---
|
||||
|
||||
## Pre-v1 Capture Gaps (Actionable, Blocky)
|
||||
|
||||
**Only tackle these if the signal is committed to the v1 roadmap:**
|
||||
|
||||
1. **KEX algorithm ordering** (ssh-transport)
|
||||
- **Action**: Add `AttackerBehavior.kex_order_raw` (MEDIUMTEXT, JSON list of algorithm strings).
|
||||
- **Effort**: 2 hrs (schema + sniffer event parser + profiler aggregator).
|
||||
- **Blocker?**: Only if roadmap demands full KEX analysis (currently only HASSH hash is promised).
|
||||
|
||||
2. **Per-keystroke timing ingestion** (keyboard/human)
|
||||
- **Action**: Create `SessionProfile` table (design in DEVELOPMENT_V2.md); stub write path with all NULLs.
|
||||
- **Effort**: 4 hrs (schema + migration + DAL).
|
||||
- **Blocker?**: Yes, if keystroke dynamics is v1 roadmap. Data exists on disk but is not queryable.
|
||||
|
||||
3. **SSH client banner capture** (ssh-transport)
|
||||
- **Action**: Extend sniffer to parse SSH identification banners from the TCP stream (sent in plaintext before key exchange); emit ssh_client_hello event.
|
||||
- **Effort**: 3 hrs (TCP stream parser + sniffer integration).
|
||||
- **Blocker?**: Yes, if full SSH client profiling is v1 roadmap (currently only server banner via HASSH).
|
||||
|
||||
4. **TCP options raw extraction** (network/transport)
|
||||
- **Action**: Extend `tcp_fingerprint` JSON to include raw options list (not just signature string).
|
||||
- **Effort**: 1 hr (minimal schema change + sniffer parser).
|
||||
- **Blocker?**: No (options_sig is good enough for current p0f-style fingerprinting; nice-to-have).
|
||||
|
||||
---
|
||||
|
||||
## Non-Capturable Signals (Explicit Deferral)
|
||||
|
||||
These require vantage-point changes or are architecturally infeasible:
|
||||
|
||||
| Signal | Why | Vantage Point Needed |
|
||||
|--------|-----|----------------------|
|
||||
| LANG / LC_ALL | Server locale is fixed; attacker's client locale invisible over SSH | Client-side instrumentation |
|
||||
| HTTP/2 SETTINGS frame order | Encrypted inside TLS stream | Server-side decryption or key log |
|
||||
| HTTP/2 stream prioritization | Encrypted | Server-side decryption |
|
||||
| Initial congestion window (CWND) | Requires detailed TCP ACK-clock tracking | Per-packet sniffer instrumentation |
|
||||
| Packet pacing (μs resolution) | Requires hardware-timestamped PCAP or kernel hooks | OS-level instrumentation |
|
||||
| Hold time / pressure / velocity (typing biometrics) | Not on SSH wire | Client-side (keyboard-level) instrumentation |
|
||||
|
||||
---
|
||||
|
||||
## Summary for v1 Release
|
||||
|
||||
**Ship with these (already captured, queryable)**:
|
||||
- HASSH/HASSHServer ✓
|
||||
- JA3/JA3S/JA4/JA4S ✓
|
||||
- TLS session resumption ✓
|
||||
- TCP fingerprint (window, wscale, mss, options_sig) ✓
|
||||
- Behavioral timing stats (mean/median/stdev IAT) ✓
|
||||
- Phase sequencing (recon_end, exfil_start) ✓
|
||||
- Tool attribution (beacon timing + headers) ✓
|
||||
|
||||
**Data exists on disk, not queryable (v1 deferral acceptable)**:
|
||||
- Per-keystroke timing (asciinema day-shards) — needs SessionProfile ingestion job
|
||||
- SSH client banner (TCP stream) — needs sniffer enhancement
|
||||
- KEX algorithm order (syslog) — needs AttackerBehavior.kex_order_raw column
|
||||
|
||||
**Requires infrastructure changes (v2+)**:
|
||||
- Lateral movement graph analysis
|
||||
- HTTP header order + cookie jar behavior
|
||||
- Persistence-first vs. exfil-first classification
|
||||
- Error-response psychology
|
||||
- Chronotyping + session duration distribution
|
||||
|
||||
---
|
||||
|
||||
## Federation & Cross-Operator Gossip (v2 Implications)
|
||||
|
||||
The `SessionProfile` schema (table, schema_version field, numeric features) is designed to be the federation wire format. **No changes needed for v1**, but ensure schema_version is in the table definition from day one so gossip compatibility is straightforward in v2.
|
||||
|
||||
---
|
||||
|
||||
## Appendices
|
||||
|
||||
### A. Code Paths Audited
|
||||
|
||||
- `decnet/sniffer/fingerprint.py` — TLS + TCP fingerprinting engine
|
||||
- `decnet/services/ssh.py` — SSH service config + artifact paths
|
||||
- `decnet/prober/hassh.py` — HASSHServer computation
|
||||
- `decnet/web/db/models.py` — SQL schema (Attacker, AttackerBehavior, etc.)
|
||||
- `decnet/profiler/behavioral.py` — Timing + tool attribution
|
||||
- `decnet/correlation/parser.py` — RFC 5424 syslog ingestion
|
||||
- `decnet/templates/ssh/` — Session recording (asciinema), syslog bridge, capture.sh
|
||||
|
||||
### B. Storage Destinations Verified
|
||||
|
||||
- **Database**: SQLite/MySQL tables (Attacker, AttackerBehavior, Bounty, Log)
|
||||
- **Syslog**: RFC 5424 events (parsed by correlation engine, optionally piped to ELK)
|
||||
- **Disk**: Asciinema day-shards (`/var/lib/decnet/session_recordings/`), raw PCAP (retention TBD)
|
||||
- **Memory**: Sniffer state (sessions, flows, dedup cache) — lost on restart unless replayed from PCAP
|
||||
|
||||
### C. Roadmap Cross-Reference
|
||||
|
||||
- DEVELOPMENT.md lines 48–133: Attacker Intelligence Collection (TLS, behavioral, protocol fingerprinting, network topology, geolocation, service-level, aggregated).
|
||||
- `[x]` JA3/JA3S, JA4+, JARM, session resumption, TCP window/scaling, retransmits, beaconing, data exfil timing, HASSH/HASSHServer, HTTP/2 fingerprint, TLS session resumption, TTL values (partial), TCP stack fingerprinting.
|
||||
- `[ ]` (not v1): ISN patterns, HTTP header ordering, QUIC, DNS, IPv6/mDNS leakage, geolocation, service-level commands, credential reuse, payload signatures.
|
||||
|
||||
- DEVELOPMENT_V2.md: Keystroke dynamics, session profiling, federation.
|
||||
- SessionProfile schema (lines 71–104) — not yet implemented; ready-to-implement design.
|
||||
- Correlation via simhash (lines 50–56) — digraph rhythm fingerprinting.
|
||||
|
||||
---
|
||||
|
||||
147
alembic.ini
147
alembic.ini
@@ -1,147 +0,0 @@
|
||||
# A generic, single database configuration.
|
||||
|
||||
[alembic]
|
||||
# path to migration scripts.
|
||||
# this is typically a path given in POSIX (e.g. forward slashes)
|
||||
# format, relative to the token %(here)s which refers to the location of this
|
||||
# ini file
|
||||
script_location = %(here)s/decnet/web/db/migrations
|
||||
|
||||
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
|
||||
# Uncomment the line below if you want the files to be prepended with date and time
|
||||
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
|
||||
# for all available tokens
|
||||
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
|
||||
# Or organize into date-based subdirectories (requires recursive_version_locations = true)
|
||||
# file_template = %%(year)d/%%(month).2d/%%(day).2d_%%(hour).2d%%(minute).2d_%%(second).2d_%%(rev)s_%%(slug)s
|
||||
|
||||
# sys.path path, will be prepended to sys.path if present.
|
||||
# defaults to the current working directory. for multiple paths, the path separator
|
||||
# is defined by "path_separator" below.
|
||||
prepend_sys_path = .
|
||||
|
||||
|
||||
# timezone to use when rendering the date within the migration file
|
||||
# as well as the filename.
|
||||
# If specified, requires the tzdata library which can be installed by adding
|
||||
# `alembic[tz]` to the pip requirements.
|
||||
# string value is passed to ZoneInfo()
|
||||
# leave blank for localtime
|
||||
# timezone =
|
||||
|
||||
# max length of characters to apply to the "slug" field
|
||||
# truncate_slug_length = 40
|
||||
|
||||
# set to 'true' to run the environment during
|
||||
# the 'revision' command, regardless of autogenerate
|
||||
# revision_environment = false
|
||||
|
||||
# set to 'true' to allow .pyc and .pyo files without
|
||||
# a source .py file to be detected as revisions in the
|
||||
# versions/ directory
|
||||
# sourceless = false
|
||||
|
||||
# version location specification; This defaults
|
||||
# to <script_location>/versions. When using multiple version
|
||||
# directories, initial revisions must be specified with --version-path.
|
||||
# The path separator used here should be the separator specified by "path_separator"
|
||||
# below.
|
||||
# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
|
||||
|
||||
# path_separator; This indicates what character is used to split lists of file
|
||||
# paths, including version_locations and prepend_sys_path within configparser
|
||||
# files such as alembic.ini.
|
||||
# The default rendered in new alembic.ini files is "os", which uses os.pathsep
|
||||
# to provide os-dependent path splitting.
|
||||
#
|
||||
# Note that in order to support legacy alembic.ini files, this default does NOT
|
||||
# take place if path_separator is not present in alembic.ini. If this
|
||||
# option is omitted entirely, fallback logic is as follows:
|
||||
#
|
||||
# 1. Parsing of the version_locations option falls back to using the legacy
|
||||
# "version_path_separator" key, which if absent then falls back to the legacy
|
||||
# behavior of splitting on spaces and/or commas.
|
||||
# 2. Parsing of the prepend_sys_path option falls back to the legacy
|
||||
# behavior of splitting on spaces, commas, or colons.
|
||||
#
|
||||
# Valid values for path_separator are:
|
||||
#
|
||||
# path_separator = :
|
||||
# path_separator = ;
|
||||
# path_separator = space
|
||||
# path_separator = newline
|
||||
#
|
||||
# Use os.pathsep. Default configuration used for new projects.
|
||||
path_separator = os
|
||||
|
||||
# set to 'true' to search source files recursively
|
||||
# in each "version_locations" directory
|
||||
# new in Alembic version 1.10
|
||||
# recursive_version_locations = false
|
||||
|
||||
# the output encoding used when revision files
|
||||
# are written from script.py.mako
|
||||
# output_encoding = utf-8
|
||||
|
||||
# NOTE: no sqlalchemy.url here on purpose. env.py selects the engine from
|
||||
# DECNET_DB_TYPE (sqlite|mysql), mirroring decnet/web/db/factory.py.
|
||||
|
||||
|
||||
[post_write_hooks]
|
||||
# post_write_hooks defines scripts or Python functions that are run
|
||||
# on newly generated revision scripts. See the documentation for further
|
||||
# detail and examples
|
||||
|
||||
# format using "black" - use the console_scripts runner, against the "black" entrypoint
|
||||
# hooks = black
|
||||
# black.type = console_scripts
|
||||
# black.entrypoint = black
|
||||
# black.options = -l 79 REVISION_SCRIPT_FILENAME
|
||||
|
||||
# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
|
||||
# hooks = ruff
|
||||
# ruff.type = module
|
||||
# ruff.module = ruff
|
||||
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
|
||||
|
||||
# Alternatively, use the exec runner to execute a binary found on your PATH
|
||||
# hooks = ruff
|
||||
# ruff.type = exec
|
||||
# ruff.executable = ruff
|
||||
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
|
||||
|
||||
# Logging configuration. This is also consumed by the user-maintained
|
||||
# env.py script only.
|
||||
[loggers]
|
||||
keys = root,sqlalchemy,alembic
|
||||
|
||||
[handlers]
|
||||
keys = console
|
||||
|
||||
[formatters]
|
||||
keys = generic
|
||||
|
||||
[logger_root]
|
||||
level = WARNING
|
||||
handlers = console
|
||||
qualname =
|
||||
|
||||
[logger_sqlalchemy]
|
||||
level = WARNING
|
||||
handlers =
|
||||
qualname = sqlalchemy.engine
|
||||
|
||||
[logger_alembic]
|
||||
level = INFO
|
||||
handlers =
|
||||
qualname = alembic
|
||||
|
||||
[handler_console]
|
||||
class = StreamHandler
|
||||
args = (sys.stderr,)
|
||||
level = NOTSET
|
||||
formatter = generic
|
||||
|
||||
[formatter_generic]
|
||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||
datefmt = %H:%M:%S
|
||||
1000
api-audit.md
Normal file
1000
api-audit.md
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
|
Before Width: | Height: | Size: 10 KiB |
@@ -1,5 +0,0 @@
|
||||
# bait/
|
||||
|
||||
Default operator-supplied email seed for IMAP/POP3 deckies. Drop `*.eml` and/or `*.json` files here; the IMAP/POP3 services bind-mount this dir read-only at `/var/spool/decnet-emails/seed` when no per-decky `email_seed` is configured. Entries concatenate onto the hardcoded bait baseline (additive to realism-engine output, never replacing).
|
||||
|
||||
JSON shape: list of dicts with required `from_addr`, `to_addr`, `subject`, `body`; optional `from_name`, `date`, `flags`. See `decnet/templates/imap/server.py` for the loader.
|
||||
BIN
decnet.tar
BIN
decnet.tar
Binary file not shown.
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""DECNET — honeypot deception-network framework.
|
||||
|
||||
This __init__ runs once, on the first `import decnet.*`. It seeds
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""DECNET worker agent — runs on every SWARM worker host.
|
||||
|
||||
Exposes an mTLS-protected FastAPI service the master's SWARM controller
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Worker-side FastAPI app.
|
||||
|
||||
Protected by mTLS at the ASGI/uvicorn transport layer: uvicorn is started
|
||||
@@ -26,7 +25,6 @@ from contextlib import asynccontextmanager
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
import contextlib
|
||||
@@ -183,7 +181,6 @@ class TeardownRequest(BaseModel):
|
||||
class MutateRequest(BaseModel):
|
||||
decky_id: str
|
||||
services: list[str]
|
||||
dry_run: bool = False
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ routes
|
||||
@@ -200,22 +197,15 @@ async def status() -> dict:
|
||||
|
||||
@app.post(
|
||||
"/deploy",
|
||||
status_code=202,
|
||||
responses={202: {"description": "Deploy accepted; runs in background; lifecycle deltas pushed via heartbeat"}},
|
||||
responses={500: {"description": "Deployer raised an exception materialising the config"}},
|
||||
)
|
||||
async def deploy(req: DeployRequest) -> dict:
|
||||
"""Spawn the deploy in the background and return 202 immediately.
|
||||
|
||||
The master tracks per-decky completion via lifecycle deltas pushed on
|
||||
the next heartbeat (one immediate push on completion, plus the
|
||||
scheduled 30 s ticks as a fallback). Holding the request open across
|
||||
a multi-minute compose build was the previous source of the wizard
|
||||
API-hang."""
|
||||
asyncio.create_task(
|
||||
_exec.deploy_async(req.config, dry_run=req.dry_run, no_cache=req.no_cache),
|
||||
name=f"deploy-{id(req)}",
|
||||
)
|
||||
return {"status": "accepted", "deckies": [d.name for d in req.config.deckies]}
|
||||
try:
|
||||
await _exec.deploy(req.config, dry_run=req.dry_run, no_cache=req.no_cache)
|
||||
except Exception as exc:
|
||||
log.exception("agent.deploy failed")
|
||||
raise HTTPException(status_code=500, detail=str(exc)) from exc
|
||||
return {"status": "deployed", "deckies": len(req.config.deckies)}
|
||||
|
||||
|
||||
@app.post(
|
||||
@@ -317,50 +307,14 @@ async def topology_state() -> dict:
|
||||
|
||||
@app.post(
|
||||
"/mutate",
|
||||
status_code=202,
|
||||
responses={
|
||||
202: {"description": "Mutate accepted; runs in background; lifecycle delta pushed via heartbeat"},
|
||||
404: {"description": "No active deployment, or unknown decky_id (dry_run validation only)"},
|
||||
},
|
||||
responses={501: {"description": "Worker-side mutate not yet implemented"}},
|
||||
)
|
||||
async def mutate(req: MutateRequest) -> Any:
|
||||
"""Spawn the mutate in the background and return 202 immediately.
|
||||
|
||||
Master tracks completion via a lifecycle delta pushed on the next
|
||||
heartbeat (immediate push on completion). ``dry_run`` is still
|
||||
synchronous — it validates against the worker's current state and
|
||||
returns the would-be services without spawning a task or touching
|
||||
docker, so the wizard's preview path stays cheap."""
|
||||
if req.dry_run:
|
||||
from decnet.config import load_state
|
||||
state = load_state()
|
||||
if state is None:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail="no active deployment on this worker",
|
||||
)
|
||||
cfg, _ = state
|
||||
decky = next((d for d in cfg.deckies if d.name == req.decky_id), None)
|
||||
if decky is None:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"decky {req.decky_id!r} not found in worker state",
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=200,
|
||||
content={
|
||||
"status": "dry_run",
|
||||
"decky_id": req.decky_id,
|
||||
"services": list(req.services),
|
||||
},
|
||||
)
|
||||
|
||||
asyncio.create_task(
|
||||
_exec.mutate_async(req.decky_id, list(req.services)),
|
||||
name=f"mutate-{req.decky_id}",
|
||||
async def mutate(req: MutateRequest) -> dict:
|
||||
# TODO: implement worker-side mutate. Currently the master performs
|
||||
# mutation by re-sending a full /deploy with the updated DecnetConfig;
|
||||
# this avoids duplicating mutation logic on the worker for v1. When
|
||||
# ready, replace the 501 with a real redeploy-of-a-single-decky path.
|
||||
raise HTTPException(
|
||||
status_code=501,
|
||||
detail="Per-decky mutate is performed via /deploy with updated services",
|
||||
)
|
||||
return {
|
||||
"status": "accepted",
|
||||
"decky_id": req.decky_id,
|
||||
"services": list(req.services),
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Thin adapter between the agent's HTTP endpoints and the existing
|
||||
``decnet.engine.deployer`` code path.
|
||||
|
||||
@@ -81,99 +80,6 @@ async def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = F
|
||||
await asyncio.to_thread(_deployer.deploy, config, dry_run, no_cache, False)
|
||||
|
||||
|
||||
async def deploy_async(
|
||||
config: DecnetConfig, *, dry_run: bool = False, no_cache: bool = False,
|
||||
) -> None:
|
||||
"""Background-task body for /deploy: run the deploy, then push a
|
||||
lifecycle delta to the master so it observes terminal transitions
|
||||
immediately rather than waiting for the next scheduled heartbeat.
|
||||
|
||||
Per-decky lifecycle deltas — master pivots them onto the matching
|
||||
open DeckyLifecycle rows via the heartbeat handler. Errors are
|
||||
captured and pushed as ``failed`` deltas; the task itself never
|
||||
raises (a crashed task would just leave master rows wedged).
|
||||
"""
|
||||
from datetime import datetime, timezone
|
||||
from decnet.agent.heartbeat import push_lifecycle_delta
|
||||
|
||||
decky_names = [d.name for d in config.deckies]
|
||||
try:
|
||||
await deploy(config, dry_run=dry_run, no_cache=no_cache)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
log.exception("agent.deploy_async failed")
|
||||
err = f"{type(exc).__name__}: {exc}"
|
||||
deltas = [
|
||||
{
|
||||
"decky_name": name, "operation": "deploy",
|
||||
"status": "failed", "error": err[:2000],
|
||||
"completed_at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
for name in decky_names
|
||||
]
|
||||
await push_lifecycle_delta(deltas)
|
||||
return
|
||||
deltas = [
|
||||
{
|
||||
"decky_name": name, "operation": "deploy",
|
||||
"status": "succeeded",
|
||||
"completed_at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
for name in decky_names
|
||||
]
|
||||
await push_lifecycle_delta(deltas)
|
||||
|
||||
|
||||
async def mutate_async(decky_id: str, services: list[str]) -> None:
    """Replace one decky's service list and report the outcome to the master.

    Loads the worker state, swaps the ``services`` of the decky named
    *decky_id*, stamps ``last_mutated``, persists state and compose file,
    then runs ``compose up -d --remove-orphans`` in a worker thread.
    Exactly one lifecycle delta (``succeeded`` or ``failed``) is pushed per
    call; failures inside the work section are logged, not re-raised.
    """
    import time
    from datetime import datetime, timezone
    from decnet.composer import write_compose
    from decnet.config import load_state, save_state
    from decnet.engine import _compose_with_retry
    from decnet.agent.heartbeat import push_lifecycle_delta

    def _delta(status: str, error: str | None = None) -> dict:
        payload = {
            "decky_name": decky_id, "operation": "mutate",
            "status": status,
            "completed_at": datetime.now(timezone.utc).isoformat(),
        }
        if error is not None:
            # Cap the message so a huge traceback string cannot bloat the POST.
            payload["error"] = error[:2000]
        return payload

    async def _report(status: str, error: str | None = None) -> None:
        await push_lifecycle_delta([_delta(status, error)])

    try:
        loaded = load_state()
        if loaded is None:
            await _report("failed", "no active deployment on this worker")
            return

        cfg, compose_path = loaded
        target = None
        for candidate in cfg.deckies:
            if candidate.name == decky_id:
                target = candidate
                break
        if target is None:
            await _report(
                "failed", f"decky {decky_id!r} not found in worker state",
            )
            return

        target.services = list(services)
        target.last_mutated = time.time()
        save_state(cfg, compose_path)
        write_compose(cfg, compose_path)
        # Compose is blocking; run it off the event loop.
        await asyncio.to_thread(
            _compose_with_retry, "up", "-d", "--remove-orphans",
            compose_file=compose_path,
        )
    except Exception as exc:  # noqa: BLE001
        log.exception("agent.mutate_async failed decky=%s", decky_id)
        await _report("failed", f"{type(exc).__name__}: {exc}")
        return

    await _report("succeeded")
|
||||
|
||||
|
||||
async def teardown(decky_id: str | None = None) -> None:
|
||||
log.info("agent.teardown decky_id=%s", decky_id)
|
||||
await asyncio.to_thread(_deployer.teardown, decky_id)
|
||||
@@ -288,7 +194,7 @@ async def self_destruct() -> None:
|
||||
argv = ["/bin/bash", path]
|
||||
spawn_kwargs = {"start_new_session": True}
|
||||
|
||||
subprocess.Popen( # type: ignore[call-overload] # nosec B603
|
||||
subprocess.Popen( # nosec B603
|
||||
argv,
|
||||
stdin=subprocess.DEVNULL,
|
||||
stdout=subprocess.DEVNULL,
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Agent → master liveness heartbeat loop.
|
||||
|
||||
Every ``INTERVAL_S`` seconds the worker posts ``executor.status()`` to
|
||||
@@ -51,11 +50,7 @@ def _resolve_agent_dir() -> pathlib.Path:
|
||||
return pki.DEFAULT_AGENT_DIR
|
||||
|
||||
|
||||
async def _build_body(
|
||||
host_uuid: str,
|
||||
agent_version: str,
|
||||
lifecycle: Optional[list[dict]] = None,
|
||||
) -> dict:
|
||||
async def _tick(client: httpx.AsyncClient, url: str, host_uuid: str, agent_version: str) -> None:
|
||||
snap = await _exec.status()
|
||||
body: dict = {
|
||||
"host_uuid": host_uuid,
|
||||
@@ -75,13 +70,7 @@ async def _build_body(
|
||||
store.close()
|
||||
except Exception:
|
||||
log.debug("heartbeat: topology state unavailable", exc_info=True)
|
||||
if lifecycle:
|
||||
body["lifecycle"] = lifecycle
|
||||
return body
|
||||
|
||||
|
||||
async def _tick(client: httpx.AsyncClient, url: str, host_uuid: str, agent_version: str) -> None:
|
||||
body = await _build_body(host_uuid, agent_version)
|
||||
resp = await client.post(url, json=body)
|
||||
# 403 / 404 are terminal-ish — we still keep looping because an
|
||||
# operator may re-enrol the host mid-session, but we log loudly so
|
||||
@@ -132,7 +121,7 @@ def start() -> Optional[asyncio.Task]:
|
||||
return None
|
||||
|
||||
try:
|
||||
from decnet import __version__ as _v # type: ignore[attr-defined]
|
||||
from decnet import __version__ as _v
|
||||
agent_version = _v
|
||||
except Exception:
|
||||
agent_version = "unknown"
|
||||
@@ -145,59 +134,6 @@ def start() -> Optional[asyncio.Task]:
|
||||
return _task
|
||||
|
||||
|
||||
async def push_lifecycle_delta(deltas: list[dict]) -> None:
    """Send a one-off heartbeat POST whose ``lifecycle`` field is *deltas*.

    Each delta is a dict of the form ``{decky_name, operation, status,
    error?, completed_at?}``. The request goes to
    ``https://<master>:<port>/swarm/heartbeat`` with a body built by
    ``_build_body``.

    Does nothing when *deltas* is empty or when the host UUID / master host
    are not configured. An unavailable SSL context, a transport error, or a
    non-200/204 response is logged and swallowed rather than raised.
    """
    from decnet.env import (
        DECNET_HOST_UUID,
        DECNET_MASTER_HOST,
        DECNET_SWARMCTL_PORT,
    )

    if not deltas:
        return
    if not (DECNET_HOST_UUID and DECNET_MASTER_HOST):
        log.debug("push_lifecycle_delta: identity unconfigured — skipping")
        return

    agent_dir = _resolve_agent_dir()
    try:
        tls_context = build_worker_ssl_context(agent_dir)
    except Exception:
        log.exception("push_lifecycle_delta: SSL context unavailable")
        return

    try:
        from decnet import __version__ as agent_version  # type: ignore[attr-defined]
    except Exception:
        agent_version = "unknown"

    endpoint = f"https://{DECNET_MASTER_HOST}:{DECNET_SWARMCTL_PORT}/swarm/heartbeat"
    try:
        async with httpx.AsyncClient(verify=tls_context, timeout=_TIMEOUT) as session:
            payload = await _build_body(
                DECNET_HOST_UUID, agent_version, lifecycle=deltas,
            )
            reply = await session.post(endpoint, json=payload)
            accepted = reply.status_code in (200, 204)
            if not accepted:
                log.warning(
                    "lifecycle delta push rejected status=%d body=%s",
                    reply.status_code, reply.text[:200],
                )
    except Exception:
        log.exception("push_lifecycle_delta failed — next scheduled tick will retry")
|
||||
|
||||
|
||||
async def stop() -> None:
|
||||
global _task
|
||||
if _task is None:
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Worker-agent uvicorn launcher.
|
||||
|
||||
Starts ``decnet.agent.app:app`` over HTTPS with mTLS enforcement. The
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Agent-side topology apply/teardown/state primitives.
|
||||
|
||||
Wraps the compose + bridge machinery from :mod:`decnet.engine.deployer`
|
||||
@@ -29,7 +28,6 @@ from decnet.engine.deployer import (
|
||||
_compose_with_retry,
|
||||
_teardown_order,
|
||||
_topology_compose_path,
|
||||
_topology_compose_project,
|
||||
)
|
||||
from decnet.logging import get_logger
|
||||
from decnet.network import create_bridge_network, remove_bridge_network
|
||||
@@ -61,77 +59,6 @@ def _topology_id(hydrated: dict[str, Any]) -> str:
|
||||
return str(tid)
|
||||
|
||||
|
||||
def _check_hash_and_validate(hydrated: dict[str, Any], version_hash: str) -> str:
    """Check the topology's hash and structure, then return its topology id.

    Raises:
        HashMismatch: the locally computed canonical hash of *hydrated*
            differs from *version_hash*.
        ValidationError: validation reported at least one error-level issue.
    """
    computed = canonical_hash(hydrated)
    if computed != version_hash:
        raise HashMismatch(
            f"master hash {version_hash!r} does not match agent hash "
            f"{computed!r} — refusing to apply"
        )

    findings = _validate_topology(hydrated)
    if _validation_errors(findings):
        raise ValidationError(findings)

    return _topology_id(hydrated)
|
||||
|
||||
|
||||
async def _teardown_superseded(topology_id: str, store: TopologyStore) -> None:
    """Remove whatever topology the store holds if it is not *topology_id*.

    The master decides which topology this agent runs, so a different
    pinned one (applied, half-applied, or drifted) is torn down instead of
    rejecting the new apply. Teardown is best effort: a failure is logged
    and the store row is cleared anyway so the new apply is not blocked.
    """
    pinned = store.current()
    if pinned is not None and pinned.topology_id != topology_id:
        log.info(
            "superseding topology %s with %s on master authority",
            pinned.topology_id, topology_id,
        )
        try:
            await teardown(pinned.topology_id, store)
        except Exception as exc:  # noqa: BLE001 — we still want to try applying
            log.warning(
                "best-effort teardown of superseded topology %s failed: %s",
                pinned.topology_id, exc,
            )
            # Drop the row unconditionally so a half-dismantled predecessor
            # cannot block the new apply; leftover docker objects are
            # expected to show up in the next heartbeat's observed block.
            store.clear(pinned.topology_id)
|
||||
|
||||
|
||||
def _materialise(hydrated: dict[str, Any], topology_id: str) -> None:
    """Create bridge networks, write the compose file, and start containers.

    Synchronous and blocking — callers must dispatch via
    ``asyncio.to_thread``.

    One bridge network is created per entry in ``hydrated["lans"]``; a LAN
    is marked ``internal`` unless its ``is_dmz`` flag is set. The Docker
    client is only needed for that step and is closed as soon as it is done
    so its connection is not left open.

    ``--always-recreate-deps`` keeps service containers' netns shares
    fresh: every decky service joins its base's netns via
    ``network_mode: container:<base>``, and that share is bound at
    service start time. If a base is recreated (e.g. when ``ports:``
    changes after toggling ``forwards_l3``) but compose decides the
    services are unchanged, the services keep a stale netns FD
    pointing at the destroyed base — they end up in an empty
    namespace with only ``lo``, and external traffic hits a closed
    port on the live base. Forcing dependents to recreate alongside
    the base is the cheapest way to make this race impossible.
    """
    compose_path = _topology_compose_path(topology_id)
    compose_project = _topology_compose_project(topology_id)

    client = docker.from_env()
    try:
        for lan in hydrated["lans"]:
            net_name = _topology_network_name(topology_id, lan["name"])
            create_bridge_network(
                client, net_name, lan["subnet"], internal=not lan["is_dmz"],
            )
    finally:
        # Release the client's connection even if a bridge failed.
        client.close()

    write_topology_compose(hydrated, compose_path)
    _compose_with_retry(
        "up", "--build", "-d", "--always-recreate-deps",
        compose_file=compose_path, project=compose_project,
    )
|
||||
|
||||
|
||||
async def apply(
|
||||
hydrated: dict[str, Any],
|
||||
version_hash: str,
|
||||
@@ -146,11 +73,76 @@ async def apply(
|
||||
Any docker / compose error propagates up; the endpoint maps it
|
||||
to 500 and records the message on the store row.
|
||||
"""
|
||||
topology_id = _check_hash_and_validate(hydrated, version_hash)
|
||||
await _teardown_superseded(topology_id, store)
|
||||
await asyncio.to_thread(_materialise, hydrated, topology_id)
|
||||
local_hash = canonical_hash(hydrated)
|
||||
if local_hash != version_hash:
|
||||
raise HashMismatch(
|
||||
f"master hash {version_hash!r} does not match agent hash "
|
||||
f"{local_hash!r} — refusing to apply"
|
||||
)
|
||||
|
||||
issues = _validate_topology(hydrated)
|
||||
if _validation_errors(issues):
|
||||
raise ValidationError(issues)
|
||||
|
||||
topology_id = _topology_id(hydrated)
|
||||
# Master is authoritative. If a different topology is pinned here
|
||||
# — whether it fully applied, only partially applied (failure
|
||||
# marker row + orphan containers), or drifted — teardown first,
|
||||
# then accept the new one. Refusing with 409 would leave the
|
||||
# agent stuck in a state only a human could resolve.
|
||||
existing = store.current()
|
||||
if existing is not None and existing.topology_id != topology_id:
|
||||
log.info(
|
||||
"superseding topology %s with %s on master authority",
|
||||
existing.topology_id, topology_id,
|
||||
)
|
||||
try:
|
||||
await teardown(existing.topology_id, store)
|
||||
except Exception as exc: # noqa: BLE001 — we still want to try applying
|
||||
log.warning(
|
||||
"best-effort teardown of superseded topology %s failed: %s",
|
||||
existing.topology_id, exc,
|
||||
)
|
||||
# Hard-clear the store row so the new apply isn't blocked
|
||||
# by a half-torn-down predecessor. Leftover docker objects
|
||||
# will surface via the next heartbeat's observed block.
|
||||
store.clear(existing.topology_id)
|
||||
|
||||
lans = hydrated["lans"]
|
||||
compose_path = _topology_compose_path(topology_id)
|
||||
client = docker.from_env()
|
||||
|
||||
# Bridges + compose are sync/blocking; hop to a thread so we don't
|
||||
# stall the event loop on a slow docker daemon.
|
||||
def _materialise() -> None:
|
||||
for lan in lans:
|
||||
net_name = _topology_network_name(topology_id, lan["name"])
|
||||
internal = not lan["is_dmz"]
|
||||
create_bridge_network(
|
||||
client, net_name, lan["subnet"], internal=internal
|
||||
)
|
||||
write_topology_compose(hydrated, compose_path)
|
||||
# ``--always-recreate-deps`` keeps service containers' netns shares
|
||||
# fresh: every decky service joins its base's netns via
|
||||
# ``network_mode: container:<base>``, and that share is bound at
|
||||
# service start time. If a base is recreated (e.g. when ``ports:``
|
||||
# changes after toggling ``forwards_l3``) but compose decides the
|
||||
# services are unchanged, the services keep a stale netns FD
|
||||
# pointing at the destroyed base — they end up in an empty
|
||||
# namespace with only ``lo``, and external traffic hits a closed
|
||||
# port on the live base. Forcing dependents to recreate alongside
|
||||
# the base is the cheapest way to make this race impossible.
|
||||
_compose_with_retry(
|
||||
"up", "--build", "-d", "--always-recreate-deps",
|
||||
compose_file=compose_path,
|
||||
)
|
||||
|
||||
await asyncio.to_thread(_materialise)
|
||||
|
||||
store.put(topology_id, version_hash, hydrated)
|
||||
log.info("topology %s applied on agent (%d LANs)", topology_id, len(hydrated["lans"]))
|
||||
log.info(
|
||||
"topology %s applied on agent (%d LANs)", topology_id, len(lans)
|
||||
)
|
||||
|
||||
|
||||
async def teardown(
|
||||
@@ -166,16 +158,12 @@ async def teardown(
|
||||
# LAN membership list via the hydrated blob if available.
|
||||
hydrated = row.hydrated if row and row.topology_id == topology_id else None
|
||||
compose_path = _topology_compose_path(topology_id)
|
||||
compose_project = _topology_compose_project(topology_id)
|
||||
client = docker.from_env()
|
||||
|
||||
def _dismantle() -> None:
|
||||
if compose_path.exists():
|
||||
try:
|
||||
_compose(
|
||||
"down", "--remove-orphans",
|
||||
compose_file=compose_path, project=compose_project,
|
||||
)
|
||||
_compose("down", "--remove-orphans", compose_file=compose_path)
|
||||
except subprocess.CalledProcessError as exc:
|
||||
log.warning(
|
||||
"topology %s compose down failed (continuing): %s",
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Agent-side sqlite cache of the currently-applied topology.
|
||||
|
||||
**This is a cache, not a source of truth.** The master is the only
|
||||
@@ -64,7 +63,6 @@ class TopologyStore:
|
||||
# The agent is single-process, so there's no real contention —
|
||||
# sqlite's own connection lock is enough.
|
||||
self._conn = sqlite3.connect(str(db_path), check_same_thread=False)
|
||||
self._conn.row_factory = sqlite3.Row
|
||||
self._conn.execute(
|
||||
"CREATE TABLE IF NOT EXISTS applied_topology ("
|
||||
" topology_id TEXT PRIMARY KEY,"
|
||||
@@ -86,11 +84,11 @@ class TopologyStore:
|
||||
if row is None:
|
||||
return None
|
||||
return AppliedRow(
|
||||
topology_id=row["topology_id"],
|
||||
applied_version_hash=row["applied_version_hash"],
|
||||
hydrated=json.loads(row["hydrated_blob_json"]),
|
||||
applied_at=int(row["applied_at"]),
|
||||
last_error=row["last_error"],
|
||||
topology_id=row[0],
|
||||
applied_version_hash=row[1],
|
||||
hydrated=json.loads(row[2]),
|
||||
applied_at=int(row[3]),
|
||||
last_error=row[4],
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------- writes
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
Machine archetype profiles for DECNET deckies.
|
||||
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
"""Artifact storage helpers shared between the web router and TTP workers."""
|
||||
@@ -1,86 +0,0 @@
|
||||
"""
|
||||
Shared on-disk artifact path resolution.
|
||||
|
||||
Honeypot decoys (SSH, SMTP) farm captured payloads into a host-mounted
|
||||
quarantine tree:
|
||||
|
||||
/var/lib/decnet/artifacts/{decky}/{service}/{stored_as}
|
||||
|
||||
Two callers need to translate ``(decky, stored_as, service)`` into a
|
||||
concrete ``Path`` rooted under that tree:
|
||||
|
||||
* The web router endpoint ``GET /api/v1/artifacts/{decky}/{stored_as}``
|
||||
(``decnet.web.router.artifacts.api_get_artifact``) — admin-gated
|
||||
download for the dashboard.
|
||||
* The TTP ``EmailLifter`` (``decnet.ttp.impl.email_lifter``), which
|
||||
reads the stored ``.eml`` at tag-time so body-aware predicates
|
||||
(R0047 BEC, R0048 macro) don't need raw body text on the bus.
|
||||
|
||||
Both callers share the same validation rules and the same
|
||||
defence-in-depth symlink-escape check; this module is the single
|
||||
implementation. It is auth-agnostic — wrappers layer authentication
|
||||
where appropriate (the router does ``require_admin``, the lifter does
|
||||
not).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
# Decky names are generated by the deployer: a lowercase alphanumeric first
# character followed by up to 62 lowercase alphanumerics or hyphens.
_DECKY_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,62}$")

# Explicit allow-list of services that own an artifacts subdirectory, so a
# query string or bus payload cannot steer the lookup into another subpath.
_ALLOWED_SERVICES = frozenset(("ssh", "smtp"))

# The capturing template builds stored_as as  ${ts}_${sha:0:12}_${base}:
#   ts   — ISO-8601 UTC timestamp, e.g. 2026-04-18T02:22:56Z
#   sha  — 12 lowercase hex characters
#   base — basename of the original file
# The filename charset is deliberately narrow while still admitting the
# punctuation that dropped files commonly carry.
_STORED_AS_RE = re.compile(
    r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z_[a-f0-9]{12}_[A-Za-z0-9._-]{1,255}$"
)

# Kept at module level so tests can monkeypatch it. Production overrides it
# through the DECNET_ARTIFACTS_ROOT environment variable (set by the systemd
# unit); the default matches the bind mount declared in
# decnet/services/{ssh,smtp}.py.
ARTIFACTS_ROOT = Path(os.getenv("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts"))
|
||||
|
||||
|
||||
class ArtifactPathError(ValueError):
    """Signal that an artifact lookup was rejected.

    Raised when ``(decky, stored_as, service)`` does not pass validation or
    when the resolved path would land outside the artifacts root.

    Per the module notes, the web router translates this into
    ``HTTPException(400)`` and the email lifter treats the event as having
    no body available (no tag).
    """
|
||||
|
||||
|
||||
def resolve_artifact_path(decky: str, stored_as: str, service: str) -> Path:
    """Map ``(decky, stored_as, service)`` to a path under the artifacts root.

    Inputs are checked against the service allow-list and the decky /
    stored_as patterns before any path is built. The resulting path is
    resolved and must still lie inside the resolved artifacts root.

    File existence is NOT checked; callers decide how to treat a missing
    file.

    Raises:
        ArtifactPathError: on an unknown service, a malformed decky name or
            stored_as value, or a resolved path outside the root.
    """
    if service not in _ALLOWED_SERVICES:
        raise ArtifactPathError("invalid service")
    if _DECKY_RE.fullmatch(decky) is None:
        raise ArtifactPathError("invalid decky name")
    if _STORED_AS_RE.fullmatch(stored_as) is None:
        raise ArtifactPathError("invalid stored_as")

    base = ARTIFACTS_ROOT.resolve()
    target = base.joinpath(decky, service, stored_as).resolve()

    # Defence in depth: the patterns already reject "..", but a symlink or
    # odd filesystem state could still point the resolved path elsewhere.
    stays_inside = target == base or base in target.parents
    if not stays_inside:
        raise ArtifactPathError("path escapes artifacts root")
    return target
|
||||
@@ -1,129 +0,0 @@
|
||||
"""Shared asciinema shard helpers.
|
||||
|
||||
Extracted from ``decnet/web/router/transcripts/api_get_transcript.py``
|
||||
so non-router callers (the BEHAVE-SHELL session-ended handler in
|
||||
``decnet/profiler/worker.py``, the collector's session aggregator)
|
||||
can resolve shard paths without crossing the layer boundary into the
|
||||
FastAPI router.
|
||||
|
||||
Functions here speak in :class:`ValueError` — callers that want HTTP
|
||||
semantics translate at the boundary. The router wrappers keep their
|
||||
existing ``HTTPException`` behaviour for backwards compatibility.
|
||||
|
||||
PII boundary unchanged: shards live on disk; this module returns
|
||||
:class:`pathlib.Path` pointers, never byte content. The ``_get_index``
|
||||
cache stores byte offsets only.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from collections import OrderedDict
|
||||
from pathlib import Path
|
||||
|
||||
# Root of the on-disk artifact tree; overridable through the
# DECNET_ARTIFACTS_ROOT environment variable.
ARTIFACTS_ROOT = Path(os.getenv("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts"))

# Accepted shapes for path components and for the "sid" field of a shard line.
_DECKY_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,62}$")
_SERVICE_RE = re.compile(r"^(ssh|telnet)$")
_SHARD_BASENAME_RE = re.compile(r"^sessions-\d{4}-\d{2}-\d{2}\.jsonl$")
_SID_LINE_RE = re.compile(rb'"sid"\s*:\s*"([a-f0-9-]{36})"')

# LRU cache of per-shard indexes:
#   (path, mtime_ns) -> {sid: [(offset, length), ...]}
# Holds at most _CACHE_MAX entries.
_INDEX_CACHE: "OrderedDict[tuple[str, int], dict[str, list[tuple[int, int]]]]" = OrderedDict()
_CACHE_MAX = 32
|
||||
|
||||
|
||||
def validate_names(decky: str, service: str) -> None:
    """Reject a ``decky`` or ``service`` that does not match its pattern.

    Raises:
        ValueError: *decky* fails the decky-name pattern, or *service* is
            not one of the accepted service names. The decky is checked
            first.
    """
    if _DECKY_RE.fullmatch(decky) is None:
        raise ValueError(f"invalid decky name: {decky!r}")
    if _SERVICE_RE.fullmatch(service) is None:
        raise ValueError(f"invalid service: {service!r}")
|
||||
|
||||
|
||||
def resolve_shard(decky: str, service: str, shard_name: str) -> Path:
    """Return ``ARTIFACTS_ROOT/{decky}/{service}/transcripts/{shard_name}``.

    Names are validated first, and the resolved path must remain inside the
    resolved artifacts root.

    Raises:
        ValueError: on an invalid decky, service, or shard name, or when
            the resolved path lies outside the artifacts root.
    """
    validate_names(decky, service)
    if _SHARD_BASENAME_RE.fullmatch(shard_name) is None:
        raise ValueError(f"invalid shard name: {shard_name!r}")

    base = ARTIFACTS_ROOT.resolve()
    shard_path = base.joinpath(decky, service, "transcripts", shard_name).resolve()

    stays_inside = shard_path == base or base in shard_path.parents
    if not stays_inside:
        raise ValueError(f"path escapes artifacts root: {shard_path}")
    return shard_path
|
||||
|
||||
|
||||
def _build_index(path: Path) -> dict[str, list[tuple[int, int]]]:
    """Scan a shard file and map each sid to its lines' byte spans.

    The file is read in binary mode line by line. Every line containing a
    ``"sid": "<36 chars>"`` field contributes an ``(offset, length)`` pair,
    both measured in bytes, to that sid's list. Lines without a sid are
    skipped but still advance the offset.
    """
    spans: dict[str, list[tuple[int, int]]] = {}
    position = 0
    with path.open("rb") as handle:
        for raw_line in handle:
            size = len(raw_line)
            hit = _SID_LINE_RE.search(raw_line)
            if hit is not None:
                session_id = hit.group(1).decode("ascii")
                spans.setdefault(session_id, []).append((position, size))
            position += size
    return spans
|
||||
|
||||
|
||||
def get_index(path: Path) -> tuple[dict[str, list[tuple[int, int]]], int]:
    """Return ``(sid → [(offset, length), …], file_size)`` for a shard.

    The index is cached under ``(str(path), mtime_ns)``, so a shard whose
    modification time changed gets a freshly built index. The cache is LRU:
    a hit is moved to the most-recent end, and after an insert the oldest
    entries are evicted until at most ``_CACHE_MAX`` remain.
    """
    info = path.stat()
    cache_key = (str(path), info.st_mtime_ns)

    if cache_key in _INDEX_CACHE:
        _INDEX_CACHE.move_to_end(cache_key)
        return _INDEX_CACHE[cache_key], info.st_size

    built = _build_index(path)
    _INDEX_CACHE[cache_key] = built
    _INDEX_CACHE.move_to_end(cache_key)
    while len(_INDEX_CACHE) > _CACHE_MAX:
        # last=False pops from the least-recently-used end.
        _INDEX_CACHE.popitem(last=False)
    return built, info.st_size
|
||||
|
||||
|
||||
def find_shard_with_sid(decky: str, service: str, sid: str) -> Path | None:
    """Find the ``sessions-YYYY-MM-DD.jsonl`` shard that contains *sid*.

    Shards in the decky's transcripts directory are checked in descending
    name order, i.e. newest date first. Per-shard sid indexes come from
    :func:`get_index`, which caches them until the shard's mtime changes.

    Returns ``None`` when no shard contains the sid, when the transcripts
    directory is missing, unreadable, or resolves outside the artifacts
    root, and a shard whose index cannot be read is skipped. Invalid
    *decky* / *service* names still raise ``ValueError`` via
    :func:`validate_names`.
    """
    validate_names(decky, service)

    base = ARTIFACTS_ROOT.resolve()
    shard_dir = base.joinpath(decky, service, "transcripts").resolve()
    if base not in shard_dir.parents:
        return None

    try:
        if not shard_dir.is_dir():
            return None
        listing = list(shard_dir.iterdir())
    except OSError:  # PermissionError is an OSError subclass
        return None

    candidates = [
        entry for entry in listing if _SHARD_BASENAME_RE.fullmatch(entry.name)
    ]
    candidates.sort(reverse=True)

    for shard_path in candidates:
        try:
            sid_index, _size = get_index(shard_path)
        except OSError:
            continue
        if sid in sid_index:
            return shard_path
    return None
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
IP-to-ASN enrichment — maps attacker IPs to BGP-announced AS numbers and
|
||||
org names for attacker intelligence.
|
||||
@@ -7,7 +6,7 @@ Public surface mirrors :mod:`decnet.geoip` so callers can compose them:
|
||||
|
||||
* :func:`get_lookup` — returns the singleton :class:`AsnLookup`.
|
||||
* :func:`enrich_ip` — takes an IP string, returns
|
||||
``(asn_int, asn_name, bgp_prefix, provider_name)`` or ``(None, None, None, None)``.
|
||||
``(asn_int, asn_name, provider_name)`` or ``(None, None, None)``.
|
||||
|
||||
Provider selection goes through :func:`~decnet.asn.factory.get_provider`
|
||||
(env ``DECNET_ASN_PROVIDER``, default ``iptoasn``). Direct imports of
|
||||
@@ -52,8 +51,8 @@ def get_lookup(*, force_refresh: bool = False) -> AsnLookup:
|
||||
return _lookup
|
||||
|
||||
|
||||
def enrich_ip(ip: str) -> Tuple[Optional[int], Optional[str], Optional[str], Optional[str]]:
|
||||
"""Return ``(asn, as_name, bgp_prefix, provider_name)`` or ``(None, None, None, None)``.
|
||||
def enrich_ip(ip: str) -> Tuple[Optional[int], Optional[str], Optional[str]]:
|
||||
"""Return ``(asn, as_name, provider_name)`` or ``(None, None, None)``.
|
||||
|
||||
Never raises — any lookup failure collapses to all-None so the
|
||||
caller (profiler) can upsert the attacker row regardless.
|
||||
@@ -63,15 +62,15 @@ def enrich_ip(ip: str) -> Tuple[Optional[int], Optional[str], Optional[str], Opt
|
||||
touching provider config.
|
||||
"""
|
||||
if os.environ.get("DECNET_ASN_ENABLED", "true").lower() == "false":
|
||||
return (None, None, None, None)
|
||||
return (None, None, None)
|
||||
try:
|
||||
lookup = get_lookup()
|
||||
info = lookup.asn(ip)
|
||||
if info is None:
|
||||
return (None, None, None, None)
|
||||
return (info.asn, info.name or None, info.prefix, _provider_name or "unknown")
|
||||
return (None, None, None)
|
||||
return (info.asn, info.name or None, _provider_name or "unknown")
|
||||
except Exception:
|
||||
return (None, None, None, None)
|
||||
return (None, None, None)
|
||||
|
||||
|
||||
def _files_stale(provider) -> bool:
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""ASN provider protocol — mirror of :mod:`decnet.geoip.base`.
|
||||
|
||||
Concrete providers (e.g. :mod:`decnet.asn.iptoasn`) implement this.
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""ASN provider factory — mirror of :mod:`decnet.geoip.factory`.
|
||||
|
||||
Dispatch key: ``DECNET_ASN_PROVIDER`` (default ``iptoasn``). Lazy
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""iptoasn.com IP→ASN provider.
|
||||
|
||||
Daily-refreshed gzipped TSV dump of the global BGP table, derived from
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""iptoasn.com bulk dump download.
|
||||
|
||||
One file: ``ip2asn-v4.tsv.gz``, ~5 MB compressed, refreshed daily.
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Parser for the iptoasn.com ``ip2asn-v4.tsv`` dump.
|
||||
|
||||
Line shape (gzipped, one row per BGP-announced prefix)::
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""iptoasn provider — orchestrates fetch + parse into an :class:`AsnLookup`.
|
||||
|
||||
Mirrors :class:`decnet.geoip.rir.provider.RirProvider` exactly: fetch,
|
||||
@@ -14,7 +13,7 @@ from typing import Sequence
|
||||
from decnet.asn.base import Provider
|
||||
from decnet.asn.iptoasn.fetch import IPTOASN_SOURCES, fetch_all
|
||||
from decnet.asn.iptoasn.parse import parse_file
|
||||
from decnet.asn.lookup import AsnLookup, Range
|
||||
from decnet.asn.lookup import AsnLookup
|
||||
from decnet.asn.paths import ensure_root
|
||||
|
||||
logger = logging.getLogger("decnet.asn.iptoasn.provider")
|
||||
@@ -55,7 +54,7 @@ class IptoasnProvider(Provider):
|
||||
"asn.iptoasn: cache load failed, rebuilding: %s", exc
|
||||
)
|
||||
|
||||
ranges: list[Range] = []
|
||||
ranges = []
|
||||
for path in self.data_paths():
|
||||
if not path.exists():
|
||||
continue
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Provider-agnostic IP→ASN lookup.
|
||||
|
||||
A :class:`AsnLookup` is a frozen, sorted array of ``(start_ip,
|
||||
@@ -24,25 +23,11 @@ class AsnInfo:
|
||||
|
||||
asn: int
|
||||
name: str # AS description / org name; "" if absent in the source data
|
||||
prefix: Optional[str] = None # synthesized covering CIDR; set at lookup time, not at rest
|
||||
|
||||
|
||||
Range = Tuple[int, int, AsnInfo]
|
||||
|
||||
|
||||
def _synthesize_prefix(start_int: int, end_int: int, queried_int: int) -> Optional[str]:
    """Return the CIDR block covering ``queried_int`` within ``[start, end]``.

    The inclusive IPv4 range is split into CIDR blocks with
    ``ipaddress.summarize_address_range``; the block whose address span
    contains *queried_int* is returned as a string such as
    ``"10.0.0.0/24"``.

    Returns ``None`` when no block contains the address or when the inputs
    are rejected with ``ValueError`` / ``TypeError`` (for example
    ``start > end`` or an out-of-range integer).
    """
    try:
        blocks = ipaddress.summarize_address_range(
            ipaddress.IPv4Address(start_int), ipaddress.IPv4Address(end_int)
        )
        for cidr in blocks:
            lowest = int(cidr.network_address)
            highest = int(cidr.broadcast_address)
            if queried_int >= lowest and queried_int <= highest:
                return str(cidr)
    except (ValueError, TypeError):
        return None
    return None
|
||||
|
||||
|
||||
@dataclass
|
||||
class AsnLookup:
|
||||
"""Indexed AS lookup over IPv4 ranges."""
|
||||
@@ -103,9 +88,7 @@ class AsnLookup:
|
||||
if idx < 0:
|
||||
return None
|
||||
if n <= self._ends[idx]:
|
||||
info = self._infos[idx]
|
||||
prefix = _synthesize_prefix(self._starts[idx], self._ends[idx], n)
|
||||
return AsnInfo(asn=info.asn, name=info.name, prefix=prefix)
|
||||
return self._infos[idx]
|
||||
return None
|
||||
|
||||
def __len__(self) -> int:
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Filesystem layout for ASN data — mirror of :mod:`decnet.geoip.paths`.
|
||||
|
||||
``ASN_ROOT`` is where providers drop their raw files and cache indexes.
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""DECNET ServiceBus — pub/sub notification substrate.
|
||||
|
||||
The bus is the notification layer for DECNET's worker constellation. The DB
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Process-wide bus singleton for request-serving workers (API, SSE routes).
|
||||
|
||||
A single connected :class:`~decnet.bus.base.BaseBus` shared across request
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Bus abstractions: the :class:`Event` envelope and the :class:`BaseBus` ABC.
|
||||
|
||||
Every transport (NATS, in-process fake, null) speaks this contract. The
|
||||
@@ -22,7 +21,7 @@ import asyncio
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, AsyncIterator, cast
|
||||
from typing import Any, AsyncIterator
|
||||
|
||||
EVENT_SCHEMA_VERSION = 1
|
||||
|
||||
@@ -203,4 +202,4 @@ async def _next_or_stop(queue: "asyncio.Queue[Any]") -> Event:
|
||||
item = await queue.get()
|
||||
if item is _CLOSE_SENTINEL:
|
||||
raise StopAsyncIteration
|
||||
return cast(Event, item)
|
||||
return item
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Bus factory — selects a :class:`~decnet.bus.base.BaseBus` implementation.
|
||||
|
||||
Dispatch key: the ``DECNET_BUS_TYPE`` environment variable.
|
||||
@@ -17,7 +16,7 @@ env-driven dispatch, optional telemetry wrapping). Callers MUST use
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any, cast
|
||||
from typing import Any
|
||||
|
||||
from decnet.bus.base import BaseBus
|
||||
|
||||
@@ -77,10 +76,10 @@ def _maybe_wrap_telemetry(bus: BaseBus) -> BaseBus:
|
||||
up at all we no-op.
|
||||
"""
|
||||
try:
|
||||
from decnet.telemetry import wrap_repository
|
||||
from decnet.telemetry import wrap_repository # type: ignore[attr-defined]
|
||||
except ImportError:
|
||||
return bus
|
||||
try:
|
||||
return cast(BaseBus, wrap_repository(bus))
|
||||
return wrap_repository(bus)
|
||||
except Exception: # pragma: no cover - defensive
|
||||
return bus
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""In-process bus transports.
|
||||
|
||||
* :class:`FakeBus` — real pub/sub semantics without touching a socket. Used
|
||||
@@ -14,7 +13,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Any, cast
|
||||
from typing import Any
|
||||
|
||||
from decnet.bus.base import (
|
||||
BaseBus,
|
||||
@@ -51,7 +50,7 @@ class _FakeSubscription(Subscription):
|
||||
item = await self._queue.get()
|
||||
if item is _CLOSE_SENTINEL:
|
||||
raise StopAsyncIteration
|
||||
return cast(Event, item)
|
||||
return item
|
||||
|
||||
async def _aclose(self) -> None:
|
||||
self._bus._unregister(self)
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Wire protocol for the DECNET bus UNIX-socket transport.
|
||||
|
||||
Frame layout:
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fire-and-forget publish helpers shared across every worker.
|
||||
|
||||
Lifted out of ``decnet/mutator/engine.py`` once a second caller showed up
|
||||
@@ -59,7 +58,7 @@ def make_thread_safe_publisher(
|
||||
contract the rest of this module already upholds.
|
||||
"""
|
||||
if bus is None:
|
||||
return lambda _topic, _payload, _event_type="": None # type: ignore[misc]
|
||||
return lambda _topic, _payload, _event_type="": None
|
||||
|
||||
def _publish(topic: str, payload: dict[str, Any], event_type: str = "") -> None:
|
||||
# Stream threads may keep draining after the bus owner closed it
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Canonical topic hierarchy for the DECNET ServiceBus.
|
||||
|
||||
Locked early so consumers can subscribe with stable wildcard patterns.
|
||||
@@ -18,7 +17,6 @@ Token structure (NATS-style, dot-separated):
|
||||
attacker.scored
|
||||
attacker.session.started
|
||||
attacker.session.ended
|
||||
attacker.observation.{primitive}
|
||||
identity.formed
|
||||
identity.observation.linked
|
||||
identity.merged
|
||||
@@ -30,18 +28,12 @@ Token structure (NATS-style, dot-separated):
|
||||
campaign.unmerged
|
||||
credential.captured
|
||||
credential.reuse.detected
|
||||
attribution.profile.state_changed
|
||||
attribution.profile.multi_actor_suspected
|
||||
canary.{token_id}.triggered
|
||||
canary.{token_id}.placed
|
||||
canary.{token_id}.revoked
|
||||
system.log
|
||||
system.bus.health
|
||||
system.{worker}.health
|
||||
email.received
|
||||
ttp.tagged
|
||||
ttp.rule.fired.{technique_id}
|
||||
ttp.rule.suppressed
|
||||
|
||||
Wildcards (per :func:`decnet.bus.base.matches`):
|
||||
|
||||
@@ -60,12 +52,8 @@ IDENTITY = "identity"
|
||||
CAMPAIGN = "campaign"
|
||||
SYSTEM = "system"
|
||||
CREDENTIAL = "credential"
|
||||
ATTRIBUTION = "attribution"
|
||||
ORCHESTRATOR = "orchestrator"
|
||||
CANARY = "canary"
|
||||
SMTP = "smtp"
|
||||
EMAIL = "email"
|
||||
TTP = "ttp"
|
||||
|
||||
|
||||
# ─── Leaf event-type constants (the last segment of each topic) ──────────────
|
||||
@@ -95,24 +83,6 @@ DECKY_MUTATE_REQUEST = "mutate_request"
|
||||
# syslog sidechannel too) to interleave substrate-change markers into
|
||||
# attacker traversals.
|
||||
DECKY_MUTATION = "mutation"
|
||||
# Per-service add/remove on a deployed decky (live; no full redeploy).
|
||||
# Payload carries ``decky_name``, ``service_name``, optional
|
||||
# ``topology_id``, and ``services`` (the post-mutation list). Consumers
|
||||
# that watch substrate shape (correlator, dashboard, profiler) reconcile
|
||||
# off these without waiting for the next decnet-state.json snapshot.
|
||||
DECKY_SERVICE_ADDED = "service_added"
|
||||
DECKY_SERVICE_REMOVED = "service_removed"
|
||||
# Per-service config change (the schema-driven Inspector form). Payload
|
||||
# carries ``decky_name``, ``service_name``, optional ``topology_id``,
|
||||
# ``service_config`` (the new validated dict), and ``recreated`` — true
|
||||
# when the operator hit Apply (container was force-recreated to pick up
|
||||
# the new env), false when they only hit Save (DB-only).
|
||||
DECKY_SERVICE_CONFIG_CHANGED = "service_config_changed"
|
||||
# Async deploy/mutate operation transitions
|
||||
# (pending/running/succeeded/failed). Payload: {lifecycle_id, operation,
|
||||
# status, error?}. UI polling endpoint is the source of truth; this
|
||||
# fires for live subscribers (dashboard, mutator-side audit, etc).
|
||||
DECKY_LIFECYCLE = "lifecycle"
|
||||
|
||||
# Attacker event types (second token under the ``attacker`` root). First
|
||||
# sighting, session boundary transitions, and score-threshold crossings
|
||||
@@ -120,27 +90,10 @@ DECKY_LIFECYCLE = "lifecycle"
|
||||
# the wildcard ``attacker.>``.
|
||||
ATTACKER_OBSERVED = "observed"
|
||||
ATTACKER_SCORED = "scored"
|
||||
# Published once per successful active probe result (JARM/HASSH/TCPfp/ipv6_leak).
|
||||
# Published once per successful active probe result (JARM/HASSH/TCPfp).
|
||||
# Distinct from ``observed`` which is the correlator's first-sight signal —
|
||||
# a fingerprint is additional evidence about an already-observed attacker.
|
||||
# Known payload ``kind`` discriminators carried in this topic:
|
||||
# "jarm" — JARM TLS server hash (prober)
|
||||
# "hassh" — HASSHServer SSH key-exchange hash (prober)
|
||||
# "tcpfp" — TCP/IP stack fingerprint hash (prober)
|
||||
# "tls_cert" — leaf TLS certificate SHA-256 (prober)
|
||||
# "ipv6_leak" — fe80:: link-local address observed via passive sniffer
|
||||
# or active ICMPv6 solicitation (prober + sniffer);
|
||||
# payload: {attacker_ip, addr, iid_kind, mac_oui, vector,
|
||||
# on_iface, observed_at}
|
||||
ATTACKER_FINGERPRINTED = "fingerprinted"
|
||||
# Published when the prober observes a NEW hash for an
|
||||
# (attacker_ip, port, probe_type) triple it has seen before — i.e. the
|
||||
# attacker rotated their VPS, rebuilt their SSH server, swapped their
|
||||
# TLS cert. Distinct from ``fingerprinted`` which fires on every probe
|
||||
# result; ``fingerprint_rotated`` fires only on diff and carries both
|
||||
# old_hash + new_hash. Producer: prober (via the rotation library);
|
||||
# consumers: dashboard, forensics, attribution clustering.
|
||||
ATTACKER_FINGERPRINT_ROTATED = "fingerprint_rotated"
|
||||
ATTACKER_SESSION_STARTED = "session.started"
|
||||
ATTACKER_SESSION_ENDED = "session.ended"
|
||||
# Published by the ``decnet enrich`` worker after an enrichment pass
|
||||
@@ -148,19 +101,6 @@ ATTACKER_SESSION_ENDED = "session.ended"
|
||||
# returned a verdict). Payload carries the aggregate verdict + per-
|
||||
# provider summary so SIEM-bound webhooks don't need to re-query the DB.
|
||||
ATTACKER_INTEL_ENRICHED = "intel.enriched"
|
||||
# Per-primitive BEHAVE-SHELL observation. Full topic shape:
|
||||
# attacker.observation.<primitive>
|
||||
# e.g. ``attacker.observation.motor.input_modality``. Producer:
|
||||
# ``decnet/profiler/behave_shell/`` (extractor library called from the
|
||||
# profiler worker on ``attacker.session.ended``); consumers: dashboard
|
||||
# SSE relay, attribution engine state machine, federation gossip
|
||||
# (post-v0). See development/BEHAVE-INTEGRATION.md §"Bus topics" for
|
||||
# the wire-format contract — the prefix is documentation + pattern
|
||||
# match only; bus auth is socket file perms (DEBT-029 §2), not
|
||||
# topic-level. The ``primitive`` segment MAY contain dots
|
||||
# (``motor.shell_mastery.tab_completion``) — the same dotted-leaf
|
||||
# rule that ``attacker.session.ended`` uses.
|
||||
ATTACKER_OBSERVATION_PREFIX = "observation"
|
||||
|
||||
# Identity-resolution event types (second/third tokens under ``identity``).
|
||||
# Published by the (future) clusterer worker — see
|
||||
@@ -228,42 +168,6 @@ CAMPAIGN_UNMERGED = "unmerged"
|
||||
CREDENTIAL_CAPTURED = "captured"
|
||||
CREDENTIAL_REUSE_DETECTED = "reuse.detected"
|
||||
|
||||
# Attribution-engine event types (second/third tokens under
|
||||
# ``attribution``). Published by the v0 attribution worker
|
||||
# (``decnet.correlation.attribution_worker``) which subscribes to
|
||||
# ``attacker.observation.>`` and runs the per-(identity, primitive)
|
||||
# state machine. See ``development/ATTRIBUTION-ENGINE.md``.
|
||||
#
|
||||
# attribution.profile.state_changed — per-primitive state
|
||||
# transition (e.g.
|
||||
# stable → drifting).
|
||||
# Payload: identity_uuid,
|
||||
# primitive, old_state,
|
||||
# new_state, current_value,
|
||||
# confidence,
|
||||
# observation_count, ts.
|
||||
# attribution.profile.multi_actor_suspected — fires when ≥ 2
|
||||
# primitives flag the same
|
||||
# identity as multi_actor
|
||||
# concurrently. Cross-
|
||||
# primitive correlator;
|
||||
# single-primitive
|
||||
# multi_actor is too noisy
|
||||
# on its own. Payload:
|
||||
# identity_uuid, primitives,
|
||||
# evidence_summary,
|
||||
# confidence, ts.
|
||||
#
|
||||
# These are *derived* signals — distinct from
|
||||
# ``identity.*`` (clusterer lifecycle, IDENTITY_RESOLUTION.md) and
|
||||
# ``attacker.observation.*`` (raw extractor envelopes,
|
||||
# BEHAVE-INTEGRATION.md). The three families compose: observations feed
|
||||
# the attribution engine, the engine emits derived state, the clusterer
|
||||
# reads observations + state to form / merge identities.
|
||||
ATTRIBUTION_PROFILE_PREFIX = "profile"
|
||||
ATTRIBUTION_PROFILE_STATE_CHANGED = "profile.state_changed"
|
||||
ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED = "profile.multi_actor_suspected"
|
||||
|
||||
# Canary-token event types (third token under ``canary``).
|
||||
#
|
||||
# canary.{token_id}.placed — orchestrator/API successfully planted a
|
||||
@@ -327,43 +231,6 @@ WORKER_CONTROL_START = "start"
|
||||
# of patterns. Payload is currently empty; consumers only need the signal.
|
||||
WEBHOOK_SUBSCRIPTIONS_CHANGED = "system.webhook.subscriptions_changed"
|
||||
|
||||
# Email-receipt event — fired by smtp / smtp-relay services on full-message
|
||||
# receipt (envelope + headers + body + attachments captured). Single-token
|
||||
# leaf so the bus tokenizer accepts it directly under the ``email`` root.
|
||||
# Consumed by the TTP ``email_lifter`` for header / body-pattern / attachment
|
||||
# rules. PII rule (TTP_TAGGING.md "Hard parts §6"): payload carries hashes,
|
||||
# counts, header names, and rcpt-domain sets — never rcpt addresses or body
|
||||
# bytes.
|
||||
EMAIL_RECEIVED = "received"
|
||||
|
||||
# TTP-tagging event types (second/third tokens under ``ttp``).
|
||||
#
|
||||
# ttp.tagged — one or more new tags written. Published
|
||||
# only when ``INSERT OR IGNORE`` wrote at
|
||||
# least one new row; idempotent
|
||||
# re-evaluations publish nothing
|
||||
# (loop-prevention invariant — see
|
||||
# TTP_TAGGING.md).
|
||||
# ttp.rule.fired.{technique_id} — per-technique fan-out for SIEM
|
||||
# consumers that subscribe to a single
|
||||
# technique. Topic key is the parent
|
||||
# technique; sub_technique is in the
|
||||
# payload. Built via :func:`ttp_rule_fired`.
|
||||
# ttp.rule.suppressed — rule fired but the tag was dropped
|
||||
# (confidence below floor, rate-limited,
|
||||
# or the rule's RuleState was disabled).
|
||||
# Observability signal for the dashboard.
|
||||
#
|
||||
# Per-rule reload + state-change topics. Built via
|
||||
# :func:`ttp_rule_reloaded` / :func:`ttp_rule_state`; SIEM consumers
|
||||
# subscribe to ``ttp.rule.reloaded.>`` (every rule) or
|
||||
# ``ttp.rule.reloaded.R0001`` (one rule) at their preferred granularity.
|
||||
TTP_TAGGED = "tagged"
|
||||
TTP_RULE_FIRED = "rule.fired"
|
||||
TTP_RULE_SUPPRESSED = "rule.suppressed"
|
||||
TTP_RULE_RELOADED = "rule.reloaded"
|
||||
TTP_RULE_STATE = "rule.state"
|
||||
|
||||
|
||||
# ─── Builders ────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -397,12 +264,6 @@ def decky_mutation(decky_id: str) -> str:
|
||||
return f"{DECKY}.{decky_id}.{DECKY_MUTATION}"
|
||||
|
||||
|
||||
def decky_lifecycle(decky_id: str) -> str:
    """Return the ``decky.<id>.lifecycle`` topic for *decky_id*.

    *decky_id* is checked with :func:`_reject_tokens` before the topic is
    assembled, so whatever that helper rejects propagates to the caller.
    """
    _reject_tokens(decky_id)
    segments = (DECKY, decky_id, DECKY_LIFECYCLE)
    return "{}.{}.{}".format(*segments)
|
||||
|
||||
|
||||
def system(event_type: str) -> str:
|
||||
"""Build ``system.<event_type>``.
|
||||
|
||||
@@ -440,42 +301,6 @@ def attacker(event_type: str) -> str:
|
||||
return f"{ATTACKER}.{event_type}"
|
||||
|
||||
|
||||
def attacker_observation(primitive: str) -> str:
    """Return the ``attacker.observation.<primitive>`` topic.

    *primitive* is the fully-qualified BEHAVE-SHELL primitive path, for
    example ``motor.input_modality`` or
    ``motor.shell_mastery.tab_completion``. No per-segment validation is
    applied here, so dotted primitives pass through unchanged — the same
    dotted-leaf allowance :func:`attacker` gives ``session.started``.

    Raises:
        ValueError: if *primitive* is empty, so a typo cannot ship as the
            dangling topic ``attacker.observation.``.
    """
    if primitive:
        return "{}.{}.{}".format(ATTACKER, ATTACKER_OBSERVATION_PREFIX, primitive)
    raise ValueError(
        "attacker_observation topic requires a non-empty primitive",
    )
|
||||
|
||||
|
||||
def attribution(event_type: str) -> str:
    """Return the ``attribution.<event_type>`` topic.

    Typical values for *event_type* are
    :data:`ATTRIBUTION_PROFILE_STATE_CHANGED` and
    :data:`ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED`. Both carry a dot
    (``profile.state_changed``); the value is appended verbatim, so such
    dotted leaves are accepted.

    Raises:
        ValueError: if *event_type* is empty.
    """
    if event_type:
        return "{}.{}".format(ATTRIBUTION, event_type)
    raise ValueError("attribution topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def campaign(event_type: str) -> str:
|
||||
"""Build ``campaign.<event_type>``.
|
||||
|
||||
@@ -556,86 +381,6 @@ def system_control(worker: str) -> str:
|
||||
return f"{SYSTEM}.{worker}.{SYSTEM_CONTROL}"
|
||||
|
||||
|
||||
def smtp(event_type: str) -> str:
    """Return the ``smtp.<event_type>`` topic.

    *event_type* is appended as-is and may itself contain dots
    (e.g. ``probe.pending``).

    Raises:
        ValueError: if *event_type* is empty.
    """
    if event_type:
        return "{}.{}".format(SMTP, event_type)
    raise ValueError("smtp topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def email_topic(event_type: str) -> str:
    """Return the ``email.<event_type>`` topic.

    The builder is called ``email_topic`` instead of ``email`` so it does
    not shadow the stdlib ``email`` package at import sites that need
    both. *event_type* is usually :data:`EMAIL_RECEIVED`.

    Raises:
        ValueError: if *event_type* is empty.
    """
    if event_type:
        return "{}.{}".format(EMAIL, event_type)
    raise ValueError("email topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def ttp(event_type: str) -> str:
    """Return the ``ttp.<event_type>`` topic.

    *event_type* is usually :data:`TTP_TAGGED`, :data:`TTP_RULE_FIRED`
    or :data:`TTP_RULE_SUPPRESSED`. The value is appended verbatim, so
    dotted leaves such as ``rule.fired`` are accepted. Use
    :func:`ttp_rule_fired` when a per-technique topic is wanted.

    Raises:
        ValueError: if *event_type* is empty.
    """
    if event_type:
        return "{}.{}".format(TTP, event_type)
    raise ValueError("ttp topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def ttp_rule_fired(technique_id: str) -> str:
    """Build ``ttp.rule.fired.<technique_id>``.

    Per-technique fan-out: SIEM subscribers can listen on
    ``ttp.rule.fired.>`` for everything, or ``ttp.rule.fired.T1110`` for
    one technique. *technique_id* is validated as a single segment via
    :func:`_reject_tokens` — sub-techniques like ``T1110.001`` are
    rejected because they would split into two tokens. The topic key is
    the parent technique; ``sub_technique_id`` lives in the payload.

    The middle segment comes from :data:`TTP_RULE_FIRED` rather than a
    repeated literal, matching :func:`ttp_rule_reloaded` and
    :func:`ttp_rule_state`, so the leaf is defined in exactly one place.
    """
    _reject_tokens(technique_id)
    return f"{TTP}.{TTP_RULE_FIRED}.{technique_id}"
|
||||
|
||||
|
||||
def ttp_rule_reloaded(rule_id: str) -> str:
    """Return the ``ttp.rule.reloaded.<rule_id>`` topic.

    This is the per-rule topic the
    :class:`~decnet.ttp.store.base.RuleStore` uses when a rule's
    *definition* changes (a YAML edit on the filesystem backend, a
    ``ttp_rule`` row update on the database backend). Events are emitted
    one per rule edit and never batched.

    Subscribe to ``ttp.rule.reloaded.>`` for every rule or
    ``ttp.rule.reloaded.R0001`` for a single one. *rule_id* must be a
    single segment; it is checked with :func:`_reject_tokens`.
    """
    _reject_tokens(rule_id)
    segments = (TTP, TTP_RULE_RELOADED, rule_id)
    return "{}.{}.{}".format(*segments)
|
||||
|
||||
|
||||
def ttp_rule_state(rule_id: str) -> str:
    """Return the ``ttp.rule.state.<rule_id>`` topic.

    This is the per-rule topic the
    :class:`~decnet.ttp.store.base.RuleStore` uses when a rule's
    *operational state* changes (an operator disables it, or an
    ``expires_at`` TTL fires and reverts the state). *rule_id* must be a
    single segment; it is checked with :func:`_reject_tokens`.
    """
    _reject_tokens(rule_id)
    segments = (TTP, TTP_RULE_STATE, rule_id)
    return "{}.{}.{}".format(*segments)
|
||||
|
||||
|
||||
def _reject_tokens(*parts: str) -> None:
|
||||
"""Reject topic segments that would break NATS-style tokenization.
|
||||
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""UNIX-socket client — :class:`UnixSocketBus` implementation of :class:`BaseBus`.
|
||||
|
||||
Holds one open socket to the local :class:`~decnet.bus.unix_server.BusServer`.
|
||||
@@ -26,7 +25,7 @@ import asyncio
|
||||
import contextlib
|
||||
import os
|
||||
import pathlib
|
||||
from typing import Any, cast
|
||||
from typing import Any
|
||||
|
||||
from decnet.bus import protocol
|
||||
from decnet.bus.base import (
|
||||
@@ -61,7 +60,7 @@ class _UnixSubscription(Subscription):
|
||||
item = await self._queue.get()
|
||||
if item is _CLOSE_SENTINEL:
|
||||
raise StopAsyncIteration
|
||||
return cast(Event, item)
|
||||
return item
|
||||
|
||||
async def _aclose(self) -> None:
|
||||
await self._bus._unregister(self)
|
||||
@@ -105,25 +104,14 @@ class UnixSocketBus(BaseBus):
|
||||
# ─── Lifecycle ──────────────────────────────────────────────────────────
|
||||
|
||||
async def connect(self) -> None:
    """Open the socket to the bus server, send HELLO, and start the reader task.

    Calling this on an already-connected bus is a no-op.

    The connect sequence runs exactly once and only while holding
    ``self._lock``. A second, unguarded copy of the sequence after the
    lock block would open another socket and spawn another reader task
    on every first connect, orphaning the ones created under the lock,
    so it is not kept here.

    Raises:
        RuntimeError: if the bus has already been closed.
    """
    # Double-checked locking: the cheap unlocked check fast-paths the
    # already-connected case, but the actual connect must hold ``_lock``
    # so two coroutines racing on a fresh bus (e.g. concurrent
    # publish()/subscribe() both lazily calling connect()) can't each
    # open a socket and spawn a reader task — the loser would orphan a
    # live FD and an uncancelled reader_loop that close() never reaps.
    if self._writer is not None:
        return
    async with self._lock:
        # Re-check under the lock: a racing caller may have connected
        # while we awaited the lock.
        if self._writer is not None:
            return
        if self._closed:
            raise RuntimeError("connect on closed bus")
        self._reader, self._writer = await asyncio.open_unix_connection(str(self._path))
        # Announce ourselves before the reader loop starts consuming frames.
        await self._send(protocol.encode(protocol.HELLO, args=self._client_name))
        self._reader_task = asyncio.create_task(self._reader_loop())
        log.debug("bus.client: connected to %s as %s", self._path, self._client_name)
|
||||
|
||||
async def close(self) -> None:
|
||||
if self._closed:
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""UNIX-socket server for the DECNET bus.
|
||||
|
||||
One :class:`BusServer` per host. Accepts local connections on a UNIX-domain
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""``decnet bus`` worker entrypoint.
|
||||
|
||||
Starts a :class:`~decnet.bus.unix_server.BusServer` on the configured UNIX
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Canary tokens — decoy artifacts planted in decky filesystems.
|
||||
|
||||
Public surface is exported here so callers can ``from decnet.canary
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
// Node helper invoked by decnet.canary.obfuscator.
//
// Protocol: a JSON object {code, options} arrives on stdin and the
// obfuscated JavaScript is written to stdout. On any failure the error
// (its stack when available) goes to stderr and the process exits with 2.
// Dependency-light on purpose: javascript-obfuscator is the only require.
const JsObf = require('javascript-obfuscator');

// stdin is decoded as UTF-8, so every chunk collected here is a string.
const pieces = [];

// Parse the buffered request, obfuscate it, and emit the result.
function emitObfuscated() {
  try {
    const request = JSON.parse(pieces.join(''));
    const obfuscated = JsObf.obfuscate(request.code, request.options || {});
    process.stdout.write(obfuscated.getObfuscatedCode());
  } catch (e) {
    process.stderr.write(String(e && e.stack || e));
    process.exit(2);
  }
}

process.stdin.setEncoding('utf8');
process.stdin.on('data', (chunk) => { pieces.push(chunk); });
process.stdin.on('end', emitObfuscated);
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Canary generator / instrumenter ABCs and the artifact dataclass.
|
||||
|
||||
Two flavors of producer share the same return shape:
|
||||
@@ -101,12 +100,6 @@ class CanaryArtifact:
|
||||
planting. Never leaked to the attacker-facing surface.
|
||||
"""
|
||||
|
||||
fingerprint_nonce: Optional[str] = None
|
||||
"""Per-mint HMAC nonce for fingerprint canaries; ``None`` for everything
|
||||
else. Cultivator reads this and persists it on ``CanaryToken.fingerprint_nonce``
|
||||
so the worker can validate incoming ``?k=`` params.
|
||||
"""
|
||||
|
||||
|
||||
class CanaryGenerator(ABC):
|
||||
"""Produces a fake artifact from scratch."""
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Realism contract adapter for canary generators.
|
||||
|
||||
Stage 7 of the realism migration. The orchestrator's planner picks a
|
||||
@@ -47,8 +46,6 @@ _CLASS_TO_GENERATOR: dict[ContentClass, str] = {
|
||||
ContentClass.CANARY_HONEYDOC_DOCX: "honeydoc_docx",
|
||||
ContentClass.CANARY_HONEYDOC_PDF: "honeydoc_pdf",
|
||||
ContentClass.CANARY_MYSQL_DUMP: "mysql_dump",
|
||||
ContentClass.CANARY_FINGERPRINT_HTML: "fingerprint_html",
|
||||
ContentClass.CANARY_FINGERPRINT_SVG: "fingerprint_svg",
|
||||
}
|
||||
|
||||
|
||||
@@ -65,8 +62,6 @@ _GENERATOR_TO_KIND: dict[str, str] = {
|
||||
"honeydoc_pdf": "http",
|
||||
"ssh_key": "dns", # trip is DNS resolution of host comment
|
||||
"mysql_dump": "dns", # trip is DNS resolution of subdomain
|
||||
"fingerprint_html": "http", # obfuscated JS beacons GET /c/<slug>
|
||||
"fingerprint_svg": "http", # same, embedded inside SVG <script>
|
||||
}
|
||||
|
||||
|
||||
@@ -83,8 +78,6 @@ _DEFAULT_PATH: dict[ContentClass, str] = {
|
||||
ContentClass.CANARY_HONEYDOC_DOCX: "/home/{persona}/Documents/Q3-Operations-Review.docx",
|
||||
ContentClass.CANARY_HONEYDOC_PDF: "/home/{persona}/Documents/Q3-Operations-Review.pdf",
|
||||
ContentClass.CANARY_MYSQL_DUMP: "/var/backups/db_backup.sql",
|
||||
ContentClass.CANARY_FINGERPRINT_HTML: "/home/{persona}/Documents/asset_directory.html",
|
||||
ContentClass.CANARY_FINGERPRINT_SVG: "/home/{persona}/Documents/network_topology.svg",
|
||||
}
|
||||
|
||||
|
||||
@@ -143,12 +136,10 @@ async def cultivate(
|
||||
)
|
||||
|
||||
callback_token = _new_callback_token()
|
||||
http_base_str: str = http_base or os.environ.get("DECNET_CANARY_HTTP_BASE") or ""
|
||||
dns_zone_str: str = dns_zone or os.environ.get("DECNET_CANARY_DNS_ZONE") or ""
|
||||
ctx = CanaryContext(
|
||||
callback_token=callback_token,
|
||||
http_base=http_base_str,
|
||||
dns_zone=dns_zone_str,
|
||||
http_base=http_base or os.environ.get("DECNET_CANARY_HTTP_BASE", ""),
|
||||
dns_zone=dns_zone or os.environ.get("DECNET_CANARY_DNS_ZONE", ""),
|
||||
persona="linux", # all our deckies are POSIX in MVP
|
||||
)
|
||||
generator = get_generator(gen_name)
|
||||
@@ -163,7 +154,7 @@ async def cultivate(
|
||||
# attribute a callback if the artifact trips during the plant
|
||||
# itself (improbable but possible — DOCX viewers can preview
|
||||
# autoplay-style).
|
||||
token_data: dict = {
|
||||
await repo.create_canary_token({
|
||||
"kind": _GENERATOR_TO_KIND.get(gen_name, "http"),
|
||||
"decky_name": plan.decky_name,
|
||||
"instrumenter": None,
|
||||
@@ -174,10 +165,7 @@ async def cultivate(
|
||||
"placed_at": datetime.now(timezone.utc),
|
||||
"created_by": created_by,
|
||||
"state": "planted",
|
||||
}
|
||||
if artifact.fingerprint_nonce is not None:
|
||||
token_data["fingerprint_nonce"] = artifact.fingerprint_nonce
|
||||
await repo.create_canary_token(token_data)
|
||||
})
|
||||
|
||||
# Carry the placement_path on the artifact so the orchestrator's
|
||||
# plant_file call uses it. We don't mutate the generator's
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Minimal authoritative DNS server for canary tokens (stdlib only).
|
||||
|
||||
We don't need a full resolver — only enough to:
|
||||
@@ -132,7 +131,7 @@ def _build_response(
|
||||
question = qname_bytes + struct.pack("!HH", query.qtype, query.qclass)
|
||||
|
||||
answer = b""
|
||||
if an_count and answer_ip is not None:
|
||||
if an_count:
|
||||
# Use a name pointer back to the question (offset 12).
|
||||
ptr = struct.pack("!H", 0xC000 | 12)
|
||||
rdata = bytes(int(o) for o in answer_ip.split("."))
|
||||
@@ -170,10 +169,10 @@ class CanaryDNSProtocol(asyncio.DatagramProtocol):
|
||||
self._answer_ip = answer_ip
|
||||
self._transport: Optional[asyncio.DatagramTransport] = None
|
||||
|
||||
def connection_made(self, transport) -> None:
|
||||
self._transport = transport
|
||||
def connection_made(self, transport) -> None: # type: ignore[override]
|
||||
self._transport = transport # type: ignore[assignment]
|
||||
|
||||
def datagram_received(
|
||||
def datagram_received( # type: ignore[override]
|
||||
self, data: bytes, addr: Tuple[str, int],
|
||||
) -> None:
|
||||
try:
|
||||
@@ -191,7 +190,7 @@ class CanaryDNSProtocol(asyncio.DatagramProtocol):
|
||||
return
|
||||
# Known name — answer with our sinkhole IP, then fire the hook.
|
||||
self._send(addr, _build_response(query, answer_ip=self._answer_ip))
|
||||
asyncio.ensure_future(self._hook(slug, query, addr[0]))
|
||||
asyncio.create_task(self._hook(slug, query, addr[0]))
|
||||
|
||||
def _slug_for(self, qname: str) -> Optional[str]:
|
||||
if not self._zone or not qname.endswith(self._suffix):
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Generator and instrumenter factories.
|
||||
|
||||
Same lazy-import pattern as :mod:`decnet.intel.factory` — concrete
|
||||
@@ -22,8 +21,6 @@ KNOWN_GENERATORS: Tuple[str, ...] = (
|
||||
"honeydoc_docx",
|
||||
"honeydoc_pdf",
|
||||
"mysql_dump",
|
||||
"fingerprint_html",
|
||||
"fingerprint_svg",
|
||||
)
|
||||
|
||||
KNOWN_INSTRUMENTERS: Tuple[str, ...] = (
|
||||
@@ -67,16 +64,6 @@ def get_generator(name: str) -> CanaryGenerator:
|
||||
if name == "mysql_dump":
|
||||
from decnet.canary.generators.mysql_dump import MySQLDumpGenerator
|
||||
return MySQLDumpGenerator()
|
||||
if name == "fingerprint_html":
|
||||
from decnet.canary.generators.fingerprint_html import (
|
||||
FingerprintHtmlGenerator,
|
||||
)
|
||||
return FingerprintHtmlGenerator()
|
||||
if name == "fingerprint_svg":
|
||||
from decnet.canary.generators.fingerprint_svg import (
|
||||
FingerprintSvgGenerator,
|
||||
)
|
||||
return FingerprintSvgGenerator()
|
||||
raise ValueError(
|
||||
f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}"
|
||||
)
|
||||
|
||||
@@ -1,292 +0,0 @@
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
// Canary fingerprint payload — the JS that runs inside an opened HTML/SVG
|
||||
// canary, harvests browser primitives, and beacons the result back to the
|
||||
// canary worker. Ported from canary-self-test.html with the rendering UI
|
||||
// stripped out.
|
||||
//
|
||||
// Three placeholders are substituted by the Python builder BEFORE
|
||||
// javascript-obfuscator runs:
|
||||
//
|
||||
// {{BEACON_URL}} → full URL to /c/<callback_token> (no trailing slash)
|
||||
// {{MINT_UUID}} → per-mint UUID, baked into the string-array post-obf
|
||||
// {{MINT_NONCE}} → 16-hex HMAC nonce; the worker rejects ?d=/?o= without it
|
||||
//
|
||||
// Beacon strategy (MVP): a bare GET pixel for "I was opened" reliability,
|
||||
// then a fingerprint payload sent as a base64-URL query param on a second
|
||||
// GET so the existing worker records the hit even before step-4 POST
|
||||
// support lands. Both fail-open: any error short-circuits to next step.
|
||||
|
||||
(async function () {
  // Placeholders below are replaced by the Python builder before obfuscation.
  var BEACON_URL = "{{BEACON_URL}}";
  var MINT_UUID = "{{MINT_UUID}}";
  var MINT_NONCE = "{{MINT_NONCE}}";
  // Accumulated fingerprint; every section writes one key and, on failure,
  // stores { err: "<message>" } under that key instead of aborting the run.
  var fp = { mint: MINT_UUID };

  // Fire-and-forget GET: assigning Image.src triggers the request; any error
  // is swallowed so a failed beacon never stops the remaining steps.
  function fire(url) {
    try {
      var img = new Image();
      img.src = url;
    } catch (e) { /* swallow */ }
  }

  // 1) bare-open beacon — fires regardless of whether the rest succeeds
  fire(BEACON_URL + "?o=1&k=" + MINT_NONCE);

  // Resolve to the lowercase hex SHA-256 of the UTF-8 encoding of `str`.
  function sha256(str) {
    var buf = new TextEncoder().encode(str);
    return crypto.subtle.digest("SHA-256", buf).then(function (h) {
      return Array.from(new Uint8Array(h))
        .map(function (b) { return b.toString(16).padStart(2, "0"); })
        .join("");
    });
  }

  // navigator
  try {
    fp.nav = {
      ua: navigator.userAgent,
      pl: navigator.platform,
      lg: navigator.language,
      lgs: (navigator.languages || []).join(","),
      ck: navigator.cookieEnabled,
      dnt: navigator.doNotTrack,
      hc: navigator.hardwareConcurrency,
      dm: navigator.deviceMemory || null,
      tp: navigator.maxTouchPoints,
      wd: navigator.webdriver === true,
      pdf: navigator.pdfViewerEnabled || null,
    };
  } catch (e) { fp.nav = { err: String(e) }; }

  // screen
  try {
    fp.scr = {
      w: screen.width, h: screen.height,
      aw: screen.availWidth, ah: screen.availHeight,
      cd: screen.colorDepth, pd: screen.pixelDepth,
      dpr: window.devicePixelRatio,
      iw: window.innerWidth, ih: window.innerHeight,
      or: (screen.orientation && screen.orientation.type) || null,
    };
  } catch (e) { fp.scr = { err: String(e) }; }

  // tz / locale
  try {
    var dtf = Intl.DateTimeFormat().resolvedOptions();
    fp.tz = {
      z: dtf.timeZone, lc: dtf.locale,
      ca: dtf.calendar, ns: dtf.numberingSystem,
      off: new Date().getTimezoneOffset(),
    };
  } catch (e) { fp.tz = { err: String(e) }; }

  // connection
  try {
    var c = navigator.connection;
    fp.cn = c ? {
      t: c.effectiveType, dl: c.downlink, rtt: c.rtt, sd: c.saveData,
    } : null;
  } catch (e) { fp.cn = { err: String(e) }; }

  // canvas
  try {
    var cv = document.createElement("canvas");
    cv.width = 280; cv.height = 60;
    var ctx = cv.getContext("2d");
    // fromCodePoint, not fromCharCode: 0x1f600 lies outside the BMP and
    // fromCharCode would silently truncate it to U+F600.
    var glyphs = "c-" + String.fromCodePoint(0x1f600);
    ctx.textBaseline = "top";
    ctx.font = "14px Arial";
    ctx.fillStyle = "#f60";
    ctx.fillRect(125, 1, 62, 20);
    ctx.fillStyle = "#069";
    ctx.fillText(glyphs, 2, 15);
    ctx.fillStyle = "rgba(102,204,0,0.7)";
    ctx.fillText(glyphs, 4, 17);
    var dataURL = cv.toDataURL();
    fp.cv = { h: await sha256(dataURL), n: dataURL.length };
  } catch (e) { fp.cv = { err: String(e) }; }

  // webgl
  try {
    var gc = document.createElement("canvas");
    var gl = gc.getContext("webgl") || gc.getContext("experimental-webgl");
    if (gl) {
      var ext = gl.getExtension("WEBGL_debug_renderer_info");
      fp.gl = {
        v: gl.getParameter(gl.VENDOR),
        r: gl.getParameter(gl.RENDERER),
        ver: gl.getParameter(gl.VERSION),
        sl: gl.getParameter(gl.SHADING_LANGUAGE_VERSION),
        uv: ext ? gl.getParameter(ext.UNMASKED_VENDOR_WEBGL) : null,
        ur: ext ? gl.getParameter(ext.UNMASKED_RENDERER_WEBGL) : null,
      };
    } else { fp.gl = { err: "unavailable" }; }
  } catch (e) { fp.gl = { err: String(e) }; }

  // audio
  try {
    var ACtx = window.OfflineAudioContext || window.webkitOfflineAudioContext;
    if (ACtx) {
      var actx = new ACtx(1, 44100, 44100);
      var osc = actx.createOscillator();
      var cmp = actx.createDynamicsCompressor();
      osc.type = "triangle"; osc.frequency.value = 10000;
      cmp.threshold.value = -50; cmp.knee.value = 40;
      cmp.ratio.value = 12; cmp.attack.value = 0; cmp.release.value = 0.25;
      osc.connect(cmp); cmp.connect(actx.destination);
      osc.start(0);
      var buf = await actx.startRendering();
      // Sum of absolute sample values over a fixed 500-sample window.
      var data = buf.getChannelData(0).slice(4500, 5000);
      var sum = 0;
      for (var i = 0; i < data.length; i++) sum += Math.abs(data[i]);
      fp.au = { h: await sha256(sum.toString()), s: sum.toFixed(8) };
    } else { fp.au = { err: "unavailable" }; }
  } catch (e) { fp.au = { err: String(e) }; }

  // fonts
  try {
    var bases = ["monospace", "sans-serif", "serif"];
    var tests = [
      "Arial", "Helvetica", "Times New Roman", "Courier New", "Verdana",
      "Georgia", "Trebuchet MS", "Comic Sans MS", "Impact",
      "Calibri", "Cambria", "Consolas", "Segoe UI", "Tahoma",
      "JetBrains Mono", "Fira Code", "Cascadia Code", "SF Mono",
      "Menlo", "Monaco", "Source Code Pro", "Inconsolata", "Hack",
      "San Francisco", "Helvetica Neue", "Lucida Grande",
      "DejaVu Sans", "DejaVu Sans Mono", "Liberation Sans",
      "Liberation Mono", "Ubuntu", "Ubuntu Mono", "Roboto",
      "Noto Sans", "Noto Mono",
      "Microsoft YaHei", "SimSun", "PingFang SC", "Hiragino Sans",
      "Hiragino Kaku Gothic Pro", "Yu Gothic", "Meiryo",
      "Malgun Gothic", "Noto Sans CJK",
      "Adobe Garamond Pro", "Myriad Pro", "Minion Pro",
      "Bahnschrift", "Cyberpunk",
    ];
    var sp = document.createElement("span");
    sp.style.fontSize = "72px";
    sp.style.position = "absolute";
    sp.style.left = "-9999px";
    // Plain text only, so textContent is sufficient (no markup parsing).
    sp.textContent = "mmmmmmmmmmlli";
    var det = [];
    try {
      document.body.appendChild(sp);
      // Baseline dimensions of the probe string in each generic family.
      var bs = {};
      for (var bi = 0; bi < bases.length; bi++) {
        sp.style.fontFamily = bases[bi];
        bs[bases[bi]] = { w: sp.offsetWidth, h: sp.offsetHeight };
      }
      // A candidate font counts as detected when it changes the probe's
      // size relative to at least one generic fallback.
      for (var ti = 0; ti < tests.length; ti++) {
        for (var bj = 0; bj < bases.length; bj++) {
          sp.style.fontFamily = "'" + tests[ti] + "'," + bases[bj];
          if (sp.offsetWidth !== bs[bases[bj]].w ||
              sp.offsetHeight !== bs[bases[bj]].h) {
            det.push(tests[ti]); break;
          }
        }
      }
    } finally {
      // Always detach the probe, even when a measurement step threw.
      if (sp.parentNode) sp.parentNode.removeChild(sp);
    }
    fp.ft = {
      h: await sha256(det.slice().sort().join(",")),
      n: det.length, t: tests.length, d: det,
    };
  } catch (e) { fp.ft = { err: String(e) }; }

  // webrtc local ip leak
  try {
    var ips = {}; var cands = [];
    var RPC = window.RTCPeerConnection || window.webkitRTCPeerConnection ||
      window.mozRTCPeerConnection;
    if (RPC) {
      var pc = new RPC({ iceServers: [{ urls: "stun:stun.l.google.com:19302" }] });
      try {
        pc.createDataChannel("");
        pc.onicecandidate = function (e) {
          if (!e.candidate) return;
          cands.push(e.candidate.candidate);
          var m = e.candidate.candidate.match(
            /(\d+\.\d+\.\d+\.\d+|[a-f0-9:]+::[a-f0-9:]+)/);
          if (m) ips[m[1]] = 1;
        };
        var off = await pc.createOffer();
        await pc.setLocalDescription(off);
        // Give ICE gathering a fixed 1.5 s window to emit candidates.
        await new Promise(function (r) { setTimeout(r, 1500); });
      } finally {
        // Release the connection even if offer/description setup rejected.
        pc.close();
      }
      fp.rtc = { ip: Object.keys(ips), n: cands.length, c: cands.slice(0, 3) };
    } else { fp.rtc = { err: "unavailable" }; }
  } catch (e) { fp.rtc = { err: String(e) }; }

  // battery
  try {
    if (navigator.getBattery) {
      var bat = await navigator.getBattery();
      fp.bt = {
        c: bat.charging, l: bat.level,
        // Infinity is not representable in JSON, so it is sent as "inf".
        ct: bat.chargingTime === Infinity ? "inf" : bat.chargingTime,
        dt: bat.dischargingTime === Infinity ? "inf" : bat.dischargingTime,
      };
    } else { fp.bt = { err: "unavailable" }; }
  } catch (e) { fp.bt = { err: String(e) }; }

  // perf timing jitter
  try {
    var samples = [];
    for (var pi = 0; pi < 1000; pi++) {
      var pa = performance.now();
      var x = 0;
      for (var pj = 0; pj < 1000; pj++) x += Math.sqrt(pj);
      samples.push(performance.now() - pa);
    }
    samples.sort(function (a, b) { return a - b; });
    fp.pf = {
      med: samples[500].toFixed(4),
      p95: samples[950].toFixed(4),
      mn: samples[0].toFixed(4),
      mx: samples[999].toFixed(4),
    };
  } catch (e) { fp.pf = { err: String(e) }; }

  // permissions
  try {
    if (navigator.permissions) {
      var names = ["geolocation", "notifications", "camera", "microphone",
        "persistent-storage", "clipboard-read", "clipboard-write"];
      var st = {};
      for (var ni = 0; ni < names.length; ni++) {
        try {
          var r = await navigator.permissions.query({ name: names[ni] });
          st[names[ni]] = r.state;
        } catch (e) { st[names[ni]] = "unsupported"; }
      }
      fp.pm = st;
    } else { fp.pm = { err: "unavailable" }; }
  } catch (e) { fp.pm = { err: String(e) }; }

  // composite identity hash — stable inputs only
  try {
    var stable = [
      fp.cv && fp.cv.h, fp.au && fp.au.h, fp.ft && fp.ft.h,
      fp.gl && fp.gl.ur, fp.nav && fp.nav.pl,
      fp.nav && fp.nav.hc, fp.tz && fp.tz.z,
      fp.scr && (fp.scr.w + "x" + fp.scr.h),
    ].filter(Boolean).join("|");
    fp.id = await sha256(stable);
  } catch (e) { fp.id = { err: String(e) }; }

  // 2) ship the payload as base64url JSON on a GET query param.
  //    The current worker records the hit on /c/<slug>; step-4 worker
  //    will decode ?d= and persist the fingerprint blob.
  try {
    var json = JSON.stringify(fp);
    // UTF-8 encode before btoa, then convert to unpadded base64url.
    var b64 = btoa(unescape(encodeURIComponent(json)))
      .replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
    // chunk if URL would exceed safe limit (~6KB)
    var MAX = 6000;
    if (b64.length <= MAX) {
      fire(BEACON_URL + "?d=" + b64 + "&k=" + MINT_NONCE);
    } else {
      // Random session id lets the receiver group chunks of one payload.
      var sid = (Math.random() * 1e9 | 0).toString(36);
      var total = Math.ceil(b64.length / MAX);
      for (var ci = 0; ci < total; ci++) {
        var part = b64.slice(ci * MAX, (ci + 1) * MAX);
        fire(BEACON_URL + "?s=" + sid + "&i=" + ci + "&n=" + total + "&d=" + part + "&k=" + MINT_NONCE);
      }
    }
  } catch (e) { /* swallow */ }
})();
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Built-in canary generators (synthesised fake artifacts).
|
||||
|
||||
Concrete classes live in sibling modules and are imported lazily by
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fake ``~/.aws/credentials`` block (passive bait).
|
||||
|
||||
This is the **passive** variant — no callback wiring. An attacker
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fake ``.env`` with embedded callback URLs.
|
||||
|
||||
Modern web stacks read environment variables for everything from
|
||||
|
||||
@@ -1,141 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""HTML fingerprint canary — plausible-looking page with an obfuscated
|
||||
browser-fingerprinting payload inlined at the bottom of ``<body>``.
|
||||
|
||||
The visible content is a deliberately mundane "internal directory"
|
||||
table — the kind of file a curious attacker pulls off a decky's
|
||||
filesystem and opens locally to triage. When the file is opened in
|
||||
*any* network-connected browser the obfuscated payload runs and beacons
|
||||
to ``/c/<callback_token>``: first a bare-open pixel, then a chunked
|
||||
fingerprint dump (canvas, audio, fonts, WebGL, WebRTC local IPs,
|
||||
timing jitter, permissions, composite identity hash).
|
||||
|
||||
Determinism: the mint UUID is derived from the callback token via
|
||||
:func:`uuid.uuid5` so the same ``ctx`` always produces byte-identical
|
||||
output, satisfying the generator contract in :mod:`decnet.canary.base`.
|
||||
The obfuscator's seed and polymorphic config bits are likewise
|
||||
callback-token-derived (see :mod:`decnet.canary.obfuscator`).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import uuid
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
from decnet.canary.obfuscator import render_fingerprint_js, nonce_for
|
||||
|
||||
# Fixed UUIDv5 namespace under which every mint UUID is derived.
_MINT_NAMESPACE = uuid.UUID("a3f7c821-9d1e-4b6a-8c2d-1e4f9a7b3c5d")


def _mint_uuid_for(callback_token: str) -> str:
    """Return the mint UUID for *callback_token* as a string.

    The value is a name-based (version 5) UUID computed from the token
    inside ``_MINT_NAMESPACE``, so equal tokens always yield equal UUIDs.
    """
    derived = uuid.uuid5(_MINT_NAMESPACE, callback_token)
    return str(derived)
|
||||
|
||||
|
||||
def _stable_int(callback_token: str, salt: str = "") -> int:
    """Map ``(callback_token, salt)`` to a reproducible non-negative integer.

    The built-in ``hash`` is randomised per process, which would break the
    requirement that generator output is byte-identical between runs, so
    the value is taken from the first four bytes (big-endian) of the
    SHA-256 digest of ``"<callback_token>|<salt>"`` instead.
    """
    material = f"{callback_token}|{salt}".encode("utf-8")
    digest = hashlib.sha256(material).digest()
    return int.from_bytes(digest[:4], byteorder="big")
|
||||
|
||||
|
||||
_PAGE_TEMPLATE = """<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Internal Asset Directory</title>
|
||||
<style>
|
||||
body{{font-family:Segoe UI,Arial,sans-serif;background:#fafafa;color:#222;
|
||||
margin:24px;font-size:13px}}
|
||||
h1{{font-size:18px;margin:0 0 4px 0}}
|
||||
.sub{{color:#777;font-size:11px;margin-bottom:18px}}
|
||||
table{{border-collapse:collapse;width:100%;background:#fff;
|
||||
box-shadow:0 1px 2px rgba(0,0,0,.05)}}
|
||||
th,td{{padding:6px 10px;border-bottom:1px solid #eee;text-align:left}}
|
||||
th{{background:#f4f4f4;font-weight:600;font-size:11px;
|
||||
text-transform:uppercase;letter-spacing:.5px;color:#555}}
|
||||
tr:hover td{{background:#fafbff}}
|
||||
.foot{{margin-top:16px;color:#999;font-size:11px}}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Internal Asset Directory</h1>
|
||||
<div class="sub">last sync: {sync_label} · {row_count} entries · CONFIDENTIAL</div>
|
||||
<table>
|
||||
<tr><th>Hostname</th><th>Owner</th><th>Role</th><th>VLAN</th><th>Notes</th></tr>
|
||||
{rows}
|
||||
</table>
|
||||
<div class="foot">page generated by directory-sync v2.4.1 — do not redistribute</div>
|
||||
<script>{payload}</script>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
|
||||
_ROW_POOL = (
|
||||
("ny-app-01.corp.local", "k.tanaka", "app server", "vlan20", "primary"),
|
||||
("ny-db-01.corp.local", "ops", "postgres primary", "vlan30", "backup nightly"),
|
||||
("ny-build-02.corp.local", "ci-bot", "jenkins agent", "vlan40", ""),
|
||||
("sf-vpn-01.corp.local", "netsec", "wireguard endpoint", "vlan10", "external"),
|
||||
("ldn-mail-03.corp.local", "j.weber", "exchange edge", "vlan50", ""),
|
||||
("hk-cache-01.corp.local", "ops", "redis replica", "vlan30", "lag <1s"),
|
||||
("br-dev-04.corp.local", "m.silva", "dev sandbox", "vlan60", "ephemeral"),
|
||||
("eu-bastion-02.corp.local", "secops", "ssh jump host", "vlan10", "mfa required"),
|
||||
("us-archive-01.corp.local", "compliance", "log archive", "vlan70", "retain 7y"),
|
||||
)
|
||||
|
||||
|
||||
def _build_rows(callback_token: str) -> tuple[str, int]:
    """Render the directory table rows for one mint.

    A start index and a row count (5 to 8) are both derived from
    *callback_token* via ``_stable_int``; that many consecutive entries are
    taken from ``_ROW_POOL``, wrapping around at the end of the pool.

    Returns:
        A ``(markup, count)`` pair: the ``<tr>`` elements joined by
        newlines, and the number of rows rendered.
    """
    # Local import keeps this block self-contained; the module's import
    # header does not bring in ``html``.
    from html import escape

    pool_size = len(_ROW_POOL)
    start = _stable_int(callback_token, "pick") % pool_size
    count = 5 + (_stable_int(callback_token, "take") % 4)
    selected = [_ROW_POOL[(start + i) % pool_size] for i in range(count)]
    # Cell text is escaped because pool entries may contain markup-significant
    # characters (e.g. "lag <1s"); quotes are left alone since the text is
    # element content, not an attribute value.
    cells = "\n".join(
        "<tr>"
        + "".join(f"<td>{escape(c, quote=False)}</td>" for c in row)
        + "</tr>"
        for row in selected
    )
    return cells, len(selected)
|
||||
|
||||
|
||||
def _sync_label(callback_token: str) -> str:
    """Return the token-derived "last sync" timestamp shown on the page.

    The day (01-28) and hour (00-23) come from ``_stable_int``; year, month
    and minute are fixed, giving e.g. ``"2026-04-07 13:14 UTC"``.
    """
    day_of_month = 1 + _stable_int(callback_token, "day") % 28
    hour_of_day = _stable_int(callback_token, "hour") % 24
    return "2026-04-{:02d} {:02d}:14 UTC".format(day_of_month, hour_of_day)
|
||||
|
||||
|
||||
class FingerprintHtmlGenerator(CanaryGenerator):
    """Generator for the "Internal Asset Directory" HTML fingerprint canary.

    The page is rendered from ``_PAGE_TEMPLATE`` with token-derived table
    rows and sync label, and carries the obfuscated fingerprint script
    inline in a ``<script>`` element at the end of ``<body>``.
    """

    name = "fingerprint_html"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the HTML artifact for *ctx*.

        Every variable part (mint UUID, nonce, script, rows, sync label) is
        derived from ``ctx.callback_token`` and ``ctx.http_base``.

        Raises:
            Whatever ``nonce_for`` / ``render_fingerprint_js`` raise; nothing
            is caught here.
        """
        token = ctx.callback_token
        mint_uuid = _mint_uuid_for(token)
        nonce = nonce_for(token, mint_uuid)
        script = render_fingerprint_js(
            callback_token=token,
            http_base=ctx.http_base,
            mint_uuid=mint_uuid,
            nonce=nonce,
        )
        table_rows, n_rows = _build_rows(token)
        document = _PAGE_TEMPLATE.format(
            sync_label=_sync_label(token),
            row_count=n_rows,
            rows=table_rows,
            payload=script,
        )
        beacon_url = ctx.http_base.rstrip("/") + "/c/" + token
        notes = [
            f"obfuscated fingerprinter beacons={beacon_url}",
            f"mint_uuid={mint_uuid}",
        ]
        return CanaryArtifact(
            # NOTE(review): path is left empty here; presumably resolved by
            # the caller from the generator name — confirm.
            path="",
            content=document.encode("utf-8"),
            mode=0o644,
            # Negative offset of 14 days, expressed in seconds.
            mtime_offset=-86400 * 14,
            generator=self.name,
            fingerprint_nonce=nonce,
            notes=notes,
        )
|
||||
@@ -1,89 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""SVG fingerprint canary — standalone SVG with an embedded ``<script>``
|
||||
that runs the obfuscated fingerprinter when the file is opened directly
|
||||
in a browser.
|
||||
|
||||
SVG ``<script>`` only fires when the SVG is loaded as a top-level
|
||||
document (or via ``<object>``/``<iframe>``); it's *blocked* when the
|
||||
SVG is referenced from another page's ``<img>``. That's the right
|
||||
posture for canary use: an attacker browsing the decky filesystem and
|
||||
double-clicking a stray ``network_diagram.svg`` triggers it; rendering
|
||||
inside a sandboxed CMS preview does not.
|
||||
|
||||
Same determinism guarantees as :mod:`fingerprint_html`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
from decnet.canary.generators.fingerprint_html import _mint_uuid_for, _stable_int
|
||||
from decnet.canary.obfuscator import render_fingerprint_js, nonce_for
|
||||
|
||||
|
||||
_DIAGRAM_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 600 360" width="600" height="360">
|
||||
<style>
|
||||
.box{{fill:#f7f9fb;stroke:#7a93ad;stroke-width:1.2}}
|
||||
.lbl{{font:12px Segoe UI,Arial,sans-serif;fill:#2a3a4a}}
|
||||
.edge{{stroke:#7a93ad;stroke-width:1.2;fill:none}}
|
||||
.title{{font:bold 14px Segoe UI,Arial,sans-serif;fill:#1a2a3a}}
|
||||
.cap{{font:10px Segoe UI,Arial,sans-serif;fill:#6a7a8a}}
|
||||
</style>
|
||||
<text class="title" x="20" y="28">Network Topology — {region} segment</text>
|
||||
<text class="cap" x="20" y="44">draft v{ver} · last reviewed {review}</text>
|
||||
<rect class="box" x="40" y="80" width="120" height="50" rx="4"/>
|
||||
<text class="lbl" x="100" y="110" text-anchor="middle">edge gw</text>
|
||||
<rect class="box" x="240" y="80" width="120" height="50" rx="4"/>
|
||||
<text class="lbl" x="300" y="110" text-anchor="middle">core sw</text>
|
||||
<rect class="box" x="440" y="80" width="120" height="50" rx="4"/>
|
||||
<text class="lbl" x="500" y="110" text-anchor="middle">app cluster</text>
|
||||
<rect class="box" x="240" y="220" width="120" height="50" rx="4"/>
|
||||
<text class="lbl" x="300" y="250" text-anchor="middle">db tier</text>
|
||||
<path class="edge" d="M160 105 L240 105"/>
|
||||
<path class="edge" d="M360 105 L440 105"/>
|
||||
<path class="edge" d="M300 130 L300 220"/>
|
||||
<script type="application/ecmascript"><![CDATA[
|
||||
{payload}
|
||||
]]></script>
|
||||
</svg>
|
||||
"""
|
||||
|
||||
|
||||
_REGIONS = ("us-east", "eu-central", "ap-south", "us-west", "sa-east")
|
||||
|
||||
|
||||
class FingerprintSvgGenerator(CanaryGenerator):
    """Generator for the "Network Topology" SVG fingerprint canary.

    The diagram is rendered from ``_DIAGRAM_TEMPLATE`` with a token-derived
    region, draft version and review date, and carries the obfuscated
    fingerprint script inside a CDATA-wrapped ``<script>`` element.
    """

    name = "fingerprint_svg"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the SVG artifact for *ctx*.

        Every variable part (mint UUID, nonce, script, region, version,
        review date) is derived from ``ctx.callback_token`` and
        ``ctx.http_base``.

        Raises:
            Whatever ``nonce_for`` / ``render_fingerprint_js`` raise; nothing
            is caught here.
        """
        mint_uuid = _mint_uuid_for(ctx.callback_token)
        nonce = nonce_for(ctx.callback_token, mint_uuid)
        payload = render_fingerprint_js(
            callback_token=ctx.callback_token,
            http_base=ctx.http_base,
            mint_uuid=mint_uuid,
            nonce=nonce,
        )
        # The template wraps the script in <![CDATA[ ... ]]>.  A literal
        # "]]>" in the JS (e.g. ``a[b[c]]>d``) would end that section early
        # and make the XML malformed, so any occurrence is split across two
        # adjacent CDATA sections; an XML parser joins them back into the
        # original text.
        payload = payload.replace("]]>", "]]]]><![CDATA[>")
        region = _REGIONS[_stable_int(ctx.callback_token, "reg") % len(_REGIONS)]
        ver = 1 + (_stable_int(ctx.callback_token, "ver") % 6)
        day = _stable_int(ctx.callback_token, "day") % 28 + 1
        body = _DIAGRAM_TEMPLATE.format(
            region=region,
            ver=ver,
            review=f"2026-03-{day:02d}",
            payload=payload,
        )
        beacon = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        return CanaryArtifact(
            # NOTE(review): path is left empty here; presumably resolved by
            # the caller from the generator name — confirm.
            path="",
            content=body.encode("utf-8"),
            mode=0o644,
            # Negative offset of 30 days, expressed in seconds.
            mtime_offset=-86400 * 30,
            generator=self.name,
            fingerprint_nonce=nonce,
            notes=[
                f"obfuscated fingerprinter beacons={beacon}",
                f"mint_uuid={mint_uuid}",
            ],
        )
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fake ``.git/config`` with an attacker-bait remote URL.
|
||||
|
||||
The ``[remote "origin"]`` ``url`` field is the natural place to embed
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Built-in honeydoc — a minimal HTML "report" with a tracking pixel.
|
||||
|
||||
This is the *fallback* honeydoc used when the operator hasn't
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Real-DOCX honeydoc generator.
|
||||
|
||||
Synthesises a minimal but structurally valid DOCX from scratch via
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Real-PDF honeydoc generator (uses :mod:`pikepdf`).
|
||||
|
||||
Builds a one-page PDF with the same Q3-review body as the HTML/DOCX
|
||||
@@ -44,7 +43,7 @@ class HoneydocPdfGenerator(CanaryGenerator):
|
||||
|
||||
def generate(self, ctx: CanaryContext) -> CanaryArtifact:
|
||||
try:
|
||||
from pikepdf import Pdf, Name, Dictionary, String
|
||||
from pikepdf import Pdf, Name, Dictionary, String # type: ignore[import-not-found]
|
||||
except ImportError as e:
|
||||
raise InstrumenterRejectedError(
|
||||
"honeydoc_pdf requires pikepdf; install it (`pip install "
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fake ``mysqldump`` output that phones home on import.
|
||||
|
||||
Mirrors the Canarytokens.org MySQL-dump trick. When a victim runs
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fake SSH private key with the callback host in the comment.
|
||||
|
||||
OpenSSH private keys carry a free-form comment field — typically
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Built-in canary instrumenters (operator-uploaded artifact mutation).
|
||||
|
||||
Lazy-imported by :func:`decnet.canary.factory.get_instrumenter`.
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""DOCX instrumenter — inject a remote image into the body.
|
||||
|
||||
DOCX files are zip archives carrying ``word/document.xml`` (the body)
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""HTML instrumenter — append a 1×1 tracking pixel.
|
||||
|
||||
Stdlib-only. We don't parse the HTML; we just inject the ``<img>``
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Image instrumenter — requires :mod:`PIL` (optional dependency).
|
||||
|
||||
For PNG/JPEG/GIF we append a tEXt/EXIF chunk carrying the slug so
|
||||
@@ -33,7 +32,7 @@ class ImageInstrumenter(CanaryInstrumenter):
|
||||
self, blob: bytes, ctx: CanaryContext, *, target_path: str,
|
||||
) -> CanaryArtifact:
|
||||
try:
|
||||
from PIL import Image, PngImagePlugin
|
||||
from PIL import Image, PngImagePlugin # type: ignore[import-not-found]
|
||||
except ImportError as e:
|
||||
raise InstrumenterRejectedError(
|
||||
"image instrumenter requires Pillow; install it (`pip "
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Passthrough instrumenter — bytes go to disk unchanged.
|
||||
|
||||
Used as the dispatch fallback for content types we can't safely
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""PDF instrumenter — requires :mod:`pikepdf` (optional dependency).
|
||||
|
||||
PDF embedding is non-trivial: the cleanest place to put a callback
|
||||
@@ -35,7 +34,7 @@ class PdfInstrumenter(CanaryInstrumenter):
|
||||
self, blob: bytes, ctx: CanaryContext, *, target_path: str,
|
||||
) -> CanaryArtifact:
|
||||
try:
|
||||
import pikepdf
|
||||
import pikepdf # type: ignore[import-not-found]
|
||||
except ImportError as e:
|
||||
raise InstrumenterRejectedError(
|
||||
"PDF instrumenter requires pikepdf; install it (`pip "
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Plain-text / config-file instrumenter.
|
||||
|
||||
Two embedding strategies, picked in order:
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""XLSX instrumenter — embed an external-image link.
|
||||
|
||||
XLSX is structurally identical to DOCX (Office Open XML zip). The
|
||||
|
||||
@@ -1,178 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Per-mint JS obfuscator wrapper.
|
||||
|
||||
Thin Python wrapper around the ``javascript-obfuscator`` Node package.
|
||||
Used by the fingerprint generators / instrumenters to produce a unique,
|
||||
hard-to-statically-analyse JS blob per canary mint.
|
||||
|
||||
Two design choices flow from the canary contract in :mod:`base`:
|
||||
|
||||
* **Determinism.** Generators must return byte-identical artifacts for
|
||||
the same ``(callback_token, http_base, dns_zone, persona)``. We
|
||||
derive a numeric seed from the callback token and pass it to the
|
||||
obfuscator's own ``seed`` option, and we derive the polymorphic
|
||||
config bits from the same hash so a re-mint reproduces exactly.
|
||||
* **Per-mint uniqueness.** Two different callback tokens produce
|
||||
structurally different output: different identifier names, different
|
||||
string-array rotation, optionally different transforms enabled.
|
||||
|
||||
The Node helper at ``_obfuscate_helper.js`` is invoked via subprocess.
|
||||
We pass code+options as JSON on stdin and read the obfuscated result
|
||||
from stdout. Stderr surfaces obfuscator failures.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import os
|
||||
import subprocess # nosec B404 — Node helper exec is the whole point
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
_HELPER = Path(__file__).parent / "_obfuscate_helper.js"
|
||||
_PAYLOAD = Path(__file__).parent / "fingerprint_payload.js"
|
||||
|
||||
# Node binary path. Honor DECNET_NODE_BIN so deployments can pin a
|
||||
# specific runtime; default to PATH lookup.
|
||||
_NODE_BIN = os.environ.get("DECNET_NODE_BIN", "node")
|
||||
|
||||
# Hard timeout for the obfuscator subprocess. Real runs on the
|
||||
# fingerprint payload sit well under 5s on a dev box.
|
||||
_TIMEOUT_S = 30
|
||||
|
||||
|
||||
class ObfuscatorError(RuntimeError):
    """Signals that the Node obfuscator helper failed or produced no output."""
|
||||
|
||||
|
||||
class FingerprintSecretMissing(RuntimeError):
    """Signals that ``DECNET_CANARY_FINGERPRINT_SECRET`` is not set.

    The per-mint nonce baked into fingerprint canaries is derived from this
    server-side secret.  Without it the worker has nothing to validate
    incoming fingerprint beacons against, so minting fails loudly instead
    of shipping a canary that could be defeated.
    """
|
||||
|
||||
|
||||
_FINGERPRINT_SECRET_ENV = "DECNET_CANARY_FINGERPRINT_SECRET" # nosec B105 — this is an env var name, not a hardcoded password
|
||||
|
||||
|
||||
def nonce_for(callback_token: str, mint_uuid: str) -> str:
    """Return the 16-hex-character fingerprint nonce for one mint.

    The nonce is the first 16 hex digits (64 bits) of an HMAC-SHA256 whose
    key is the master secret read from the environment and whose message is
    ``"<callback_token>|<mint_uuid>"``.  That is short enough for a query
    string while still preventing forgery from the slug alone.

    Raises:
        FingerprintSecretMissing: the secret environment variable is unset
            or empty.
    """
    master = os.environ.get(_FINGERPRINT_SECRET_ENV, "")
    if not master:
        raise FingerprintSecretMissing(
            f"{_FINGERPRINT_SECRET_ENV} is unset; fingerprint canaries cannot mint"
        )
    message = f"{callback_token}|{mint_uuid}".encode("utf-8")
    mac = hmac.new(
        key=master.encode("utf-8"), msg=message, digestmod=hashlib.sha256
    )
    return mac.hexdigest()[:16]
|
||||
|
||||
|
||||
def _seed_from_token(callback_token: str) -> int:
    """Turn *callback_token* into a non-negative 31-bit obfuscator seed.

    ``javascript-obfuscator`` takes ``seed`` as a number (int32-ish).  The
    first four bytes of the token's SHA-256 digest are read big-endian and
    the top bit is dropped, which spreads seeds uniformly over the positive
    31-bit range.
    """
    prefix = hashlib.sha256(callback_token.encode("utf-8")).digest()[:4]
    return int.from_bytes(prefix, "big") % (1 << 31)
|
||||
|
||||
|
||||
def _config_from_seed(seed: int) -> dict[str, Any]:
    """Derive the ``javascript-obfuscator`` options for one mint from *seed*.

    Individual bits of the seed select which transforms run and how hard,
    so two mints differ structurally and not only in identifier names.
    String-array and renaming are always on; only the dials vary.

    Bit layout (least-significant bit is 0):
        0       string-array encoding (``base64`` / ``rc4``)
        0-2     added to the split-strings chunk length (4..11)
        1-8     control-flow flattening threshold, 0.5 .. ~1.0
        9-16    dead-code injection threshold, 0.2 .. ~0.7
        17      ``transformObjectKeys``
        18      ``numbersToExpressions``
        19      ``simplify``
    """

    def flag(position: int) -> bool:
        # True when the bit at *position* of the seed is set.
        return bool((seed >> position) & 1)

    encoding = ("base64", "rc4")[seed & 1]
    cff_threshold = 0.5 + ((seed >> 1) & 0xFF) / 512.0
    dci_threshold = 0.2 + ((seed >> 9) & 0xFF) / 512.0

    options: dict[str, Any] = {}
    options["compact"] = True
    options["seed"] = seed
    options["controlFlowFlattening"] = True
    options["controlFlowFlatteningThreshold"] = round(cff_threshold, 3)
    options["deadCodeInjection"] = True
    options["deadCodeInjectionThreshold"] = round(dci_threshold, 3)
    options["stringArray"] = True
    options["stringArrayEncoding"] = [encoding]
    options["stringArrayThreshold"] = 1
    options["stringArrayRotate"] = True
    options["stringArrayShuffle"] = True
    options["splitStrings"] = True
    options["splitStringsChunkLength"] = 4 + (seed & 7)
    options["transformObjectKeys"] = flag(17)
    options["numbersToExpressions"] = flag(18)
    options["simplify"] = flag(19)
    options["selfDefending"] = False  # breaks SVG embed; not worth the cost
    options["renameGlobals"] = False
    options["identifierNamesGenerator"] = "mangled-shuffled"
    return options
|
||||
|
||||
|
||||
def obfuscate(code: str, *, callback_token: str) -> str:
    """Obfuscate *code* deterministically per *callback_token*.

    The seed and option set are derived from the token, serialised together
    with the source as JSON, and piped to the Node helper on stdin; the
    helper's stdout is returned unchanged.

    Args:
        code: JavaScript source to obfuscate.
        callback_token: Token that the seed and options are derived from.

    Returns:
        The helper's stdout, i.e. the obfuscated JavaScript.

    Raises:
        ObfuscatorError: the Node binary is missing, the helper timed out,
            exited non-zero, or produced only whitespace.
    """
    seed = _seed_from_token(callback_token)
    options = _config_from_seed(seed)
    payload = json.dumps({"code": code, "options": options})
    try:
        proc = subprocess.run(  # nosec B603 — argv-form, no shell, fixed helper path; payload is JSON on stdin, not in argv
            [_NODE_BIN, str(_HELPER)],
            input=payload,
            capture_output=True,
            text=True,
            # Node writes UTF-8; pin the codec so decoding does not depend
            # on the host locale (text=True alone uses the locale default).
            encoding="utf-8",
            timeout=_TIMEOUT_S,
            check=False,
        )
    except FileNotFoundError as e:
        raise ObfuscatorError(f"node binary not found: {_NODE_BIN!r}") from e
    except subprocess.TimeoutExpired as e:
        raise ObfuscatorError("javascript-obfuscator timed out") from e
    if proc.returncode != 0:
        # Cap stderr so a noisy stack trace cannot bloat the message.
        raise ObfuscatorError(
            f"javascript-obfuscator failed rc={proc.returncode} "
            f"stderr={proc.stderr.strip()[:400]}"
        )
    out = proc.stdout
    if not out.strip():
        raise ObfuscatorError("javascript-obfuscator returned empty output")
    return out
|
||||
|
||||
|
||||
def render_fingerprint_js(
    *, callback_token: str, http_base: str, mint_uuid: str, nonce: str,
) -> str:
    """Produce the obfuscated fingerprint script for one mint.

    The payload template is read from disk, its ``{{BEACON_URL}}``,
    ``{{MINT_UUID}}`` and ``{{MINT_NONCE}}`` placeholders are filled in,
    and the result is passed to :func:`obfuscate` keyed on the callback
    token.  The script appends the nonce as ``&k=`` to each beacon URL it
    emits; the worker rejects fingerprint payloads whose ``?k=`` does not
    match the row's :attr:`CanaryToken.fingerprint_nonce`.
    """
    source = _PAYLOAD.read_text(encoding="utf-8")
    beacon_url = http_base.rstrip("/") + "/c/" + callback_token
    # Applied in this order, matching the placeholder list above.
    substitutions = (
        ("{{BEACON_URL}}", beacon_url),
        ("{{MINT_UUID}}", mint_uuid),
        ("{{MINT_NONCE}}", nonce),
    )
    for placeholder, value in substitutions:
        source = source.replace(placeholder, value)
    return obfuscate(source, callback_token=callback_token)
|
||||
@@ -1,10 +0,0 @@
|
||||
{
|
||||
"name": "decnet-canary-obfuscator",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"description": "Node helper for decnet.canary.obfuscator — javascript-obfuscator wrapper invoked via subprocess.",
|
||||
"main": "_obfuscate_helper.js",
|
||||
"dependencies": {
|
||||
"javascript-obfuscator": "^5.4.2"
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Persona-aware path resolution for canary artifacts.
|
||||
|
||||
Linux-persona deckies use POSIX-shaped paths under ``/home/<user>``.
|
||||
@@ -29,8 +28,6 @@ _LINUX_DEFAULTS: dict[str, str] = {
|
||||
"honeydoc": "/home/{user}/Documents/quarterly_report.html",
|
||||
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
|
||||
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
|
||||
"fingerprint_html": "/home/{user}/Documents/asset_directory.html",
|
||||
"fingerprint_svg": "/home/{user}/Documents/network_topology.svg",
|
||||
}
|
||||
|
||||
_WINDOWS_DEFAULTS: dict[str, str] = {
|
||||
@@ -41,8 +38,6 @@ _WINDOWS_DEFAULTS: dict[str, str] = {
|
||||
"honeydoc": "/home/{user}/Documents/quarterly_report.html",
|
||||
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
|
||||
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
|
||||
"fingerprint_html": "/home/{user}/Documents/asset_directory.html",
|
||||
"fingerprint_svg": "/home/{user}/Documents/network_topology.svg",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Plant / revoke canary artifacts inside running decky containers.
|
||||
|
||||
Single entry point per operation:
|
||||
@@ -21,8 +20,11 @@ shape but speaks bytes-via-base64 over the wire.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import os
|
||||
from datetime import datetime, timedelta, timezone
|
||||
import shlex
|
||||
import time
|
||||
from secrets import token_urlsafe
|
||||
from typing import Any, Iterable, Optional
|
||||
|
||||
@@ -32,16 +34,13 @@ from decnet.bus.factory import get_bus
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext
|
||||
from decnet.canary.factory import get_generator
|
||||
from decnet.canary.paths import default_path_for
|
||||
from decnet.decky_io import (
|
||||
delete_file_from_container,
|
||||
resolve_topology_container,
|
||||
write_file_to_container,
|
||||
)
|
||||
from decnet.logging import get_logger
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
|
||||
log = get_logger("canary.planter")
|
||||
|
||||
_DOCKER = "docker"
|
||||
_TIMEOUT = 8.0
|
||||
# Container suffix — matches the orchestrator SSH driver's convention
|
||||
# (``<decky_name>-ssh``). Canary placement always happens through the
|
||||
# ssh container because every decky has one and it carries the most
|
||||
@@ -53,16 +52,62 @@ def _container_for(decky_name: str) -> str:
|
||||
return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
|
||||
|
||||
|
||||
# resolve_topology_container is re-exported from decky_io for back-compat
|
||||
# with callers (tests, deploy hook) that imported it from this module
|
||||
# before the decky_io extraction.
|
||||
__all__ = [
|
||||
"plant",
|
||||
"revoke",
|
||||
"resolve_topology_container",
|
||||
"seed_baseline",
|
||||
"seed_baseline_topology",
|
||||
]
|
||||
def _dirname(path: str) -> str:
    """Return the directory part of a ``/``-separated *path*.

    The result is everything before the last ``/``. When *path* has no
    ``/`` at all, or its only ``/`` is the leading one, ``"/"`` is
    returned instead.
    """
    head, sep, _leaf = path.rpartition("/")
    # ``sep`` is empty when there is no slash; ``head`` is empty when the
    # last slash is the first character. Both cases collapse to the root.
    return head if sep and head else "/"
|
||||
|
||||
|
||||
async def _run(
    argv: list[str], *, stdin_bytes: Optional[bytes] = None,
) -> tuple[int, str, str]:
    """Run *argv* as a subprocess and return ``(returncode, stdout, stderr)``.

    *stdin_bytes*, when not ``None``, is written to the child's stdin; a
    stdin pipe is only attached in that case. stdout and stderr are
    captured and decoded as UTF-8 with undecodable bytes replaced.

    Failures are reported through the return value instead of exceptions:

    * ``(127, "", "argv[0] not found: ...")`` when the executable is missing,
    * ``(124, "", "timeout")`` when the child does not finish within
      ``_TIMEOUT`` seconds (the child is killed),
    * a return code of ``-1`` when the child's code is unavailable.
    """
    try:
        proc = await asyncio.create_subprocess_exec(
            *argv,
            stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except FileNotFoundError as exc:
        return 127, "", f"argv[0] not found: {exc}"
    try:
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(input=stdin_bytes), timeout=_TIMEOUT,
        )
    except asyncio.TimeoutError:
        try:
            proc.kill()
        except ProcessLookupError:
            # Child exited between the timeout firing and the kill.
            pass
        # Wait for the killed child to actually exit so its teardown is
        # finished before we return instead of being left pending on the
        # event loop. Bounded so a stuck child cannot hang the caller.
        try:
            await asyncio.wait_for(proc.wait(), timeout=1.0)
        except asyncio.TimeoutError:
            pass
        return 124, "", "timeout"
    return (
        proc.returncode if proc.returncode is not None else -1,
        stdout.decode("utf-8", "replace"),
        stderr.decode("utf-8", "replace"),
    )
|
||||
|
||||
|
||||
def _build_plant_command(artifact: CanaryArtifact) -> tuple[str, bytes]:
    """Build the shell script and stdin payload that plant one artifact.

    Returns ``(script, payload)``:

    * *script* is four steps joined with ``&&``: create the parent
      directory, decode stdin into the target path with ``base64 -d``,
      ``chmod`` it to ``artifact.mode`` (rendered in octal), and
      ``touch -d @<ts>`` it to "now + ``artifact.mtime_offset``".
    * *payload* is ``artifact.content`` base64-encoded.

    The content travels over stdin rather than argv so binary data is
    safe and large artifacts are not subject to the kernel's ARG_MAX
    limit. Every path placed in the script is shell-quoted.
    """
    payload = base64.b64encode(artifact.content)
    stamp = int(time.time() + artifact.mtime_offset)
    perms = oct(artifact.mode)[2:]  # drop the "0o" prefix for chmod

    parent = shlex.quote(_dirname(artifact.path))
    target = shlex.quote(artifact.path)
    script = " && ".join((
        f"mkdir -p {parent}",
        f"base64 -d > {target}",
        f"chmod {perms} {target}",
        f"touch -d @{stamp} {target}",
    ))
    return script, payload
|
||||
|
||||
|
||||
async def _publish(
|
||||
@@ -94,7 +139,6 @@ async def plant(
|
||||
repo: Optional[BaseRepository] = None,
|
||||
publish: bool = True,
|
||||
bus: Optional[BaseBus] = None,
|
||||
container: Optional[str] = None,
|
||||
) -> tuple[bool, Optional[str]]:
|
||||
"""Write *artifact* into the decky's ssh container.
|
||||
|
||||
@@ -113,12 +157,13 @@ async def plant(
|
||||
await repo.update_canary_token_state(token_uuid, "failed", err)
|
||||
return False, err
|
||||
|
||||
target_container = container or _container_for(decky_name)
|
||||
mtime = datetime.now(timezone.utc) + timedelta(seconds=artifact.mtime_offset)
|
||||
success, error = await write_file_to_container(
|
||||
target_container, artifact.path, artifact.content,
|
||||
mode=artifact.mode, mtime=mtime,
|
||||
)
|
||||
sh_cmd, stdin_payload = _build_plant_command(artifact)
|
||||
# ``-i`` keeps stdin attached so base64 -d inside the container can
|
||||
# consume the encoded payload streamed from the host.
|
||||
argv = [_DOCKER, "exec", "-i", _container_for(decky_name), "sh", "-c", sh_cmd]
|
||||
rc, _stdout, stderr = await _run(argv, stdin_bytes=stdin_payload)
|
||||
success = rc == 0
|
||||
error = None if success else (stderr.strip()[:256] or f"rc={rc}")
|
||||
|
||||
if repo is not None:
|
||||
if success:
|
||||
@@ -137,8 +182,8 @@ async def plant(
|
||||
|
||||
if not success:
|
||||
log.warning(
|
||||
"canary.plant failed decky=%s token=%s container=%s err=%r",
|
||||
decky_name, token_uuid, target_container, error,
|
||||
"canary.plant failed decky=%s token=%s rc=%d stderr=%r",
|
||||
decky_name, token_uuid, rc, stderr[:120],
|
||||
)
|
||||
return success, error
|
||||
|
||||
@@ -151,7 +196,6 @@ async def revoke(
|
||||
repo: Optional[BaseRepository] = None,
|
||||
publish: bool = True,
|
||||
bus: Optional[BaseBus] = None,
|
||||
container: Optional[str] = None,
|
||||
) -> tuple[bool, Optional[str]]:
|
||||
"""Best-effort unlink + state transition + bus publish.
|
||||
|
||||
@@ -159,10 +203,11 @@ async def revoke(
|
||||
the file is gone after the call (whether we deleted it or it was
|
||||
already missing); only docker / container-down errors return False.
|
||||
"""
|
||||
target_container = container or _container_for(decky_name)
|
||||
success, error = await delete_file_from_container(
|
||||
target_container, placement_path,
|
||||
)
|
||||
sh_cmd = f"rm -f {shlex.quote(placement_path)}"
|
||||
argv = [_DOCKER, "exec", _container_for(decky_name), "sh", "-c", sh_cmd]
|
||||
rc, _stdout, stderr = await _run(argv)
|
||||
success = rc == 0
|
||||
error = None if success else (stderr.strip()[:256] or f"rc={rc}")
|
||||
|
||||
if repo is not None:
|
||||
await repo.update_canary_token_state(token_uuid, "revoked", error if not success else None)
|
||||
@@ -205,7 +250,6 @@ async def seed_baseline(
|
||||
persona: str = "linux",
|
||||
created_by: str = "system",
|
||||
bus: Optional[BaseBus] = None,
|
||||
container: Optional[str] = None,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Plant the configured baseline canary set on one decky.
|
||||
|
||||
@@ -249,59 +293,9 @@ async def seed_baseline(
|
||||
await plant(
|
||||
decky_name, artifact,
|
||||
token_uuid=token_uuid, repo=repo, publish=True, bus=bus,
|
||||
container=container,
|
||||
)
|
||||
out.append({
|
||||
"token_uuid": token_uuid, "generator": gen_name, "kind": kind,
|
||||
"callback_token": slug, "placement_path": artifact.path,
|
||||
})
|
||||
return out
|
||||
|
||||
|
||||
async def seed_baseline_topology(
    repo: BaseRepository,
    topology_id: str,
    *,
    created_by: str = "system",
    bus: Optional[BaseBus] = None,
) -> list[dict[str, Any]]:
    """Seed baseline canaries on each decky of topology *topology_id*.

    The topology is loaded with ``hydrate``; if it does not exist a
    warning is logged and an empty list is returned. For every decky
    that has a name (taken from its ``decky_config`` or, failing that,
    from the decky entry itself) the target container is resolved with
    :func:`resolve_topology_container` and :func:`seed_baseline` is
    called with the ``linux`` persona.

    Returns the per-token dicts produced by :func:`seed_baseline` for all
    deckies as one flat list, each with a ``decky_name`` key added.
    """
    from decnet.topology.persistence import hydrate

    topology = await hydrate(repo, topology_id)
    if topology is None:
        log.warning(
            "canary.seed_baseline_topology: topology %s not found", topology_id,
        )
        return []

    planted: list[dict[str, Any]] = []
    for entry in topology["deckies"]:
        name = (entry.get("decky_config") or {}).get("name") or entry.get("name")
        if not name:
            # Nothing to address the decky by; skip it.
            continue
        target = resolve_topology_container(
            topology_id, name, entry.get("services") or [],
        )
        # Persona is fixed to linux here, as in the original note:
        # MazeNET deckies don't carry an OS persona today.
        rows = await seed_baseline(
            name, repo,
            persona="linux", created_by=created_by, bus=bus,
            container=target,
        )
        for row in rows:
            row["decky_name"] = name
            planted.append(row)
    return planted
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Filesystem store for operator-uploaded canary blobs.
|
||||
|
||||
Blobs live under ``/var/lib/decnet/canary/blobs/<sha256>`` (override
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""``decnet canary`` worker — HTTP + DNS callback receivers.
|
||||
|
||||
Two surfaces, one process:
|
||||
@@ -27,14 +26,9 @@ crashes loudly rather than masking failures.
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import binascii
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Optional
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import FastAPI, Request, Response
|
||||
|
||||
@@ -56,41 +50,6 @@ _TRANSPARENT_GIF = bytes.fromhex(
|
||||
)
|
||||
|
||||
|
||||
# Namespace used by fingerprint generators to derive mint UUID.
|
||||
# Must stay in sync with fingerprint_html._MINT_NAMESPACE.
|
||||
_MINT_NAMESPACE = uuid.UUID("a3f7c821-9d1e-4b6a-8c2d-1e4f9a7b3c5d")
|
||||
|
||||
# In-memory per-(token_uuid, src_ip) rate limiter for fingerprint persists.
# Maps (token_uuid, src_ip) -> list of monotonic timestamps.
# Not shared across worker restarts or processes — acceptable for MVP.
_FP_RATE_WINDOW_S = 60
_FP_RATE_LIMIT = 30
# Soft cap on the number of tracked (token_uuid, src_ip) pairs. Once the
# table grows past it, each call first drops every bucket whose newest
# timestamp has already left the window, so idle pairs do not stay
# resident for the life of the process.
_FP_RATE_MAX_KEYS = 4096
_fp_rate_buckets: dict[tuple[str, str], list[float]] = {}


def _fp_rate_allowed(token_uuid: str, src_ip: str) -> bool:
    """Return ``True`` if this (token, source IP) pair is under the limit.

    At most ``_FP_RATE_LIMIT`` calls per pair are allowed within any
    ``_FP_RATE_WINDOW_S``-second window, measured on the monotonic clock.
    An allowed call is recorded in the pair's bucket; a rejected call is
    not.
    """
    key = (token_uuid, src_ip)
    now = time.monotonic()
    cutoff = now - _FP_RATE_WINDOW_S

    if len(_fp_rate_buckets) > _FP_RATE_MAX_KEYS:
        # Timestamps are appended in increasing order, so the last entry
        # is the newest; if it is outside the window the whole bucket is.
        stale = [
            k for k, stamps in _fp_rate_buckets.items()
            if not stamps or stamps[-1] <= cutoff
        ]
        for k in stale:
            del _fp_rate_buckets[k]

    bucket = [t for t in _fp_rate_buckets.get(key, ()) if t > cutoff]
    allowed = len(bucket) < _FP_RATE_LIMIT
    if allowed:
        bucket.append(now)
    _fp_rate_buckets[key] = bucket
    return allowed
|
||||
|
||||
|
||||
def _is_valid_fp_shape(fp: dict) -> bool:
    """Layer B — structural sanity check on a decoded fingerprint blob.

    *fp* passes when ``fp["mint"]`` is a non-empty string and at least
    three of the sections ``nav``, ``scr``, ``tz``, ``cv``, ``gl``,
    ``au``, ``ft``, ``rtc`` are present with dict values.
    """
    mint = fp.get("mint")
    if not (isinstance(mint, str) and mint):
        return False
    sections = ("nav", "scr", "tz", "cv", "gl", "au", "ft", "rtc")
    dict_sections = [name for name in sections if isinstance(fp.get(name), dict)]
    return len(dict_sections) >= 3
|
||||
|
||||
|
||||
def _http_base() -> str:
    """Return the canary HTTP base URL with trailing slashes removed.

    The value comes from the ``DECNET_CANARY_HTTP_BASE`` environment
    variable and falls back to ``http://localhost:8088`` when it is unset.
    """
    configured = os.environ.get("DECNET_CANARY_HTTP_BASE", "http://localhost:8088")
    return configured.rstrip("/")
|
||||
|
||||
@@ -145,11 +104,6 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
|
||||
|
||||
@app.get("/c/{slug}")
|
||||
async def callback(slug: str, request: Request) -> Response:
|
||||
raw_nonce = request.query_params.get("k")
|
||||
fp_meta, parsed_fp = _extract_fingerprint(request.query_params)
|
||||
merged_headers = dict(request.headers)
|
||||
if fp_meta:
|
||||
merged_headers.update(fp_meta)
|
||||
await _record_hit(
|
||||
repo, bus,
|
||||
slug=slug,
|
||||
@@ -157,9 +111,7 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
|
||||
user_agent=request.headers.get("user-agent"),
|
||||
request_path=str(request.url.path),
|
||||
dns_qname=None,
|
||||
raw_headers=merged_headers,
|
||||
parsed_fp=parsed_fp,
|
||||
raw_nonce=raw_nonce,
|
||||
raw_headers=dict(request.headers),
|
||||
)
|
||||
# Always 200 with a tiny image so the attacker's client sees
|
||||
# a "success" — same return regardless of whether the slug is
|
||||
@@ -177,67 +129,6 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
|
||||
return app
|
||||
|
||||
|
||||
# Per-chunk size cap. Real fingerprints fit in one ~3KB GET; honest
# overflow is handled via chunking (s/i/n + d). Anything larger than
# this on a single request is junk, so we drop it instead of letting an
# attacker inflate a trigger row indefinitely.
_FP_CHUNK_MAX = 8 * 1024


def _extract_fingerprint(qp: Any) -> tuple[dict[str, Any], Optional[dict]]:
    """Decode fingerprint-payload query params into (meta_dict, parsed_fp).

    The obfuscated browser payload may send three shapes on ``GET /c/<slug>``:

    * ``?o=1`` — bare-open beacon, fired before fingerprinting starts.
    * ``?d=<b64url-json>`` — single-shot fingerprint dump.
    * ``?s=<sid>&i=<idx>&n=<total>&d=<b64url-chunk>`` — chunked dump.

    Returns a tuple of:

    - ``meta`` — flat dict with ``_fp_*`` keys to merge into raw_headers:
      ``_fp_open`` for a bare-open beacon, ``_fp_oversize`` when ``d``
      exceeds ``_FP_CHUNK_MAX`` characters, ``_fp_sid`` / ``_fp_idx`` /
      ``_fp_total`` / ``_fp_chunk`` for a chunk (stored undecoded), and
      ``_fp_decode_error`` when a single-shot ``d`` cannot be decoded into
      a JSON object.
    - ``parsed_fp`` — the decoded fingerprint dict for validation, or
      ``None`` when there's no ``?d=``, the request is a chunk, or
      decoding fails.

    Malformed single-shot input never raises: every decode failure,
    including JSON nested too deeply to parse, is reported through
    ``_fp_decode_error``.
    """
    meta: dict[str, Any] = {}
    # Anything without a mapping-style ``get`` carries no fingerprint data.
    if not qp or not hasattr(qp, "get"):
        return meta, None

    if qp.get("o"):
        meta["_fp_open"] = "1"

    d = qp.get("d")
    if not d:
        return meta, None
    if len(d) > _FP_CHUNK_MAX:
        meta["_fp_oversize"] = "1"
        return meta, None

    sid = qp.get("s")
    idx = qp.get("i")
    total = qp.get("n")
    if sid and idx and total:
        # Chunked dump: pass the raw chunk through; it is not decoded here.
        meta["_fp_sid"] = sid
        meta["_fp_idx"] = idx
        meta["_fp_total"] = total
        meta["_fp_chunk"] = d
        return meta, None

    # Single-shot: decode and pass back as parsed_fp; validation runs in
    # _record_hit after token lookup so we have the stored nonce at hand.
    try:
        # Senders may strip base64 padding; restore it before decoding.
        padded = d + "=" * (-len(d) % 4)
        raw = base64.urlsafe_b64decode(padded.encode("ascii"))
        parsed = json.loads(raw.decode("utf-8"))
    except (binascii.Error, ValueError, UnicodeDecodeError, RecursionError):
        # RecursionError: json.loads can hit the interpreter's recursion
        # limit on deeply nested input, which fits comfortably inside
        # _FP_CHUNK_MAX. Treat it like any other undecodable payload.
        meta["_fp_decode_error"] = "1"
        return meta, None

    if not isinstance(parsed, dict):
        meta["_fp_decode_error"] = "1"
        return meta, None
    return meta, parsed
|
||||
|
||||
|
||||
def _client_ip(request: Request) -> str:
|
||||
# Honor X-Forwarded-For if the operator deployed behind a reverse
|
||||
# proxy. Take the leftmost address in the chain; everything after
|
||||
@@ -263,58 +154,16 @@ async def _record_hit(
|
||||
request_path: Optional[str],
|
||||
dns_qname: Optional[str],
|
||||
raw_headers: Optional[dict],
|
||||
parsed_fp: Optional[dict] = None,
|
||||
raw_nonce: Optional[str] = None,
|
||||
) -> None:
|
||||
"""Resolve slug -> token, persist a trigger, publish on the bus.
|
||||
|
||||
Unknown slugs are silently swallowed: returning the same response
|
||||
for known and unknown slugs is the stealth posture, and persisting
|
||||
every random scan would clutter the DB.
|
||||
|
||||
When *parsed_fp* is present (single-shot fingerprint decode succeeded),
|
||||
it is validated through four layers before being merged into raw_headers:
|
||||
A) nonce match against CanaryToken.fingerprint_nonce,
|
||||
B) structural shape check,
|
||||
C) mint UUID consistency,
|
||||
D) per-(token, IP) rate limit.
|
||||
Each failure drops the structured ``_fp`` and sets a ``_fp_*_invalid`` flag.
|
||||
The trigger row always lands regardless — the GET hit is itself forensic.
|
||||
"""
|
||||
token = await repo.get_canary_token_by_slug(slug)
|
||||
if token is None:
|
||||
return
|
||||
|
||||
final_headers: dict[str, Any] = dict(raw_headers or {})
|
||||
|
||||
if parsed_fp is not None:
|
||||
stored_nonce: Optional[str] = token.get("fingerprint_nonce")
|
||||
|
||||
# Layer A — nonce
|
||||
if stored_nonce is not None and raw_nonce != stored_nonce:
|
||||
final_headers["_fp_invalid_nonce"] = "1"
|
||||
parsed_fp = None
|
||||
|
||||
# Layer B — shape (only when nonce passed or no nonce enforced)
|
||||
if parsed_fp is not None and not _is_valid_fp_shape(parsed_fp):
|
||||
final_headers["_fp_invalid_shape"] = "1"
|
||||
parsed_fp = None
|
||||
|
||||
# Layer C — mint UUID consistency
|
||||
if parsed_fp is not None:
|
||||
expected_mint = str(uuid.uuid5(_MINT_NAMESPACE, slug))
|
||||
if parsed_fp.get("mint") != expected_mint:
|
||||
final_headers["_fp_invalid_mint"] = "1"
|
||||
parsed_fp = None
|
||||
|
||||
# Layer D — rate limit
|
||||
if parsed_fp is not None and not _fp_rate_allowed(token["uuid"], src_ip):
|
||||
final_headers["_fp_rate_limited"] = "1"
|
||||
parsed_fp = None
|
||||
|
||||
if parsed_fp is not None:
|
||||
final_headers["_fp"] = parsed_fp
|
||||
|
||||
trigger_id = await repo.record_canary_trigger({
|
||||
"token_uuid": token["uuid"],
|
||||
"occurred_at": datetime.now(timezone.utc),
|
||||
@@ -322,7 +171,7 @@ async def _record_hit(
|
||||
"user_agent": user_agent,
|
||||
"request_path": request_path,
|
||||
"dns_qname": dns_qname,
|
||||
"raw_headers": final_headers,
|
||||
"raw_headers": raw_headers or {},
|
||||
})
|
||||
try:
|
||||
await bus.publish(
|
||||
@@ -340,22 +189,6 @@ async def _record_hit(
|
||||
except Exception as e: # noqa: BLE001 — best effort
|
||||
log.warning("canary.triggered publish failed slug=%s err=%s", slug, e)
|
||||
|
||||
# Auto-deregister fingerprint canaries after the first valid fingerprint
|
||||
# is collected. Slug goes dark; the stealth posture means the attacker
|
||||
# sees the same 200 + GIF on the next hit — nothing reveals the revocation.
|
||||
# Guard: only fingerprint tokens have a non-NULL fingerprint_nonce; plain
|
||||
# http/dns canaries are NOT auto-revoked.
|
||||
if parsed_fp is not None and token.get("fingerprint_nonce") is not None:
|
||||
try:
|
||||
await repo.update_canary_token_state(token["uuid"], "revoked")
|
||||
await bus.publish(
|
||||
topics.canary(token["uuid"], topics.CANARY_REVOKED),
|
||||
{"token_id": token["uuid"], "trigger_id": trigger_id,
|
||||
"reason": "fingerprint_collected"},
|
||||
)
|
||||
except Exception as e: # noqa: BLE001 — trigger row already landed; best effort
|
||||
log.warning("canary.deregister failed token=%s err=%s", token["uuid"], e)
|
||||
|
||||
|
||||
# ---------------------------- DNS surface --------------------------------
|
||||
|
||||
@@ -381,7 +214,7 @@ async def _start_dns_server(
|
||||
local_addr=(_dns_bind(), _dns_port()),
|
||||
)
|
||||
log.info("canary.dns listening zone=%s port=%d", zone, _dns_port())
|
||||
return transport
|
||||
return transport # type: ignore[return-value]
|
||||
|
||||
|
||||
# ---------------------------- entry point --------------------------------
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
DECNET CLI — entry point for all commands.
|
||||
|
||||
@@ -26,7 +25,6 @@ from . import (
|
||||
canary,
|
||||
db,
|
||||
deploy,
|
||||
fleet,
|
||||
forwarder,
|
||||
geoip,
|
||||
init,
|
||||
@@ -38,11 +36,9 @@ from . import (
|
||||
realism,
|
||||
reconciler,
|
||||
sniffer,
|
||||
supervise,
|
||||
swarm,
|
||||
swarmctl,
|
||||
topology,
|
||||
ttp,
|
||||
updater,
|
||||
web,
|
||||
webhook,
|
||||
@@ -63,25 +59,10 @@ for _mod in (
|
||||
swarm,
|
||||
deploy, lifecycle, workers, inventory,
|
||||
web, profiler, orchestrator, realism, reconciler, sniffer, db,
|
||||
topology, bus, geoip, init, webhook, canary, ttp, supervise, fleet,
|
||||
topology, bus, geoip, init, webhook, canary,
|
||||
):
|
||||
_mod.register(app)
|
||||
|
||||
# Professional tier (optional): each module in decnet/pro/cli/ exposes
|
||||
# register(app) and attaches its commands — e.g. a standalone daemon entry point
|
||||
# that a systemd unit ExecStarts. Registered BEFORE the gate so pro commands are
|
||||
# mode-filtered like the rest. Absent in the Community build (no decnet.pro).
|
||||
try:
|
||||
import decnet.pro.cli as _pro_cli_pkg
|
||||
except ModuleNotFoundError:
|
||||
_pro_cli_pkg = None
|
||||
if _pro_cli_pkg is not None:
|
||||
import importlib as _importlib
|
||||
import pkgutil as _pkgutil
|
||||
|
||||
for _pmi in _pkgutil.iter_modules(_pro_cli_pkg.__path__):
|
||||
_importlib.import_module(f"decnet.pro.cli.{_pmi.name}").register(app)
|
||||
|
||||
_gate_commands_by_mode(app)
|
||||
|
||||
# Backwards-compat re-exports. Tests and third-party tooling import these
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
@@ -1,14 +1,8 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""``decnet canary`` — HTTP + DNS callback receiver for canary tokens.
|
||||
|
||||
Two entry points share this module:
|
||||
|
||||
* ``decnet canary`` — runs the worker process. Mirrors the shape of
|
||||
:mod:`decnet.cli.webhook`. Invoked by the ``decnet-canary.service``
|
||||
systemd unit so its argv must stay stable.
|
||||
* ``decnet canary-install-toolchain`` — provisions the Node side of
|
||||
the fingerprint-canary obfuscator. Idempotent; safe to call from
|
||||
the API service unit's ``ExecStartPre``.
|
||||
Worker process. Mirrors the shape of :mod:`decnet.cli.webhook`: a
|
||||
``@app.command(name="canary")`` Typer entry point that delegates to
|
||||
:func:`decnet.canary.worker.run`.
|
||||
|
||||
Not master-only — any host that hosts deckies can run its own
|
||||
canary worker (the bus events stay local; the webhook worker on
|
||||
@@ -17,17 +11,11 @@ in ``development/let-s-move-to-the-enumerated-pike.md``).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import subprocess # nosec B404 — npm exec is the whole point of the toolchain installer
|
||||
from pathlib import Path
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
_TOOLCHAIN_TIMEOUT_S = 180
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
|
||||
@app.command(name="canary")
|
||||
@@ -52,53 +40,3 @@ def register(app: typer.Typer) -> None:
|
||||
asyncio.run(run())
|
||||
except KeyboardInterrupt:
|
||||
console.print("\n[yellow]Canary worker stopped.[/]")
|
||||
|
||||
@app.command(name="canary-install-toolchain")
def canary_install_toolchain(
    npm_bin: str = typer.Option(
        "npm", "--npm-bin", help="Path to the npm executable. Defaults to PATH lookup.",
    ),
) -> None:
    """Install the Node-side toolchain used by fingerprint canaries.

    Runs ``npm install --omit=dev`` under the installed ``decnet/canary/``
    directory so the obfuscator's helper script can ``require()``
    ``javascript-obfuscator`` at mint time. Requires Node >= 18.

    Idempotent: re-running on an already-installed tree is fast
    (npm short-circuits when ``node_modules/`` is up-to-date).
    """
    import decnet.canary as _canary_pkg

    # npm runs in the directory that holds the installed canary package.
    install_root = Path(_canary_pkg.__file__).resolve().parent

    # Preconditions. Both exit with status 2 before npm is invoked.
    manifest = install_root / "package.json"
    if not manifest.is_file():
        console.print(
            f"[red]canary package.json not found under {install_root}; "
            "wheel may be missing the JS toolchain payload.[/]"
        )
        raise typer.Exit(code=2)
    if shutil.which(npm_bin) is None:
        console.print(
            f"[red]npm executable {npm_bin!r} not found on PATH. "
            "Install Node >= 18 and re-run.[/]"
        )
        raise typer.Exit(code=2)

    console.print(
        f"[cyan]installing canary toolchain[/] in {install_root}",
    )
    npm_argv = [npm_bin, "install", "--omit=dev", "--no-fund", "--no-audit"]
    try:
        result = subprocess.run(  # nosec B603 — argv-form, no shell, fixed cwd, npm_bin checked above
            npm_argv,
            cwd=str(install_root),
            capture_output=True, text=True,
            timeout=_TOOLCHAIN_TIMEOUT_S, check=False,
        )
    except subprocess.TimeoutExpired:
        # Exit status 3 is reserved for the timeout case.
        console.print("[red]npm install timed out after 3 minutes[/]")
        raise typer.Exit(code=3) from None

    if result.returncode != 0:
        # Surface npm's own stderr and propagate its exit status.
        console.print(
            f"[red]npm install failed rc={result.returncode}[/]\n"
            f"{result.stderr.strip()}"
        )
        raise typer.Exit(code=result.returncode)
    console.print("[green]canary toolchain ready[/]")
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
@@ -1,94 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""``decnet fleet <name>`` — prefork supervisor (DECNET 1.2).
|
||||
|
||||
Imports the shared base floor ONCE in the master, then forks one child process
|
||||
per worker (see :mod:`decnet.prefork`). Children share the floor via copy-on-write
|
||||
(measured ~71 MB shared / ~1 MB private per idle child on CPython 3.14) while
|
||||
keeping their OWN process and GIL — unlike ``decnet supervise``, which co-hosts
|
||||
workers as asyncio tasks in one shared-GIL process.
|
||||
|
||||
Use ``fleet`` for workers that must stay process-isolated (heavy resident state,
|
||||
sustained CPU) but shouldn't each re-import the world; use ``supervise`` for cheap
|
||||
co-resident IO workers.
|
||||
|
||||
CONSOLIDATION COSTS (same shape as ``supervise``):
|
||||
* Forked children inherit the master's privileges — a fleet's systemd unit
|
||||
carries the UNION of its members' caps. So group by privilege profile, not
|
||||
convenience. The ``heavy`` fleet is DB-only (no docker socket, no raw net).
|
||||
* To share via CoW the master pre-imports each worker's module BEFORE forking,
|
||||
so its RSS is large — but that RSS is the shared floor, not per-child cost.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
_FLEETS = ("heavy",)
|
||||
|
||||
|
||||
def _build_fleet(name: str) -> dict:
    """Return ``{worker_name: entry_thunk}`` for the fleet called *name*.

    The worker modules are imported here, by the caller of this function,
    so that (per the module docstring) processes forked afterwards can
    share the already-imported code. Each thunk takes no arguments and
    blocks while it runs one worker under ``asyncio.run``; ``repo`` is
    initialized inside the thunk, i.e. only once the thunk is actually
    invoked, so each worker opens its own pool.

    Raises :class:`ValueError` for an unknown fleet name.
    """
    import asyncio

    if name != "heavy":
        raise ValueError(f"unknown fleet: {name}")

    from decnet.profiler import attacker_profile_worker
    from decnet.ttp.worker import run_ttp_worker_loop
    from decnet.web.dependencies import repo

    # Only the worker *modules* are loaded at this point. Heavy
    # per-worker runtime state (ATT&CK bundle, ML) still loads lazily
    # when a worker starts; warming it here to share it too is a future
    # optimization, gated on a live RSS measurement.
    async def _profiler_main() -> None:
        await repo.initialize()
        await attacker_profile_worker(repo, interval=60)

    async def _ttp_main() -> None:
        await repo.initialize()
        await run_ttp_worker_loop(repo, poll_interval_secs=60.0)

    def _profiler() -> None:
        asyncio.run(_profiler_main())

    def _ttp() -> None:
        asyncio.run(_ttp_main())

    return {"profiler": _profiler, "ttp": _ttp}
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``fleet`` command to *app*."""
    # Rendered once, when the command is registered, for the help text.
    fleet_choices = ", ".join(_FLEETS)

    @app.command(name="fleet")
    def fleet_cmd(
        name: str = typer.Argument(
            ..., help=f"Worker fleet to fork. One of: {fleet_choices}"
        ),
        daemon: bool = typer.Option(
            False, "--daemon", "-d", help="Detach to background as a daemon process"
        ),
    ) -> None:
        """Prefork a worker fleet: shared base floor (CoW), one child process per worker."""
        from decnet.prefork import run_fleet

        # Reject unknown names up front with exit status 2.
        if name not in _FLEETS:
            console.print(
                f"[red]unknown fleet {name!r}; known fleets: {', '.join(_FLEETS)}[/]"
            )
            raise typer.Exit(2)

        # Detach first, so the fleet is built and run in the daemonized
        # process rather than the invoking one.
        if daemon:
            log.info("fleet %s daemonizing", name)
            _utils._daemonize()

        log.info("fleet %s starting", name)
        console.print(f"[bold cyan]Fleet starting[/] {name} (prefork)")
        run_fleet(_build_fleet(name))
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Role-based CLI gating.
|
||||
|
||||
MAINTAINERS: when you add a new Typer command (or add_typer group) that is
|
||||
@@ -31,10 +30,6 @@ MASTER_ONLY_COMMANDS: frozenset[str] = frozenset({
|
||||
"mutate", "listener", "profiler",
|
||||
"services", "distros", "correlate", "archetypes", "web",
|
||||
"db-reset", "init", "webhook", "clusterer", "campaign-clusterer",
|
||||
# `ttp` runs on agents — local SMTP decoys persist .eml files into the
|
||||
# agent's artifacts tree and the EmailLifter disk-reaches them in-process
|
||||
# (DEBT-047). `ttp-backfill` stays master-only: it walks the master DB.
|
||||
"ttp-backfill",
|
||||
})
|
||||
MASTER_ONLY_GROUPS: frozenset[str] = frozenset(
|
||||
{"swarm", "topology", "geoip", "realism"}
|
||||
@@ -70,7 +65,7 @@ def _gate_commands_by_mode(_app: typer.Typer) -> None:
|
||||
return
|
||||
_app.registered_commands = [
|
||||
c for c in _app.registered_commands
|
||||
if (c.name or (c.callback.__name__ if c.callback else "")) not in MASTER_ONLY_COMMANDS
|
||||
if (c.name or c.callback.__name__) not in MASTER_ONLY_COMMANDS
|
||||
]
|
||||
_app.registered_groups = [
|
||||
g for g in _app.registered_groups
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""GeoIP CLI — refresh and lookup subcommands (master-only).
|
||||
|
||||
Usage::
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
`decnet init` — one-shot master-host bootstrap.
|
||||
|
||||
@@ -45,12 +44,6 @@ _CONFIG_PLACEHOLDER = """\
|
||||
# EnvironmentFile= — never in a group-readable INI.
|
||||
|
||||
[decnet]
|
||||
# DECNET-service user/group as configured at `decnet init` time.
|
||||
# Resolved to a uid/gid on each host at deploy time via pwd.getpwnam,
|
||||
# so the same user name can have different numeric uids on master vs
|
||||
# agents without breaking artifact ownership.
|
||||
api-user = {api_user}
|
||||
api-group = {api_group}
|
||||
# mode = master # or "agent"
|
||||
|
||||
# [api]
|
||||
@@ -81,7 +74,6 @@ api-group = {api_group}
|
||||
# master-host = 10.0.0.1
|
||||
# syslog-port = 6514
|
||||
# swarmctl-port = 8770
|
||||
# swarmctl-host = 127.0.0.1
|
||||
|
||||
# [logging]
|
||||
# system-log = /var/log/decnet/decnet.system.log
|
||||
@@ -205,17 +197,14 @@ def _ensure_dir(
|
||||
return f"skip: {path} already present" if existed else "ok"
|
||||
|
||||
|
||||
def _ensure_config(
|
||||
path: Path, group: str, *, user: str, dry_run: bool,
|
||||
) -> str:
|
||||
def _ensure_config(path: Path, group: str, *, dry_run: bool) -> str:
|
||||
if path.exists():
|
||||
return f"skip: {path} already present"
|
||||
if dry_run:
|
||||
console.print(f" [dim]would write:[/] {path}")
|
||||
return "ok"
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
rendered = _CONFIG_PLACEHOLDER.format(api_user=user, api_group=group)
|
||||
path.write_text(rendered)
|
||||
path.write_text(_CONFIG_PLACEHOLDER)
|
||||
try:
|
||||
os.chmod(path, 0o640)
|
||||
gid = grp.getgrnam(group).gr_gid
|
||||
@@ -612,7 +601,7 @@ def register(app: typer.Typer) -> None:
|
||||
# (Path("/"). / "/opt/decnet" == Path("/opt/decnet"), dropping pfx).
|
||||
_install_rel = install_dir.lstrip("/")
|
||||
|
||||
required_tools: tuple[str, ...] = ("systemctl",) if deinit else (
|
||||
required_tools = ("systemctl",) if deinit else (
|
||||
"systemctl", "useradd", "groupadd", "systemd-tmpfiles",
|
||||
)
|
||||
if deinit:
|
||||
@@ -669,7 +658,7 @@ def register(app: typer.Typer) -> None:
|
||||
)
|
||||
_step(
|
||||
"systemctl daemon-reload",
|
||||
lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1], # type: ignore[func-returns-value]
|
||||
lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1],
|
||||
)
|
||||
_step(
|
||||
f"remove {etc_decnet / 'decnet.ini'}",
|
||||
@@ -765,13 +754,6 @@ def register(app: typer.Typer) -> None:
|
||||
(pfx / _install_rel, 0o755, user, group),
|
||||
(pfx / "var/lib/decnet", 0o750, user, group),
|
||||
(pfx / "var/lib/decnet/geoip", 0o755, user, group),
|
||||
# DEBT-035 / DEBT-047: artifact root carries setgid (the
|
||||
# 0o2... bit) so every file written under it inherits the
|
||||
# decnet group regardless of which container's uid created
|
||||
# it. Group-write (0o2775) lets the API process and the
|
||||
# local TTP worker read each other's outputs without a
|
||||
# manual chown after every fresh deploy.
|
||||
(pfx / "var/lib/decnet/artifacts", 0o2775, user, group),
|
||||
(pfx / "var/log/decnet", 0o750, user, group),
|
||||
(etc_decnet, 0o755, "root", group),
|
||||
(pfx / "run/decnet", 0o755, "root", group),
|
||||
@@ -793,15 +775,12 @@ def register(app: typer.Typer) -> None:
|
||||
for path, mode, d_owner, d_group in dirs:
|
||||
_step(
|
||||
f"ensure dir {path}",
|
||||
lambda p=path, m=mode, o=d_owner, g=d_group: # type: ignore[misc]
|
||||
lambda p=path, m=mode, o=d_owner, g=d_group:
|
||||
_ensure_dir(p, mode=m, owner=o, group=g, dry_run=dry_run),
|
||||
)
|
||||
_step(
|
||||
f"write {etc_decnet / 'decnet.ini'}",
|
||||
lambda: _ensure_config(
|
||||
etc_decnet / "decnet.ini", group,
|
||||
user=user, dry_run=dry_run,
|
||||
),
|
||||
lambda: _ensure_config(etc_decnet / "decnet.ini", group, dry_run=dry_run),
|
||||
)
|
||||
_step(
|
||||
"install systemd units",
|
||||
@@ -833,7 +812,7 @@ def register(app: typer.Typer) -> None:
|
||||
)
|
||||
_step(
|
||||
"systemctl daemon-reload",
|
||||
lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1], # type: ignore[func-returns-value]
|
||||
lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1],
|
||||
)
|
||||
|
||||
if no_start:
|
||||
@@ -844,7 +823,7 @@ def register(app: typer.Typer) -> None:
|
||||
_step(
|
||||
"systemctl enable --now decnet.target",
|
||||
lambda: (
|
||||
_run( # type: ignore[func-returns-value]
|
||||
_run(
|
||||
["systemctl", "enable", "--now", "decnet.target"],
|
||||
dry_run=dry_run,
|
||||
),
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess # nosec B404
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""``decnet realism ...`` — content-engine maintenance commands.
|
||||
|
||||
After stage 5 of the realism migration, this is the only remaining
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
@@ -1,124 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""``decnet supervise <group>`` — host a co-resident group of workers in one
|
||||
process, paying the import floor (and the DB connection pool) once instead of
|
||||
once per worker. See ``development/RELEASE-1.1.md``.
|
||||
|
||||
Each worker keeps its own restart loop (see :mod:`decnet.supervisor`), so this
|
||||
trades per-worker systemd granularity for RAM — a worker can always be pulled
|
||||
back out to its own ``decnet <worker>`` unit by removing it from the group spec
|
||||
below; nothing about the worker's own code changes.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
# Groups are intentionally a small static registry, not config — the membership
|
||||
# is an architectural decision, not an operator knob.
|
||||
_GROUPS = ("batch", "cpu")
|
||||
|
||||
|
||||
async def _build_specs(group: str):
|
||||
"""Return ``[(name, factory), ...]`` for *group*, lazy-importing only the
|
||||
workers it hosts and initializing the shared ``repo`` once.
|
||||
|
||||
Factories return a fresh coroutine each call so :func:`supervise` can restart
|
||||
them. Intervals match the standalone units' defaults.
|
||||
# ponytail: defaults hardcoded to match the per-worker units; add CLI knobs
|
||||
# only if an operator actually needs to retune a consolidated group.
|
||||
"""
|
||||
if group == "batch":
|
||||
from decnet.fleet.reconciler_worker import fleet_reconciler_worker
|
||||
from decnet.intel.worker import run_intel_loop
|
||||
from decnet.mutator import run_watch_loop
|
||||
from decnet.orchestrator import orchestrator_worker
|
||||
from decnet.web.dependencies import repo
|
||||
|
||||
await repo.initialize() # shared by every batch worker → one DB pool
|
||||
return [
|
||||
("reconcile", lambda: fleet_reconciler_worker(repo, interval=30)),
|
||||
("enrich", lambda: run_intel_loop(repo, poll_interval_secs=60.0, ttl_hours=24)),
|
||||
("orchestrate", lambda: orchestrator_worker(repo, interval=60, llm_enabled=None)),
|
||||
("mutate", lambda: run_watch_loop(repo)),
|
||||
]
|
||||
if group == "cpu":
|
||||
from decnet.cli.gating import _require_master_mode
|
||||
from decnet.clustering.campaign.worker import run_campaign_clusterer_loop
|
||||
from decnet.clustering.worker import run_clusterer_loop
|
||||
from decnet.correlation.attribution_worker import run_attribution_loop
|
||||
from decnet.correlation.reuse_worker import run_reuse_loop
|
||||
from decnet.web.dependencies import repo
|
||||
|
||||
_require_master_mode("supervise cpu")
|
||||
await repo.initialize() # shared by every cpu worker → one DB pool
|
||||
return [
|
||||
("clusterer", lambda: run_clusterer_loop(repo, poll_interval_secs=60.0)),
|
||||
("campaign-clusterer", lambda: run_campaign_clusterer_loop(repo, poll_interval_secs=60.0)),
|
||||
("attribution", lambda: run_attribution_loop(repo, multi_actor_tick_secs=60.0)),
|
||||
("reuse-correlate", lambda: run_reuse_loop(repo, poll_interval_secs=60.0, min_targets=2)),
|
||||
]
|
||||
raise ValueError(f"unknown supervise group: {group}")
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``supervise`` command to *app*."""
    # CLI parameter declarations, pulled out of the signature for readability.
    group_arg = typer.Argument(
        ..., help=f"Worker group to host. One of: {', '.join(_GROUPS)}"
    )
    daemon_opt = typer.Option(
        False, "--daemon", "-d", help="Detach to background as a daemon process"
    )

    @app.command(name="supervise")
    def supervise_cmd(group: str = group_arg, daemon: bool = daemon_opt) -> None:
        """Host a co-resident worker group in one process (shared import floor + DB pool)."""
        import asyncio

        from decnet.supervisor import run_group

        # Reject unknown group names up front with exit status 2.
        if group not in _GROUPS:
            known = ", ".join(_GROUPS)
            console.print(f"[red]unknown group {group!r}; known groups: {known}[/]")
            raise typer.Exit(2)

        # Optional detach happens before any worker is imported or started.
        if daemon:
            log.info("supervise %s daemonizing", group)
            _utils._daemonize()

        log.info("supervise group=%s starting", group)
        console.print(f"[bold cyan]Supervisor starting[/] group={group}")

        async def _host_group() -> None:
            """Run the group's workers, managing the cpu offload pool around them."""
            executor = None
            if group == "cpu":
                # The cpu group hands its O(n^2) connected-components kernels
                # to a single shared process pool so they execute in parallel
                # rather than serialising under the GIL. The pool uses the
                # forkserver start method instead of plain fork because this
                # process is multithreaded (bus clients), and forking a
                # multithreaded process is unsafe.
                import multiprocessing
                from concurrent.futures import ProcessPoolExecutor

                from decnet import offload

                forkserver_ctx = multiprocessing.get_context("forkserver")
                executor = ProcessPoolExecutor(max_workers=2, mp_context=forkserver_ctx)
                offload.set_executor(executor)
                log.info("supervise cpu: kernel offload pool ready (max_workers=2)")
            try:
                await run_group(await _build_specs(group))
            finally:
                # Unregister and tear down the pool only if one was created.
                if executor is not None:
                    from decnet import offload

                    offload.set_executor(None)
                    executor.shutdown(wait=False, cancel_futures=True)

        try:
            asyncio.run(_host_group())
        except KeyboardInterrupt:
            # Ctrl-C is the normal foreground stop; report it without a traceback.
            console.print("\n[yellow]Supervisor stopped.[/]")
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user