Compare commits
1 Commits
dev
...
e8d97281f7
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e8d97281f7 |
@@ -2,7 +2,7 @@ name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [dev, testing]
|
||||
branches: [dev, testing, "temp/merge-*"]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
@@ -11,19 +11,17 @@ jobs:
|
||||
lint:
|
||||
name: Lint (ruff)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.ref == 'refs/heads/dev'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- run: pip install ruff
|
||||
- run: ruff check decnet/
|
||||
- run: ruff check .
|
||||
|
||||
bandit:
|
||||
name: SAST (bandit)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.ref == 'refs/heads/dev'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
@@ -35,7 +33,6 @@ jobs:
|
||||
pip-audit:
|
||||
name: Dependency audit (pip-audit)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.ref == 'refs/heads/dev'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
@@ -43,33 +40,12 @@ jobs:
|
||||
python-version: "3.11"
|
||||
- run: pip install pip-audit
|
||||
- run: pip install -e .[dev]
|
||||
- run: pip-audit --skip-editable --ignore-vuln CVE-2025-65896 --ignore-vuln CVE-2026-3219
|
||||
|
||||
merge-to-testing:
|
||||
name: Merge dev → testing
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint, bandit, pip-audit]
|
||||
if: github.ref == 'refs/heads/dev'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.DECNET_PR_TOKEN }}
|
||||
- name: Configure git
|
||||
run: |
|
||||
git config user.name "DECNET CI"
|
||||
git config user.email "ci@decnet.local"
|
||||
- name: Merge dev into testing
|
||||
run: |
|
||||
git fetch origin testing
|
||||
git checkout testing
|
||||
git merge origin/dev --no-ff -m "ci: auto-merge dev → testing"
|
||||
git push origin testing
|
||||
- run: pip-audit --skip-editable --ignore-vuln CVE-2025-65896
|
||||
|
||||
test-standard:
|
||||
name: Test (Standard)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.ref == 'refs/heads/testing'
|
||||
needs: [lint, bandit, pip-audit]
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.11"]
|
||||
@@ -84,24 +60,23 @@ jobs:
|
||||
test-live:
|
||||
name: Test (Live)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.ref == 'refs/heads/testing'
|
||||
needs: [test-standard]
|
||||
services:
|
||||
mysql:
|
||||
image: mysql:8.0
|
||||
env:
|
||||
MYSQL_ROOT_PASSWORD: root
|
||||
MYSQL_DATABASE: decnet_test
|
||||
ports:
|
||||
- 3307:3306
|
||||
options: >-
|
||||
--health-cmd="mysqladmin ping -h 127.0.0.1"
|
||||
--health-interval=10s
|
||||
--health-timeout=5s
|
||||
--health-retries=5
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.11"]
|
||||
services:
|
||||
mysql:
|
||||
image: mysql:8.0
|
||||
env:
|
||||
MYSQL_ROOT_PASSWORD: root
|
||||
MYSQL_DATABASE: decnet_test
|
||||
ports:
|
||||
- 3307:3306
|
||||
options: >-
|
||||
--health-cmd="mysqladmin ping -h 127.0.0.1"
|
||||
--health-interval=10s
|
||||
--health-timeout=5s
|
||||
--health-retries=5
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
@@ -116,10 +91,48 @@ jobs:
|
||||
DECNET_MYSQL_PASSWORD: root
|
||||
DECNET_MYSQL_DATABASE: decnet_test
|
||||
|
||||
merge-to-main:
|
||||
name: Merge testing → main
|
||||
test-fuzz:
|
||||
name: Test (Fuzz)
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-standard, test-live]
|
||||
needs: [test-live]
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.11"]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- run: pip install -e .[dev]
|
||||
- run: pytest -m fuzz
|
||||
env:
|
||||
SCHEMATHESIS_CONFIG: schemathesis.ci.toml
|
||||
|
||||
merge-to-testing:
|
||||
name: Merge dev → testing
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-standard, test-live, test-fuzz]
|
||||
if: github.ref == 'refs/heads/dev'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.DECNET_PR_TOKEN }}
|
||||
- name: Configure git
|
||||
run: |
|
||||
git config user.name "DECNET CI"
|
||||
git config user.email "ci@decnet.local"
|
||||
- name: Merge dev into testing
|
||||
run: |
|
||||
git fetch origin testing
|
||||
git checkout testing
|
||||
git merge origin/dev --no-ff -m "ci: auto-merge dev → testing [skip ci]"
|
||||
git push origin testing
|
||||
|
||||
prepare-merge-to-main:
|
||||
name: Prepare Merge to Main
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-standard, test-live, test-fuzz]
|
||||
if: github.ref == 'refs/heads/testing'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -130,12 +143,33 @@ jobs:
|
||||
run: |
|
||||
git config user.name "DECNET CI"
|
||||
git config user.email "ci@decnet.local"
|
||||
- name: Merge testing into main
|
||||
- name: Create temp branch and sync with main
|
||||
run: |
|
||||
git fetch origin main
|
||||
git checkout -b temp/merge-testing-to-main
|
||||
echo "--- Switched to temp branch, merging main into it ---"
|
||||
git merge origin/main --no-edit || { echo "CONFLICT: Manual resolution required"; exit 1; }
|
||||
git push origin temp/merge-testing-to-main --force
|
||||
|
||||
finalize-merge-to-main:
|
||||
name: Finalize Merge to Main
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-standard, test-live, test-fuzz]
|
||||
if: startsWith(github.ref, 'refs/heads/temp/merge-')
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.DECNET_PR_TOKEN }}
|
||||
- name: Configure git
|
||||
run: |
|
||||
git config user.name "DECNET CI"
|
||||
git config user.email "ci@decnet.local"
|
||||
- name: Merge RC into main
|
||||
run: |
|
||||
git fetch origin main
|
||||
git checkout main
|
||||
git merge origin/testing --no-ff -m "ci: auto-merge testing → main" || {
|
||||
echo "CONFLICT: testing and main have diverged — manual resolution required"
|
||||
exit 1
|
||||
}
|
||||
git merge ${{ github.ref }} --no-ff -m "ci: auto-merge testing → main"
|
||||
git push origin main
|
||||
echo "--- Cleaning up temp branch ---"
|
||||
git push origin --delete ${{ github.ref_name }}
|
||||
|
||||
43
.gitignore
vendored
43
.gitignore
vendored
@@ -1,8 +1,4 @@
|
||||
.venv/
|
||||
.venv*/
|
||||
docker-compose.yaml
|
||||
.311/
|
||||
.3[0-9][0-9]/
|
||||
logs/
|
||||
.claude/*
|
||||
CLAUDE.md
|
||||
@@ -13,10 +9,6 @@ __pycache__/
|
||||
dist/
|
||||
build/
|
||||
decnet-compose.yml
|
||||
# Per-topology compose fragments emitted by `decnet topology deploy`.
|
||||
decnet-topology-*-compose.yml
|
||||
# Docker build context cache.
|
||||
.docker/
|
||||
decnet-state.json
|
||||
*.ini
|
||||
decnet.log*
|
||||
@@ -29,9 +21,6 @@ windows1
|
||||
*.db-shm
|
||||
*.db-wal
|
||||
decnet.*.log
|
||||
# Rotated copies (logrotate appends .1, .2, .gz...) — the existing
|
||||
# decnet.*.log glob doesn't catch the suffix.
|
||||
decnet.*.log.*
|
||||
decnet.json
|
||||
.env*
|
||||
.env.local
|
||||
@@ -39,35 +28,3 @@ decnet.json
|
||||
.hypothesis/
|
||||
profiles/*
|
||||
tests/test_decnet.db*
|
||||
|
||||
# Nested git clone of the wiki — not a submodule, just a local
|
||||
# working copy so we can edit docs without a full round-trip.
|
||||
wiki-checkout/
|
||||
|
||||
# Scratch test/debug outputs that leak from saved `pytest > hang.log`
|
||||
# or `pytest > schem` redirections.
|
||||
hang.log
|
||||
schem
|
||||
*.pytest.log
|
||||
|
||||
# pydeps-style dependency graph dumps from local analysis runs.
|
||||
deps.txt
|
||||
|
||||
# Node modules vendored under decnet/canary/ for the obfuscator helper.
|
||||
# The package.json is the source of truth; modules are reinstalled at
|
||||
# build/deploy time.
|
||||
node_modules/
|
||||
package-lock.json
|
||||
|
||||
# TTP rule-precision corpus pulled from prod sqlite. Real attacker
|
||||
# payloads — operator-only artifact. The synthetic ``seed_*.jsonl``
|
||||
# files alongside ARE committed and exercise the harness in CI.
|
||||
tests/ttp/rule_precision/corpus/*.jsonl
|
||||
tests/ttp/rule_precision/corpus/seed_*.jsonl
|
||||
threatfox-api.json
|
||||
|
||||
# MITRE ATT&CK STIX bundle — 50 MB, fetched at runtime via attack_stix.py
|
||||
enterprise-attack-*.json
|
||||
|
||||
# pytest failure dump files
|
||||
testfail
|
||||
|
||||
17
COPYRIGHT
17
COPYRIGHT
@@ -1,17 +0,0 @@
|
||||
DECNET - Deception Network
|
||||
Copyright (C) 2026 Samuel Paschuan <samsam70000@gmail.com>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public
|
||||
License along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
661
LICENSE
661
LICENSE
@@ -1,661 +0,0 @@
|
||||
GNU AFFERO GENERAL PUBLIC LICENSE
|
||||
Version 3, 19 November 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU Affero General Public License is a free, copyleft license for
|
||||
software and other kinds of works, specifically designed to ensure
|
||||
cooperation with the community in the case of network server software.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
our General Public Licenses are intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
Developers that use our General Public Licenses protect your rights
|
||||
with two steps: (1) assert copyright on the software, and (2) offer
|
||||
you this License which gives you legal permission to copy, distribute
|
||||
and/or modify the software.
|
||||
|
||||
A secondary benefit of defending all users' freedom is that
|
||||
improvements made in alternate versions of the program, if they
|
||||
receive widespread use, become available for other developers to
|
||||
incorporate. Many developers of free software are heartened and
|
||||
encouraged by the resulting cooperation. However, in the case of
|
||||
software used on network servers, this result may fail to come about.
|
||||
The GNU General Public License permits making a modified version and
|
||||
letting the public access it on a server without ever releasing its
|
||||
source code to the public.
|
||||
|
||||
The GNU Affero General Public License is designed specifically to
|
||||
ensure that, in such cases, the modified source code becomes available
|
||||
to the community. It requires the operator of a network server to
|
||||
provide the source code of the modified version running there to the
|
||||
users of that server. Therefore, public use of a modified version, on
|
||||
a publicly accessible server, gives the public access to the source
|
||||
code of the modified version.
|
||||
|
||||
An older license, called the Affero General Public License and
|
||||
published by Affero, was designed to accomplish similar goals. This is
|
||||
a different license, not a version of the Affero GPL, but Affero has
|
||||
released a new version of the Affero GPL which permits relicensing under
|
||||
this license.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU Affero General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Remote Network Interaction; Use with the GNU General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, if you modify the
|
||||
Program, your modified version must prominently offer all users
|
||||
interacting with it remotely through a computer network (if your version
|
||||
supports such interaction) an opportunity to receive the Corresponding
|
||||
Source of your version by providing access to the Corresponding Source
|
||||
from a network server at no charge, through some standard or customary
|
||||
means of facilitating copying of software. This Corresponding Source
|
||||
shall include the Corresponding Source for any work covered by version 3
|
||||
of the GNU General Public License that is incorporated pursuant to the
|
||||
following paragraph.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the work with which it is combined will remain governed by version
|
||||
3 of the GNU General Public License.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU Affero General Public License from time to time. Such new versions
|
||||
will be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU Affero General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU Affero General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU Affero General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If your software can interact with users remotely through a computer
|
||||
network, you should also make sure that it provides a way for users to
|
||||
get its source. For example, if your program is a web application, its
|
||||
interface could display a "Source" link that leads users to an archive
|
||||
of the code. There are many ways you could offer source, and different
|
||||
solutions will be better for different programs; see section 13 for the
|
||||
specific requirements.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU AGPL, see
|
||||
<https://www.gnu.org/licenses/>.
|
||||
261
Makefile
261
Makefile
@@ -1,261 +0,0 @@
|
||||
PYTEST := .311/bin/pytest
|
||||
FAIL_FAST ?= 1
|
||||
NO_CACHE ?= 0
|
||||
ARGS :=
|
||||
|
||||
# addopts in pyproject.toml already provides -v -q -x -n 4 --dist load.
|
||||
# Unit suites inherit that; special suites clear it with --override-ini.
|
||||
UNIT_FLAGS := --timeout=30 --timeout-method=thread
|
||||
SEQ_FLAGS := --override-ini="addopts=-v -x" -n logical --timeout=120 --timeout-method=thread
|
||||
FUZZ_FLAGS := --override-ini="addopts=-v -x" -n logical -m fuzz \
|
||||
--ignore=tests/api/test_schemathesis.py \
|
||||
--ignore=tests/api/test_schemathesis_agent.py \
|
||||
--ignore=tests/api/test_schemathesis_swarm.py \
|
||||
--ignore=tests/api/test_schemathesis_ttp.py
|
||||
SCHEMA_QUICK ?= 0
|
||||
SCHEMA_FLAGS := --override-ini="addopts=-v -x" -n 4 -m fuzz --timeout=600 --timeout-method=thread
|
||||
BENCH_FLAGS := --override-ini="addopts=-v" -p no:xdist --benchmark-only -m bench
|
||||
|
||||
# ── Unit suites (xdist, 30s timeout) ─────────────────────────────────────────
|
||||
|
||||
.PHONY: test-core
|
||||
test-core:
|
||||
$(PYTEST) tests/core tests/config tests/factories tests/fixtures $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-web
|
||||
test-web:
|
||||
$(PYTEST) tests/web tests/services $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-db
|
||||
test-db:
|
||||
$(PYTEST) tests/db tests/vectorstore $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-bus
|
||||
test-bus:
|
||||
$(PYTEST) tests/bus tests/logging tests/telemetry $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-ttp
|
||||
test-ttp:
|
||||
$(PYTEST) tests/ttp $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-intel
|
||||
test-intel:
|
||||
$(PYTEST) tests/intel tests/asn tests/geoip $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-analysis
|
||||
test-analysis:
|
||||
$(PYTEST) tests/clustering tests/correlation $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-infra
|
||||
test-infra:
|
||||
$(PYTEST) tests/agent tests/collector tests/sniffer tests/profiler $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-fleet
|
||||
test-fleet:
|
||||
$(PYTEST) tests/fleet tests/swarm tests/topology tests/orchestrator tests/deploy tests/updater $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-cli
|
||||
test-cli:
|
||||
$(PYTEST) tests/cli tests/engine tests/mutator tests/realism $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-features
|
||||
test-features:
|
||||
$(PYTEST) tests/canary tests/artifacts tests/webhook tests/decky_io tests/prober $(UNIT_FLAGS) $(ARGS)
|
||||
|
||||
# ── Go and React suites ───────────────────────────────────────────────────────
|
||||
|
||||
_GO_MODULES := \
|
||||
decnet/templates/_caddy_modules/decnetfp \
|
||||
decnet/templates/http/_caddy_modules/decnetfp \
|
||||
decnet/templates/https/_caddy_modules/decnetfp
|
||||
|
||||
.PHONY: test-go
|
||||
test-go:
|
||||
@failed=""; \
|
||||
for mod in $(_GO_MODULES); do \
|
||||
echo "=== go test: $$mod ==="; \
|
||||
if (cd "$$mod" && go test ./...); then \
|
||||
echo "[PASS] $$mod"; \
|
||||
else \
|
||||
echo "[FAIL] $$mod"; \
|
||||
failed="$$failed $$mod"; \
|
||||
if [ "$(FAIL_FAST)" = "1" ]; then exit 1; fi; \
|
||||
fi; \
|
||||
done; \
|
||||
[ -z "$$failed" ]
|
||||
|
||||
.PHONY: test-react
|
||||
test-react:
|
||||
cd decnet_web && npm run test:run $(ARGS)
|
||||
|
||||
# ── Special suites (addopts overridden; per-suite workers and timeouts) ────────
|
||||
|
||||
.PHONY: test-live
|
||||
test-live:
|
||||
$(PYTEST) tests/live -m live $(SEQ_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-api
|
||||
test-api:
|
||||
$(PYTEST) tests/api $(SEQ_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-stress
|
||||
test-stress:
|
||||
$(PYTEST) tests/stress -m stress $(SEQ_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-service
|
||||
test-service:
|
||||
$(PYTEST) tests/service_testing $(SEQ_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-fuzz
|
||||
test-fuzz:
|
||||
$(PYTEST) $(FUZZ_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-schema
|
||||
test-schema:
|
||||
SCHEMA_QUICK=$(SCHEMA_QUICK) $(PYTEST) \
|
||||
tests/api/test_schemathesis.py \
|
||||
tests/api/test_schemathesis_agent.py \
|
||||
tests/api/test_schemathesis_swarm.py \
|
||||
tests/api/test_schemathesis_ttp.py \
|
||||
$(SCHEMA_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-bench
|
||||
test-bench:
|
||||
$(PYTEST) tests/perf $(BENCH_FLAGS) $(ARGS)
|
||||
|
||||
.PHONY: test-docker
|
||||
test-docker:
|
||||
DECNET_LIVE_DOCKER=1 $(PYTEST) tests/docker -m docker $(SEQ_FLAGS) $(ARGS)
|
||||
|
||||
# ── Static analysis ───────────────────────────────────────────────────────────
|
||||
|
||||
.PHONY: test-mypy
|
||||
test-mypy:
|
||||
.311/bin/mypy decnet --ignore-missing-imports --no-error-summary
|
||||
|
||||
.PHONY: test-bandit
|
||||
test-bandit:
|
||||
.311/bin/bandit -r decnet -c pyproject.toml
|
||||
|
||||
.PHONY: test-vulture
|
||||
test-vulture:
|
||||
.311/bin/vulture decnet --min-confidence 80
|
||||
|
||||
.PHONY: test-pip-audit
|
||||
test-pip-audit:
|
||||
.311/bin/pip-audit
|
||||
|
||||
# ── Composite: all suites ─────────────────────────────────────────────────────
|
||||
|
||||
_ALL_SUITES := core web db bus ttp intel analysis infra fleet cli features \
|
||||
go react \
|
||||
live api schema stress service fuzz bench docker \
|
||||
mypy bandit vulture pip-audit
|
||||
|
||||
.PHONY: test-all test
|
||||
test-all test:
|
||||
@failed=""; \
|
||||
for suite in $(_ALL_SUITES); do \
|
||||
echo ""; \
|
||||
echo "══════════════════════════ $$suite ══════════════════════════"; \
|
||||
if $(MAKE) --no-print-directory test-$$suite ARGS="$(ARGS)"; then \
|
||||
echo "[PASS] $$suite"; \
|
||||
else \
|
||||
echo "[FAIL] $$suite"; \
|
||||
failed="$$failed $$suite"; \
|
||||
if [ "$(FAIL_FAST)" = "1" ]; then \
|
||||
echo "Stopping at first failure. Use FAIL_FAST=0 to run all suites."; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
fi; \
|
||||
done; \
|
||||
if [ -n "$$failed" ]; then \
|
||||
echo ""; \
|
||||
echo "Failed:$$failed"; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
echo ""; \
|
||||
echo "All suites passed."
|
||||
|
||||
# ── Decky image pre-build ─────────────────────────────────────────────────────
|
||||
|
||||
_DECKY_TEMPLATES := \
|
||||
conpot docker_api elasticsearch ftp http https imap k8s ldap \
|
||||
llmnr mongodb mqtt mssql mysql pop3 postgres rdp redis sip smb smtp \
|
||||
sniffer snmp ssh telnet tftp vnc
|
||||
|
||||
.PHONY: build-all
|
||||
build-all:
|
||||
@failed=""; \
|
||||
for svc in $(_DECKY_TEMPLATES); do \
|
||||
echo ""; \
|
||||
echo "══════════════════════════ $$svc ══════════════════════════"; \
|
||||
_nc=""; \
|
||||
if [ "$(NO_CACHE)" = "1" ]; then _nc="--no-cache"; fi; \
|
||||
if DOCKER_BUILDKIT=1 docker build $$_nc \
|
||||
-t decnet/$$svc:latest \
|
||||
decnet/templates/$$svc; then \
|
||||
echo "[BUILT] $$svc"; \
|
||||
else \
|
||||
echo "[FAIL] $$svc"; \
|
||||
failed="$$failed $$svc"; \
|
||||
if [ "$(FAIL_FAST)" = "1" ]; then \
|
||||
echo "Stopping at first failure. Use FAIL_FAST=0 to build all."; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
fi; \
|
||||
done; \
|
||||
if [ -n "$$failed" ]; then \
|
||||
echo ""; \
|
||||
echo "Failed:$$failed"; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
echo ""; \
|
||||
echo "All decky images built."
|
||||
|
||||
.PHONY: help
|
||||
help:
|
||||
@echo "Unit suites (xdist, 30s timeout):"
|
||||
@echo " make test-core tests/core + config + factories + fixtures"
|
||||
@echo " make test-web tests/web + services"
|
||||
@echo " make test-db tests/db + vectorstore"
|
||||
@echo " make test-bus tests/bus + logging + telemetry"
|
||||
@echo " make test-ttp tests/ttp"
|
||||
@echo " make test-intel tests/intel + asn + geoip"
|
||||
@echo " make test-analysis tests/clustering + correlation"
|
||||
@echo " make test-infra tests/agent + collector + sniffer + profiler"
|
||||
@echo " make test-fleet tests/fleet + swarm + topology + orchestrator + deploy + updater"
|
||||
@echo " make test-cli tests/cli + engine + mutator + realism"
|
||||
@echo " make test-features tests/canary + artifacts + webhook + decky_io + prober"
|
||||
@echo ""
|
||||
@echo "Go / React suites:"
|
||||
@echo " make test-go go test ./... in each Caddy module variant"
|
||||
@echo " make test-react vitest run in decnet_web"
|
||||
@echo ""
|
||||
@echo "Special suites (sequential, 120s timeout):"
|
||||
@echo " make test-live tests/live"
|
||||
@echo " make test-api tests/api (schemathesis)"
|
||||
@echo " make test-stress tests/stress"
|
||||
@echo " make test-service tests/service_testing"
|
||||
@echo " make test-schema schemathesis contract tests (-m fuzz, xdist logical)"
|
||||
@echo " make test-schema SCHEMA_QUICK=1 same, capped at 100 examples per test"
|
||||
@echo " make test-fuzz hypothesis fuzz (all normal dirs, -m fuzz, skips schemathesis files)"
|
||||
@echo " make test-bench tests/perf"
|
||||
@echo " make test-docker tests/docker (needs DECNET_LIVE_DOCKER=1)"
|
||||
@echo ""
|
||||
@echo "Static analysis:"
|
||||
@echo " make test-mypy mypy type check on decnet/"
|
||||
@echo " make test-bandit bandit security scan on decnet/"
|
||||
@echo " make test-vulture vulture dead code scan (>=80% confidence)"
|
||||
@echo " make test-pip-audit pip-audit dependency vulnerability scan"
|
||||
@echo ""
|
||||
@echo "Composites:"
|
||||
@echo " make test-all ALL suites (unit + go + react + live + api + schema + fuzz + bench + stress + docker + static analysis)"
|
||||
@echo " make test-all FAIL_FAST=0 same, report all failures instead of stopping"
|
||||
@echo ""
|
||||
@echo "Passthrough: make test-web ARGS='--lf -s'"
|
||||
@echo ""
|
||||
@echo "Decky images:"
|
||||
@echo " make build-all build decnet/<svc>:latest for all 27 decky templates"
|
||||
@echo " make build-all NO_CACHE=1 same, bypassing Docker layer cache"
|
||||
@echo " make build-all FAIL_FAST=0 same, continue past failures"
|
||||
@@ -1,5 +0,0 @@
|
||||
# bait/
|
||||
|
||||
Default operator-supplied email seed for IMAP/POP3 deckies. Drop `*.eml` and/or `*.json` files here; the IMAP/POP3 services bind-mount this dir read-only at `/var/spool/decnet-emails/seed` when no per-decky `email_seed` is configured. Entries concatenate onto the hardcoded bait baseline (additive to realism-engine output, never replacing).
|
||||
|
||||
JSON shape: list of dicts with required `from_addr`, `to_addr`, `subject`, `body`; optional `from_name`, `date`, `flags`. See `decnet/templates/imap/server.py` for the loader.
|
||||
BIN
decnet.tar
BIN
decnet.tar
Binary file not shown.
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""DECNET — honeypot deception-network framework.
|
||||
|
||||
This __init__ runs once, on the first `import decnet.*`. It seeds
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""DECNET worker agent — runs on every SWARM worker host.
|
||||
|
||||
Exposes an mTLS-protected FastAPI service the master's SWARM controller
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Worker-side FastAPI app.
|
||||
|
||||
Protected by mTLS at the ASGI/uvicorn transport layer: uvicorn is started
|
||||
@@ -19,139 +18,29 @@ Endpoints mirror the existing unihost CLI verbs:
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import pathlib
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Any, Optional
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
import contextlib
|
||||
|
||||
from decnet.agent import executor as _exec
|
||||
from decnet.agent import heartbeat as _heartbeat
|
||||
from decnet.agent import topology_ops as _topology_ops
|
||||
from decnet.bus.factory import get_bus
|
||||
from decnet.bus.publish import run_health_heartbeat
|
||||
from decnet.swarm.pki import DEFAULT_AGENT_DIR
|
||||
from decnet.agent.topology_store import AlreadyApplied, TopologyStore
|
||||
from decnet.config import DecnetConfig
|
||||
from decnet.logging import get_logger
|
||||
from decnet.topology.validate import ValidationError
|
||||
|
||||
log = get_logger("agent.app")
|
||||
|
||||
|
||||
def _resolve_agent_dir() -> pathlib.Path:
    """Return the directory that holds this agent's on-disk state.

    Lookup order:

    1. ``DECNET_AGENT_DIR`` from the environment, when set and non-empty.
    2. ``/etc/decnet/agent`` when that path exists.
    3. ``DEFAULT_AGENT_DIR`` (from ``decnet.swarm.pki``) otherwise.
    """
    env = os.environ.get("DECNET_AGENT_DIR")
    if env:
        return pathlib.Path(env)
    system = pathlib.Path("/etc/decnet/agent")
    if system.exists():
        return system
    return DEFAULT_AGENT_DIR
|
||||
|
||||
|
||||
# Module-level singleton. Created lazily on first use so tests can
# monkeypatch DECNET_AGENT_DIR before the store binds to a path.
_topology_store: Optional[TopologyStore] = None


def _store() -> TopologyStore:
    """Return the shared :class:`TopologyStore`, creating it on first call.

    The store is backed by ``topology.db`` inside the directory returned by
    ``_resolve_agent_dir()``; subsequent calls return the same instance
    until the module-level reference is reset to ``None``.
    """
    global _topology_store
    if _topology_store is None:
        _topology_store = TopologyStore(_resolve_agent_dir() / "topology.db")
    return _topology_store
|
||||
|
||||
|
||||
_collector_task: Optional[asyncio.Task] = None


def _ensure_collector_started() -> None:
    """Spawn the log collector on demand — called from /topology/apply
    after a successful materialise. We must NOT start this in the
    lifespan hook: the agent's boot invariant is "never touch docker
    until master tells us to" (see tests/swarm/test_agent_no_auto_restore.py).

    The collector watches ``decnet.topology.service=true`` labels via
    docker events, writing RFC 5424 lines to ``DECNET_AGENT_LOG_FILE``
    which the forwarder ships to the master over syslog-TLS. Idempotent:
    subsequent calls while the task is still running are no-ops.
    """
    global _collector_task
    if _collector_task is not None and not _collector_task.done():
        return
    from decnet.env import DECNET_AGENT_LOG_FILE

    try:
        from decnet.collector.worker import log_collector_worker
    except Exception:  # noqa: BLE001 — docker may be unavailable on dev
        log.warning(
            "agent log collector not starting — collector worker import failed",
            exc_info=True,
        )
        return
    # Keep the task in the module global: it is what makes repeat calls a
    # no-op and what the lifespan shutdown cancels.
    _collector_task = asyncio.create_task(
        log_collector_worker(DECNET_AGENT_LOG_FILE),
        name="agent-log-collector",
    )
    log.info("agent log collector started log_file=%s", DECNET_AGENT_LOG_FILE)
|
||||
|
||||
|
||||
_bus_heartbeat_task: Optional[asyncio.Task] = None


@asynccontextmanager
async def _lifespan(app: FastAPI):
    """Application lifespan: start the heartbeats, then unwind on shutdown.

    Startup starts the master-facing heartbeat and a host-local bus
    heartbeat task. Shutdown stops the heartbeat, cancels the bus
    heartbeat and (if running) the log collector, closes the bus and
    closes the topology store. ``app`` is unused here.
    """
    global _bus_heartbeat_task, _collector_task, _topology_store

    # Best-effort: if identity/bundle plumbing isn't configured (e.g. dev
    # runs or non-enrolled hosts), heartbeat.start() is a silent no-op.
    _heartbeat.start()

    # Host-local bus heartbeat (system.agent.health). Separate channel
    # from the mTLS master-facing heartbeat above; this one lets peers on
    # the same host (dashboard, updater) see the agent is alive without
    # hitting its HTTPS endpoint. Bus-disabled path is a no-op loop.
    bus = None
    try:
        bus = get_bus(client_name="agent")
        await bus.connect()
    except Exception as exc:  # noqa: BLE001
        log.warning("agent: bus unavailable, skipping health heartbeat: %s", exc)
        bus = None

    _bus_heartbeat_task = asyncio.create_task(
        run_health_heartbeat(bus, "agent"),
        name="agent-bus-heartbeat",
    )

    try:
        yield
    finally:
        await _heartbeat.stop()
        if _bus_heartbeat_task is not None:
            _bus_heartbeat_task.cancel()
            # CancelledError is not an Exception subclass, so name both.
            with contextlib.suppress(asyncio.CancelledError, Exception):
                await _bus_heartbeat_task
            _bus_heartbeat_task = None
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
        if _collector_task is not None and not _collector_task.done():
            _collector_task.cancel()
            with contextlib.suppress(asyncio.CancelledError, Exception):
                await _collector_task
        _collector_task = None
        if _topology_store is not None:
            _topology_store.close()
            _topology_store = None
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
@@ -183,7 +72,6 @@ class TeardownRequest(BaseModel):
|
||||
# Request body for POST /mutate.
class MutateRequest(BaseModel):
    # Matched against the ``name`` of the deckies in the worker's saved state.
    decky_id: str
    # Replacement service list for that decky.
    services: list[str]
    # When true the route only validates and echoes the request back.
    dry_run: bool = False
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ routes
|
||||
@@ -200,22 +88,15 @@ async def status() -> dict:
|
||||
|
||||
@app.post(
|
||||
"/deploy",
|
||||
status_code=202,
|
||||
responses={202: {"description": "Deploy accepted; runs in background; lifecycle deltas pushed via heartbeat"}},
|
||||
responses={500: {"description": "Deployer raised an exception materialising the config"}},
|
||||
)
|
||||
async def deploy(req: DeployRequest) -> dict:
|
||||
"""Spawn the deploy in the background and return 202 immediately.
|
||||
|
||||
The master tracks per-decky completion via lifecycle deltas pushed on
|
||||
the next heartbeat (one immediate push on completion, plus the
|
||||
scheduled 30 s ticks as a fallback). Holding the request open across
|
||||
a multi-minute compose build was the previous source of the wizard
|
||||
API-hang."""
|
||||
asyncio.create_task(
|
||||
_exec.deploy_async(req.config, dry_run=req.dry_run, no_cache=req.no_cache),
|
||||
name=f"deploy-{id(req)}",
|
||||
)
|
||||
return {"status": "accepted", "deckies": [d.name for d in req.config.deckies]}
|
||||
try:
|
||||
await _exec.deploy(req.config, dry_run=req.dry_run, no_cache=req.no_cache)
|
||||
except Exception as exc:
|
||||
log.exception("agent.deploy failed")
|
||||
raise HTTPException(status_code=500, detail=str(exc)) from exc
|
||||
return {"status": "deployed", "deckies": len(req.config.deckies)}
|
||||
|
||||
|
||||
@app.post(
|
||||
@@ -248,119 +129,16 @@ async def self_destruct() -> dict:
|
||||
return {"status": "self_destruct_scheduled"}
|
||||
|
||||
|
||||
# ------------------------------------------------------- topology endpoints
|
||||
|
||||
|
||||
# Request body for POST /topology/apply: the topology to materialise plus
# the hash the master computed for it.
class ApplyTopologyRequest(BaseModel):
    hydrated: dict[str, Any] = Field(
        ..., description="Hydrated topology dict from master.persistence.hydrate()"
    )
    version_hash: str = Field(
        ..., description="Master's canonical_hash(hydrated); must match ours"
    )
|
||||
|
||||
|
||||
# Request body for POST /topology/teardown.
class TeardownTopologyRequest(BaseModel):
    topology_id: str = Field(..., description="Topology UUID to dismantle")
|
||||
|
||||
|
||||
@app.post(
    "/topology/apply",
    responses={
        400: {"description": "Malformed hydrated topology or hash mismatch"},
        409: {"description": "A different topology is already applied"},
        500: {"description": "Docker or compose raised while applying"},
    },
)
async def topology_apply(req: ApplyTopologyRequest) -> dict:
    """Apply the hydrated topology sent by the master.

    Hash mismatches and validation failures map to 400, ``AlreadyApplied``
    to 409, and anything else to 500 after the error has been recorded on
    the local store row. On success the log collector is started (if not
    already running) and the applied ``version_hash`` is echoed back.
    """
    store = _store()
    try:
        await _topology_ops.apply(req.hydrated, req.version_hash, store)
    except (_topology_ops.HashMismatch, ValidationError) as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except AlreadyApplied as exc:
        raise HTTPException(status_code=409, detail=str(exc)) from exc
    except Exception as exc:
        log.exception("agent.topology_apply failed")
        # The payload may be malformed in arbitrary ways at this point, so
        # only dig into ``topology`` when it really is a dict — the error
        # path must not raise and mask the original failure.
        topo = req.hydrated.get("topology")
        topology_id = topo.get("id") if isinstance(topo, dict) else None
        if topology_id:
            try:
                store.record_error(
                    str(topology_id), str(exc)[:500], hydrated=req.hydrated,
                )
            except Exception:  # noqa: BLE001 — don't mask original failure
                log.exception("failed to record apply error")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    _ensure_collector_started()
    return {"status": "applied", "version_hash": req.version_hash}
|
||||
|
||||
|
||||
@app.post(
    "/topology/teardown",
    responses={500: {"description": "Docker or compose raised while tearing down"}},
)
async def topology_teardown(req: TeardownTopologyRequest) -> dict:
    """Tear down the requested topology; any failure is logged and mapped to 500."""
    try:
        await _topology_ops.teardown(req.topology_id, _store())
    except Exception as exc:
        log.exception("agent.topology_teardown failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return {"status": "torn_down", "topology_id": req.topology_id}
|
||||
|
||||
|
||||
@app.get("/topology/state")
async def topology_state() -> dict:
    """Return the topology state snapshot produced by ``_topology_ops.state``."""
    return _topology_ops.state(_store())
|
||||
|
||||
|
||||
@app.post(
|
||||
"/mutate",
|
||||
status_code=202,
|
||||
responses={
|
||||
202: {"description": "Mutate accepted; runs in background; lifecycle delta pushed via heartbeat"},
|
||||
404: {"description": "No active deployment, or unknown decky_id (dry_run validation only)"},
|
||||
},
|
||||
responses={501: {"description": "Worker-side mutate not yet implemented"}},
|
||||
)
|
||||
async def mutate(req: MutateRequest) -> Any:
|
||||
"""Spawn the mutate in the background and return 202 immediately.
|
||||
|
||||
Master tracks completion via a lifecycle delta pushed on the next
|
||||
heartbeat (immediate push on completion). ``dry_run`` is still
|
||||
synchronous — it validates against the worker's current state and
|
||||
returns the would-be services without spawning a task or touching
|
||||
docker, so the wizard's preview path stays cheap."""
|
||||
if req.dry_run:
|
||||
from decnet.config import load_state
|
||||
state = load_state()
|
||||
if state is None:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail="no active deployment on this worker",
|
||||
)
|
||||
cfg, _ = state
|
||||
decky = next((d for d in cfg.deckies if d.name == req.decky_id), None)
|
||||
if decky is None:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"decky {req.decky_id!r} not found in worker state",
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=200,
|
||||
content={
|
||||
"status": "dry_run",
|
||||
"decky_id": req.decky_id,
|
||||
"services": list(req.services),
|
||||
},
|
||||
)
|
||||
|
||||
asyncio.create_task(
|
||||
_exec.mutate_async(req.decky_id, list(req.services)),
|
||||
name=f"mutate-{req.decky_id}",
|
||||
async def mutate(req: MutateRequest) -> dict:
|
||||
# TODO: implement worker-side mutate. Currently the master performs
|
||||
# mutation by re-sending a full /deploy with the updated DecnetConfig;
|
||||
# this avoids duplicating mutation logic on the worker for v1. When
|
||||
# ready, replace the 501 with a real redeploy-of-a-single-decky path.
|
||||
raise HTTPException(
|
||||
status_code=501,
|
||||
detail="Per-decky mutate is performed via /deploy with updated services",
|
||||
)
|
||||
return {
|
||||
"status": "accepted",
|
||||
"decky_id": req.decky_id,
|
||||
"services": list(req.services),
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Thin adapter between the agent's HTTP endpoints and the existing
|
||||
``decnet.engine.deployer`` code path.
|
||||
|
||||
@@ -81,99 +80,6 @@ async def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = F
|
||||
await asyncio.to_thread(_deployer.deploy, config, dry_run, no_cache, False)
|
||||
|
||||
|
||||
async def deploy_async(
    config: DecnetConfig, *, dry_run: bool = False, no_cache: bool = False,
) -> None:
    """Background-task body for /deploy: run the deploy, then push a
    lifecycle delta to the master so it observes terminal transitions
    immediately rather than waiting for the next scheduled heartbeat.

    One delta is pushed per decky in ``config.deckies``. A failing deploy
    is logged and reported as ``failed`` deltas (error text capped at
    2000 characters) instead of propagating out of the task.
    """
    from datetime import datetime, timezone
    from decnet.agent.heartbeat import push_lifecycle_delta

    decky_names = [d.name for d in config.deckies]

    def _deltas(status: str, error: str | None = None) -> list[dict]:
        """Build one ``deploy`` delta per decky with the given outcome."""
        built = []
        for name in decky_names:
            delta = {"decky_name": name, "operation": "deploy", "status": status}
            if error is not None:
                delta["error"] = error[:2000]
            delta["completed_at"] = datetime.now(timezone.utc).isoformat()
            built.append(delta)
        return built

    try:
        await deploy(config, dry_run=dry_run, no_cache=no_cache)
    except Exception as exc:  # noqa: BLE001
        log.exception("agent.deploy_async failed")
        await push_lifecycle_delta(_deltas("failed", f"{type(exc).__name__}: {exc}"))
        return
    await push_lifecycle_delta(_deltas("succeeded"))
|
||||
|
||||
|
||||
async def mutate_async(decky_id: str, services: list[str]) -> None:
    """Background-task body for /mutate. Same shape as deploy_async:
    perform the work, then push a single lifecycle delta on
    completion (success or failure).

    The work is: load the saved state, replace the named decky's
    services, persist state and compose file, then run
    ``compose up -d --remove-orphans`` in a worker thread.
    """
    import time
    from datetime import datetime, timezone
    from decnet.composer import write_compose
    from decnet.config import load_state, save_state
    from decnet.engine import _compose_with_retry
    from decnet.agent.heartbeat import push_lifecycle_delta

    def _delta(status: str, error: str | None = None) -> dict:
        """Build the single ``mutate`` delta for this decky."""
        out = {
            "decky_name": decky_id, "operation": "mutate",
            "status": status,
            "completed_at": datetime.now(timezone.utc).isoformat(),
        }
        if error is not None:
            out["error"] = error[:2000]
        return out

    try:
        state = load_state()
        if state is None:
            await push_lifecycle_delta(
                [_delta("failed", "no active deployment on this worker")],
            )
            return
        cfg, compose_path = state
        decky = next((d for d in cfg.deckies if d.name == decky_id), None)
        if decky is None:
            await push_lifecycle_delta(
                [_delta("failed", f"decky {decky_id!r} not found in worker state")],
            )
            return
        decky.services = list(services)
        decky.last_mutated = time.time()
        # NOTE(review): state is persisted before compose runs, so a failed
        # ``up`` leaves the new service list on disk — confirm this is intended.
        save_state(cfg, compose_path)
        write_compose(cfg, compose_path)
        await asyncio.to_thread(
            _compose_with_retry, "up", "-d", "--remove-orphans",
            compose_file=compose_path,
        )
    except Exception as exc:  # noqa: BLE001
        log.exception("agent.mutate_async failed decky=%s", decky_id)
        err = f"{type(exc).__name__}: {exc}"
        await push_lifecycle_delta([_delta("failed", err)])
        return
    await push_lifecycle_delta([_delta("succeeded")])
|
||||
|
||||
|
||||
async def teardown(decky_id: str | None = None) -> None:
|
||||
log.info("agent.teardown decky_id=%s", decky_id)
|
||||
await asyncio.to_thread(_deployer.teardown, decky_id)
|
||||
@@ -226,7 +132,7 @@ if command -v docker >/dev/null 2>&1; then
|
||||
fi
|
||||
|
||||
# Stop+disable every systemd unit the installer may have dropped.
|
||||
for unit in decnet-agent decnet-engine decnet-collector decnet-forwarder decnet-prober decnet-reconciler decnet-sniffer decnet-updater; do
|
||||
for unit in decnet-agent decnet-engine decnet-collector decnet-forwarder decnet-prober decnet-sniffer decnet-updater; do
|
||||
systemctl stop "$unit" 2>/dev/null
|
||||
systemctl disable "$unit" 2>/dev/null
|
||||
done
|
||||
@@ -288,7 +194,7 @@ async def self_destruct() -> None:
|
||||
argv = ["/bin/bash", path]
|
||||
spawn_kwargs = {"start_new_session": True}
|
||||
|
||||
subprocess.Popen( # type: ignore[call-overload] # nosec B603
|
||||
subprocess.Popen( # nosec B603
|
||||
argv,
|
||||
stdin=subprocess.DEVNULL,
|
||||
stdout=subprocess.DEVNULL,
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Agent → master liveness heartbeat loop.
|
||||
|
||||
Every ``INTERVAL_S`` seconds the worker posts ``executor.status()`` to
|
||||
@@ -51,38 +50,16 @@ def _resolve_agent_dir() -> pathlib.Path:
|
||||
return pki.DEFAULT_AGENT_DIR
|
||||
|
||||
|
||||
async def _build_body(
    host_uuid: str,
    agent_version: str,
    lifecycle: Optional[list[dict]] = None,
) -> dict:
    """Assemble the JSON body of a heartbeat POST.

    Always contains ``host_uuid``, ``agent_version`` and ``status`` (the
    result of ``_exec.status()``). ``topology`` is added when the local
    topology state can be read, and ``lifecycle`` when a non-empty list
    of deltas is passed.
    """
    snap = await _exec.status()
    body: dict = {
        "host_uuid": host_uuid,
        "agent_version": agent_version,
        "status": snap,
    }
    # Best-effort: fold in applied-topology snapshot. Failures must never
    # wedge the heartbeat loop — master will fall back to "no topology
    # reported" which triggers a resync if it expected one.
    try:
        from decnet.agent import topology_ops as _topo_ops
        from decnet.agent.topology_store import TopologyStore

        store = TopologyStore(_resolve_agent_dir() / "topology.db")
        try:
            body["topology"] = _topo_ops.state(store)
        finally:
            store.close()
    except Exception:
        log.debug("heartbeat: topology state unavailable", exc_info=True)
    if lifecycle:
        body["lifecycle"] = lifecycle
    return body
|
||||
|
||||
|
||||
async def _tick(client: httpx.AsyncClient, url: str, host_uuid: str, agent_version: str) -> None:
|
||||
body = await _build_body(host_uuid, agent_version)
|
||||
resp = await client.post(url, json=body)
|
||||
snap = await _exec.status()
|
||||
resp = await client.post(
|
||||
url,
|
||||
json={
|
||||
"host_uuid": host_uuid,
|
||||
"agent_version": agent_version,
|
||||
"status": snap,
|
||||
},
|
||||
)
|
||||
# 403 / 404 are terminal-ish — we still keep looping because an
|
||||
# operator may re-enrol the host mid-session, but we log loudly so
|
||||
# prod ops can spot cert-pinning drift.
|
||||
@@ -132,7 +109,7 @@ def start() -> Optional[asyncio.Task]:
|
||||
return None
|
||||
|
||||
try:
|
||||
from decnet import __version__ as _v # type: ignore[attr-defined]
|
||||
from decnet import __version__ as _v
|
||||
agent_version = _v
|
||||
except Exception:
|
||||
agent_version = "unknown"
|
||||
@@ -145,59 +122,6 @@ def start() -> Optional[asyncio.Task]:
|
||||
return _task
|
||||
|
||||
|
||||
async def push_lifecycle_delta(deltas: list[dict]) -> None:
    """Fire a one-off heartbeat POST carrying *deltas* in the
    ``lifecycle`` field. Each delta: ``{decky_name, operation, status,
    error?, completed_at?}``.

    Called by the agent executor on /deploy and /mutate completion so
    the master observes the terminal transition immediately rather than
    waiting up to ``INTERVAL_S`` for the next scheduled tick. Failures
    are logged and swallowed; the next scheduled heartbeat carries the
    same deltas via DB-side reconciliation, since the worker has no
    durable per-row state to lose.
    """
    if not deltas:
        return

    from decnet.env import (
        DECNET_HOST_UUID,
        DECNET_MASTER_HOST,
        DECNET_SWARMCTL_PORT,
    )

    if not DECNET_HOST_UUID or not DECNET_MASTER_HOST:
        log.debug("push_lifecycle_delta: identity unconfigured — skipping")
        return

    agent_dir = _resolve_agent_dir()
    try:
        ssl_ctx = build_worker_ssl_context(agent_dir)
    except Exception:
        log.exception("push_lifecycle_delta: SSL context unavailable")
        return

    try:
        from decnet import __version__ as _v  # type: ignore[attr-defined]
        agent_version = _v
    except Exception:
        agent_version = "unknown"

    url = f"https://{DECNET_MASTER_HOST}:{DECNET_SWARMCTL_PORT}/swarm/heartbeat"
    try:
        async with httpx.AsyncClient(verify=ssl_ctx, timeout=_TIMEOUT) as client:
            body = await _build_body(
                DECNET_HOST_UUID, agent_version, lifecycle=deltas,
            )
            resp = await client.post(url, json=body)
            if resp.status_code not in (200, 204):
                log.warning(
                    "lifecycle delta push rejected status=%d body=%s",
                    resp.status_code, resp.text[:200],
                )
    except Exception:
        log.exception("push_lifecycle_delta failed — next scheduled tick will retry")
|
||||
|
||||
|
||||
async def stop() -> None:
|
||||
global _task
|
||||
if _task is None:
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Worker-agent uvicorn launcher.
|
||||
|
||||
Starts ``decnet.agent.app:app`` over HTTPS with mTLS enforcement. The
|
||||
|
||||
@@ -1,220 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Agent-side topology apply/teardown/state primitives.
|
||||
|
||||
Wraps the compose + bridge machinery from :mod:`decnet.engine.deployer`
|
||||
so the agent can drive a topology without ever touching the master's
|
||||
sqlmodel repo. The master-side ``deploy_topology`` always calls
|
||||
``transition_status(repo, …)`` which is useless (and unreachable) on
|
||||
an agent — here we operate purely on a hydrated dict + the local
|
||||
:class:`TopologyStore`.
|
||||
|
||||
v1 constraint: one topology per agent. A second apply for a different
|
||||
``topology_id`` triggers an on-the-spot teardown of the predecessor
|
||||
before the new apply proceeds — master is authoritative.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import subprocess # nosec B404
|
||||
from typing import Any
|
||||
|
||||
import docker
|
||||
|
||||
from decnet.agent.topology_store import (
|
||||
TopologyStore,
|
||||
observed,
|
||||
)
|
||||
from decnet.engine.deployer import (
|
||||
_compose,
|
||||
_compose_with_retry,
|
||||
_teardown_order,
|
||||
_topology_compose_path,
|
||||
_topology_compose_project,
|
||||
)
|
||||
from decnet.logging import get_logger
|
||||
from decnet.network import create_bridge_network, remove_bridge_network
|
||||
from decnet.topology.compose import (
|
||||
_network_name as _topology_network_name,
|
||||
write_topology_compose,
|
||||
)
|
||||
from decnet.topology.hashing import canonical_hash
|
||||
from decnet.topology.validate import (
|
||||
ValidationError,
|
||||
errors as _validation_errors,
|
||||
validate as _validate_topology,
|
||||
)
|
||||
|
||||
log = get_logger("agent.topology_ops")
|
||||
|
||||
|
||||
class HashMismatch(RuntimeError):
    """Raised when the master-provided version_hash doesn't match what we
    hash locally — suggests serialisation drift. We fail loudly rather
    than silently papering over a schema mismatch."""
|
||||
|
||||
|
||||
def _topology_id(hydrated: dict[str, Any]) -> str:
    """Return ``hydrated["topology"]["id"]`` as a string.

    Raises:
        ValueError: the ``topology`` entry is missing, is not a dict, or
            has no truthy ``id``.
    """
    topo = hydrated.get("topology")
    # A non-dict ``topology`` is reported the same way as a missing id,
    # rather than surfacing as an AttributeError from ``.get``.
    tid = topo.get("id") if isinstance(topo, dict) else None
    if not tid:
        raise ValueError("hydrated topology missing topology.id")
    return str(tid)
|
||||
|
||||
|
||||
def _check_hash_and_validate(hydrated: dict[str, Any], version_hash: str) -> str:
    """Verify hash integrity and structural validity; return topology_id.

    Raises:
        HashMismatch: ``canonical_hash(hydrated)`` differs from *version_hash*.
        ValidationError: validation reported at least one error-level issue.
    """
    local_hash = canonical_hash(hydrated)
    if local_hash != version_hash:
        raise HashMismatch(
            f"master hash {version_hash!r} does not match agent hash "
            f"{local_hash!r} — refusing to apply"
        )
    issues = _validate_topology(hydrated)
    if _validation_errors(issues):
        raise ValidationError(issues)
    return _topology_id(hydrated)
|
||||
|
||||
|
||||
async def _teardown_superseded(topology_id: str, store: TopologyStore) -> None:
    """Tear down the current topology if it differs from topology_id.

    Master is authoritative — a different pinned topology (fully applied,
    partially applied, or drifted) is torn down before the new apply proceeds.
    Refusing with 409 would leave the agent stuck in a state only a human
    could resolve.
    """
    existing = store.current()
    if existing is None or existing.topology_id == topology_id:
        return
    log.info(
        "superseding topology %s with %s on master authority",
        existing.topology_id, topology_id,
    )
    try:
        await teardown(existing.topology_id, store)
    except Exception as exc:  # noqa: BLE001 — we still want to try applying
        log.warning(
            "best-effort teardown of superseded topology %s failed: %s",
            existing.topology_id, exc,
        )
        # Hard-clear the store row so the new apply isn't blocked by a
        # half-torn-down predecessor. Leftover docker objects surface via
        # the next heartbeat's observed block.
        store.clear(existing.topology_id)
|
||||
|
||||
|
||||
def _materialise(hydrated: dict[str, Any], topology_id: str) -> None:
    """Create bridge networks, write compose file, and bring up containers.

    Sync/blocking — callers must dispatch via asyncio.to_thread.

    ``--always-recreate-deps`` keeps service containers' netns shares
    fresh: every decky service joins its base's netns via
    ``network_mode: container:<base>``, and that share is bound at
    service start time. If a base is recreated (e.g. when ``ports:``
    changes after toggling ``forwards_l3``) but compose decides the
    services are unchanged, the services keep a stale netns FD
    pointing at the destroyed base — they end up in an empty
    namespace with only ``lo``, and external traffic hits a closed
    port on the live base. Forcing dependents to recreate alongside
    the base is the cheapest way to make this race impossible.
    """
    compose_path = _topology_compose_path(topology_id)
    compose_project = _topology_compose_project(topology_id)
    client = docker.from_env()
    for lan in hydrated["lans"]:
        net_name = _topology_network_name(topology_id, lan["name"])
        # Only DMZ LANs get a non-internal bridge.
        create_bridge_network(client, net_name, lan["subnet"], internal=not lan["is_dmz"])
    write_topology_compose(hydrated, compose_path)
    _compose_with_retry(
        "up", "--build", "-d", "--always-recreate-deps",
        compose_file=compose_path, project=compose_project,
    )
|
||||
|
||||
|
||||
async def apply(
    hydrated: dict[str, Any],
    version_hash: str,
    store: TopologyStore,
) -> None:
    """Materialise *hydrated* on this agent and record it in *store*.

    Raises:
        HashMismatch: master and agent disagree on the canonical hash —
            don't touch docker, fail the apply.
        ValidationError: topology fails structural validation.
        Any docker / compose error propagates up; the endpoint maps it
            to 500 and records the message on the store row.
    """
    topology_id = _check_hash_and_validate(hydrated, version_hash)
    await _teardown_superseded(topology_id, store)
    # Bridge creation and compose-up block, so run them off the event loop.
    await asyncio.to_thread(_materialise, hydrated, topology_id)
    # The store row is written only after materialisation succeeded.
    store.put(topology_id, version_hash, hydrated)
    log.info("topology %s applied on agent (%d LANs)", topology_id, len(hydrated["lans"]))
|
||||
|
||||
|
||||
async def teardown(
    topology_id: str,
    store: TopologyStore,
) -> None:
    """Tear down *topology_id* on this agent. Idempotent: if there's no
    record and no compose file, it's a no-op that still returns cleanly.

    Compose-down failures are logged and skipped; bridge networks are only
    removed when the stored hydrated blob for this topology is available.
    """
    row = store.current()
    # Prefer the stored hydrated blob — it's what we applied with. If
    # it's gone (db wiped) but compose-file lingers, we still try to
    # compose-down; bridges can only be removed when the blob is available.
    hydrated = row.hydrated if row and row.topology_id == topology_id else None
    compose_path = _topology_compose_path(topology_id)
    compose_project = _topology_compose_project(topology_id)

    def _dismantle() -> None:
        """Blocking part: compose down, remove bridges, delete compose file."""
        if compose_path.exists():
            try:
                _compose(
                    "down", "--remove-orphans",
                    compose_file=compose_path, project=compose_project,
                )
            except subprocess.CalledProcessError as exc:
                log.warning(
                    "topology %s compose down failed (continuing): %s",
                    topology_id, exc,
                )
        if hydrated is not None:
            # The docker client is created here — inside the worker thread
            # and only when bridges actually need removing — so the no-op
            # path neither blocks the event loop nor requires docker.
            client = docker.from_env()
            try:
                for lan_name in _teardown_order(hydrated["lans"]):
                    net_name = _topology_network_name(topology_id, lan_name)
                    remove_bridge_network(client, net_name)
            finally:
                try:
                    client.close()
                except Exception:  # noqa: BLE001 — cleanup is best-effort
                    log.debug("docker client close failed", exc_info=True)
        compose_path.unlink(missing_ok=True)

    await asyncio.to_thread(_dismantle)
    store.clear(topology_id)
    log.info("topology %s torn down on agent", topology_id)
|
||||
|
||||
|
||||
def state(store: TopologyStore) -> dict[str, Any]:
    """Snapshot-plus-live-observation — the shape the heartbeat embeds.

    Returns a dict with ``topology_id``, ``applied_version_hash``,
    ``applied_at`` and ``last_error`` taken from the current store row
    (all ``None`` when there is no row), plus ``observed``: the result of
    ``observed(<docker client>)``, or ``{"error": <message>}`` when docker
    could not be queried.
    """
    row = store.current()
    try:
        client = docker.from_env()
        try:
            obs = observed(client)
        finally:
            # This runs on every heartbeat, so release the client instead
            # of leaving one open per call. A failing close must not
            # replace a good observation.
            try:
                client.close()
            except Exception:  # noqa: BLE001 — cleanup is best-effort
                log.debug("docker client close failed", exc_info=True)
    except Exception as exc:  # noqa: BLE001 — docker socket may be gone
        obs = {"error": str(exc)[:200]}
    return {
        "topology_id": row.topology_id if row is not None else None,
        "applied_version_hash": row.applied_version_hash if row is not None else None,
        "applied_at": row.applied_at if row is not None else None,
        "last_error": row.last_error if row is not None else None,
        "observed": obs,
    }
|
||||
|
||||
|
||||
__all__ = ["apply", "teardown", "state", "HashMismatch"]
|
||||
@@ -1,215 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Agent-side sqlite cache of the currently-applied topology.
|
||||
|
||||
**This is a cache, not a source of truth.** The master is the only
|
||||
authority for what the agent should be running. This store exists so
|
||||
the agent can answer two questions quickly and offline:
|
||||
|
||||
1. What topology did I last apply, and with what version hash?
|
||||
2. Is what docker is currently doing consistent with that?
|
||||
|
||||
The hash goes out on every heartbeat; the master compares it to what
|
||||
it thinks this host should be running and schedules a re-push on
|
||||
mismatch.
|
||||
|
||||
Why sqlite when the blob is JSON? Consistent with
|
||||
:mod:`decnet.swarm.log_forwarder._OffsetStore` — single-row sqlite is
|
||||
the project-wide pattern for agent-local persistent state. Keeps
|
||||
operational mental model small: "one state.db per thing".
|
||||
|
||||
Design choices worth calling out:
|
||||
|
||||
- **One row, one topology.** v1 only supports a single topology per
|
||||
agent. Attempting to :meth:`put` a different ``topology_id`` while
|
||||
a row already exists raises :class:`AlreadyApplied` — the agent
|
||||
rejects the apply with 409 and the master is expected to tear down
|
||||
the old one first.
|
||||
- **No auto-restore on boot.** The agent does NOT read this db at
|
||||
startup and try to re-apply. Whatever docker has after a restart
|
||||
is what it has; the next heartbeat reports the truth and the
|
||||
master decides whether to re-push. Same reason we don't sync
|
||||
mutations from agent → master anywhere else: split-brain is worse
|
||||
than temporary drift.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import pathlib
|
||||
import sqlite3
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Optional
|
||||
|
||||
|
||||
class AlreadyApplied(RuntimeError):
    """Signal that this agent is already pinned to a different topology."""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AppliedRow:
    """Immutable, decoded view of the single ``applied_topology`` row."""

    # Primary key of the row: the topology this agent is pinned to.
    topology_id: str
    # Version hash recorded at apply time; "" on error-marker rows.
    applied_version_hash: str
    # Decoded ``hydrated_blob_json`` column.
    hydrated: dict[str, Any]
    # Epoch seconds written by ``put``; 0 on error-marker rows.
    applied_at: int
    # Last recorded failure message, or None after a clean apply.
    last_error: Optional[str]
|
||||
|
||||
|
||||
class TopologyStore:
    """Single-row sqlite cache. Stdlib only, sync (called from endpoints).

    Every write runs inside ``with self._conn`` so the statement is
    committed on success and rolled back on failure; a failed write can no
    longer leave a transaction open on the shared connection.
    """

    def __init__(self, db_path: pathlib.Path) -> None:
        """Open (creating if needed) the sqlite file at *db_path*.

        The parent directory is created when missing and the
        ``applied_topology`` table is created on first use.
        """
        db_path.parent.mkdir(parents=True, exist_ok=True)
        # check_same_thread=False: Starlette/FastAPI runs sync endpoint
        # bodies on a worker thread distinct from where `app` is imported.
        # The agent is single-process, so there's no real contention —
        # sqlite's own connection lock is enough.
        self._conn = sqlite3.connect(str(db_path), check_same_thread=False)
        self._conn.row_factory = sqlite3.Row
        with self._conn:
            self._conn.execute(
                "CREATE TABLE IF NOT EXISTS applied_topology ("
                " topology_id TEXT PRIMARY KEY,"
                " applied_version_hash TEXT NOT NULL,"
                " hydrated_blob_json TEXT NOT NULL,"
                " applied_at INTEGER NOT NULL,"
                " last_error TEXT)"
            )

    # ----------------------------------------------------------------- reads

    def current(self) -> Optional[AppliedRow]:
        """Return the single applied topology, or ``None`` if idle."""
        row = self._conn.execute(
            "SELECT topology_id, applied_version_hash, hydrated_blob_json,"
            " applied_at, last_error FROM applied_topology LIMIT 1"
        ).fetchone()
        if row is None:
            return None
        return AppliedRow(
            topology_id=row["topology_id"],
            applied_version_hash=row["applied_version_hash"],
            hydrated=json.loads(row["hydrated_blob_json"]),
            applied_at=int(row["applied_at"]),
            last_error=row["last_error"],
        )

    # ---------------------------------------------------------------- writes

    def put(
        self,
        topology_id: str,
        applied_version_hash: str,
        hydrated: dict[str, Any],
    ) -> None:
        """Record an applied topology.

        If a *different* topology is already recorded, raises
        :class:`AlreadyApplied`. Re-applying the same ``topology_id``
        just updates the hash + blob (idempotent re-push) and clears
        ``last_error``.
        """
        existing = self.current()
        if existing is not None and existing.topology_id != topology_id:
            raise AlreadyApplied(
                f"agent already has topology {existing.topology_id!r}; "
                f"cannot apply {topology_id!r}"
            )
        # Commit on success, roll back if the upsert raises.
        with self._conn:
            self._conn.execute(
                "INSERT INTO applied_topology"
                " (topology_id, applied_version_hash, hydrated_blob_json,"
                " applied_at, last_error)"
                " VALUES (?, ?, ?, ?, NULL)"
                " ON CONFLICT(topology_id) DO UPDATE SET"
                " applied_version_hash=excluded.applied_version_hash,"
                " hydrated_blob_json=excluded.hydrated_blob_json,"
                " applied_at=excluded.applied_at,"
                " last_error=NULL",
                (
                    topology_id,
                    applied_version_hash,
                    json.dumps(hydrated, sort_keys=True),
                    int(time.time()),
                ),
            )

    def record_error(
        self,
        topology_id: str,
        message: str,
        hydrated: Optional[dict[str, Any]] = None,
    ) -> None:
        """Attach a last-error message for *topology_id*.

        Upserts a marker row when no apply has yet succeeded for this
        topology — that way a failure *during* the first materialise
        (put() hasn't been reached) still surfaces via GET
        /topology/state and the next heartbeat. The marker row uses an
        empty ``applied_version_hash`` so master's heartbeat check sees
        the hash mismatch and schedules a resync.

        If *hydrated* is provided it is stored so a later teardown can
        still walk the LAN list — otherwise a partial deploy strands
        containers + bridges with no breadcrumb back to them. An existing
        non-empty blob is never overwritten; only a ``'{}'`` placeholder
        is replaced.
        """
        blob = json.dumps(hydrated, sort_keys=True) if hydrated else "{}"
        # Commit on success, roll back if the upsert raises.
        with self._conn:
            self._conn.execute(
                "INSERT INTO applied_topology"
                " (topology_id, applied_version_hash, hydrated_blob_json,"
                " applied_at, last_error)"
                " VALUES (?, '', ?, 0, ?)"
                " ON CONFLICT(topology_id) DO UPDATE SET"
                " last_error=excluded.last_error,"
                " hydrated_blob_json=CASE"
                " WHEN applied_topology.hydrated_blob_json='{}'"
                " THEN excluded.hydrated_blob_json"
                " ELSE applied_topology.hydrated_blob_json END",
                (topology_id, blob, message),
            )

    def clear(self, topology_id: str) -> None:
        """Remove the row for *topology_id* (post-teardown).

        No-op if the row doesn't exist — makes teardown idempotent.
        """
        with self._conn:
            self._conn.execute(
                "DELETE FROM applied_topology WHERE topology_id=?",
                (topology_id,),
            )

    def close(self) -> None:
        """Close the underlying sqlite connection."""
        self._conn.close()
|
||||
|
||||
|
||||
# --------------------------------------------------- live docker observation
|
||||
|
||||
|
||||
def observed(docker_client: Any) -> dict[str, Any]:
    """Report which DECNET bridges and containers docker currently has.

    The result is ``{"bridges": [...], "containers": [...]}`` with both
    lists sorted: bridge-driver networks whose name starts with
    ``decnet-topology-`` and running containers whose name starts with
    ``decnet-``. Best-effort: any exception while talking to docker is
    turned into ``{"error": <message, max 200 chars>}`` instead of being
    raised, so the caller can still heartbeat.
    """
    try:
        bridge_names = sorted(
            net.name
            for net in docker_client.networks.list()
            if net.attrs.get("Driver") == "bridge"
            and net.name.startswith("decnet-topology-")
        )
        container_names = sorted(
            ctr.name
            for ctr in docker_client.containers.list(all=False)
            if ctr.name.startswith("decnet-")
        )
    except Exception as exc:  # noqa: BLE001 — best-effort observation
        return {"error": str(exc)[:200]}
    return {"bridges": bridge_names, "containers": container_names}
|
||||
|
||||
|
||||
__all__ = ["TopologyStore", "AppliedRow", "AlreadyApplied", "observed"]
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
Machine archetype profiles for DECNET deckies.
|
||||
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
"""Artifact storage helpers shared between the web router and TTP workers."""
|
||||
@@ -1,86 +0,0 @@
|
||||
"""
|
||||
Shared on-disk artifact path resolution.
|
||||
|
||||
Honeypot decoys (SSH, SMTP) farm captured payloads into a host-mounted
|
||||
quarantine tree:
|
||||
|
||||
/var/lib/decnet/artifacts/{decky}/{service}/{stored_as}
|
||||
|
||||
Two callers need to translate ``(decky, stored_as, service)`` into a
|
||||
concrete ``Path`` rooted under that tree:
|
||||
|
||||
* The web router endpoint ``GET /api/v1/artifacts/{decky}/{stored_as}``
|
||||
(``decnet.web.router.artifacts.api_get_artifact``) — admin-gated
|
||||
download for the dashboard.
|
||||
* The TTP ``EmailLifter`` (``decnet.ttp.impl.email_lifter``), which
|
||||
reads the stored ``.eml`` at tag-time so body-aware predicates
|
||||
(R0047 BEC, R0048 macro) don't need raw body text on the bus.
|
||||
|
||||
Both callers share the same validation rules and the same
|
||||
defence-in-depth symlink-escape check; this module is the single
|
||||
implementation. It is auth-agnostic — wrappers layer authentication
|
||||
where appropriate (the router does ``require_admin``, the lifter does
|
||||
not).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
# decky names come from the deployer — lowercase alnum plus hyphens.
|
||||
_DECKY_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,62}$")
|
||||
|
||||
# Services that own an artifacts subdir. Kept explicit so a caller
|
||||
# can't pivot into arbitrary subpaths via a query string or bus payload.
|
||||
_ALLOWED_SERVICES = frozenset({"ssh", "smtp"})
|
||||
|
||||
# stored_as is assembled by the capturing template as:
|
||||
# ${ts}_${sha:0:12}_${base}
|
||||
# where ts is ISO-8601 UTC (e.g. 2026-04-18T02:22:56Z), sha is 12 hex chars,
|
||||
# and base is the original filename's basename. Keep the filename charset
|
||||
# tight but allow common punctuation dropped files actually use.
|
||||
_STORED_AS_RE = re.compile(
|
||||
r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z_[a-f0-9]{12}_[A-Za-z0-9._-]{1,255}$"
|
||||
)
|
||||
|
||||
# Module-level so tests can monkeypatch. Override via env in production
|
||||
# (the systemd unit sets this) — the prod path matches the bind mount
|
||||
# declared in decnet/services/{ssh,smtp}.py.
|
||||
ARTIFACTS_ROOT = Path(
|
||||
os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts")
|
||||
)
|
||||
|
||||
|
||||
class ArtifactPathError(ValueError):
    """Rejected ``(decky, stored_as, service)`` triple.

    Raised when any of the three inputs fails validation or the resolved
    path would land outside the artifacts root.

    The router catches this and re-raises HTTPException(400). The lifter
    catches it and treats the event as having no body available (no-tag).
    """
|
||||
|
||||
|
||||
def resolve_artifact_path(decky: str, stored_as: str, service: str) -> Path:
    """Map ``(decky, stored_as, service)`` to a path under the artifacts root.

    Inputs are checked in order — service allow-list, decky pattern,
    stored_as pattern — and the first failure raises
    :class:`ArtifactPathError`. The resolved path is then verified to sit
    inside the resolved root. File existence is NOT checked; callers
    handle that distinctly (404 for the router, no-tag for the lifter).
    """
    if service not in _ALLOWED_SERVICES:
        raise ArtifactPathError("invalid service")
    if _DECKY_RE.fullmatch(decky) is None:
        raise ArtifactPathError("invalid decky name")
    if _STORED_AS_RE.fullmatch(stored_as) is None:
        raise ArtifactPathError("invalid stored_as")

    artifacts_root = ARTIFACTS_ROOT.resolve()
    resolved = artifacts_root.joinpath(decky, service, stored_as).resolve()
    # defence-in-depth: the regexes already reject `..`, but a symlink or
    # odd filesystem state must not be able to lead outside the root.
    inside_root = resolved == artifacts_root or artifacts_root in resolved.parents
    if not inside_root:
        raise ArtifactPathError("path escapes artifacts root")
    return resolved
|
||||
@@ -1,129 +0,0 @@
|
||||
"""Shared asciinema shard helpers.
|
||||
|
||||
Extracted from ``decnet/web/router/transcripts/api_get_transcript.py``
|
||||
so non-router callers (the BEHAVE-SHELL session-ended handler in
|
||||
``decnet/profiler/worker.py``, the collector's session aggregator)
|
||||
can resolve shard paths without crossing the layer boundary into the
|
||||
FastAPI router.
|
||||
|
||||
Functions here speak in :class:`ValueError` — callers that want HTTP
|
||||
semantics translate at the boundary. The router wrappers keep their
|
||||
existing ``HTTPException`` behaviour for backwards compatibility.
|
||||
|
||||
PII boundary unchanged: shards live on disk; this module returns
|
||||
:class:`pathlib.Path` pointers, never byte content. The ``_get_index``
|
||||
cache stores byte offsets only.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from collections import OrderedDict
|
||||
from pathlib import Path
|
||||
|
||||
ARTIFACTS_ROOT = Path(
|
||||
os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts"),
|
||||
)
|
||||
|
||||
_DECKY_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,62}$")
|
||||
_SERVICE_RE = re.compile(r"^(ssh|telnet)$")
|
||||
_SHARD_BASENAME_RE = re.compile(r"^sessions-\d{4}-\d{2}-\d{2}\.jsonl$")
|
||||
_SID_LINE_RE = re.compile(rb'"sid"\s*:\s*"([a-f0-9-]{36})"')
|
||||
|
||||
# (path, mtime_ns) → {sid: [(offset, length), ...]}
|
||||
_INDEX_CACHE: "OrderedDict[tuple[str, int], dict[str, list[tuple[int, int]]]]" = (
|
||||
OrderedDict()
|
||||
)
|
||||
_CACHE_MAX = 32
|
||||
|
||||
|
||||
def validate_names(decky: str, service: str) -> None:
    """Reject a ``decky`` or ``service`` that does not match its pattern.

    ``decky`` is checked first; the first mismatch raises
    :class:`ValueError`. Returns ``None`` when both are acceptable.
    """
    if _DECKY_RE.fullmatch(decky) is None:
        raise ValueError(f"invalid decky name: {decky!r}")
    if _SERVICE_RE.fullmatch(service) is None:
        raise ValueError(f"invalid service: {service!r}")
|
||||
|
||||
|
||||
def resolve_shard(decky: str, service: str, shard_name: str) -> Path:
    """Return ``ARTIFACTS_ROOT/{decky}/{service}/transcripts/{shard_name}``.

    Raises :class:`ValueError` when the names fail validation, when
    ``shard_name`` is not a ``sessions-YYYY-MM-DD.jsonl`` basename, or
    when the resolved path falls outside the resolved artifacts root.
    """
    validate_names(decky, service)
    if _SHARD_BASENAME_RE.fullmatch(shard_name) is None:
        raise ValueError(f"invalid shard name: {shard_name!r}")

    artifacts_root = ARTIFACTS_ROOT.resolve()
    resolved = artifacts_root.joinpath(
        decky, service, "transcripts", shard_name
    ).resolve()
    inside_root = resolved == artifacts_root or artifacts_root in resolved.parents
    if not inside_root:
        raise ValueError(f"path escapes artifacts root: {resolved}")
    return resolved
|
||||
|
||||
|
||||
def _build_index(path: Path) -> dict[str, list[tuple[int, int]]]:
    """Scan the shard at *path* and map each sid to its line spans.

    The file is read in binary mode line by line. Each line matching
    ``_SID_LINE_RE`` contributes one ``(byte_offset, byte_length)`` pair
    under its sid; lines without a match only advance the offset.
    """
    spans: dict[str, list[tuple[int, int]]] = {}
    cursor = 0
    with path.open("rb") as shard:
        for raw in shard:
            size = len(raw)
            hit = _SID_LINE_RE.search(raw)
            if hit is not None:
                sid = hit.group(1).decode("ascii")
                spans.setdefault(sid, []).append((cursor, size))
            cursor += size
    return spans
|
||||
|
||||
|
||||
def get_index(path: Path) -> tuple[dict[str, list[tuple[int, int]]], int]:
    """Return ``(sid → [(offset, length), …], file_size)`` for a shard.

    Results are memoised in ``_INDEX_CACHE`` under ``(path, mtime_ns)``,
    so a changed mtime triggers a rebuild. The cache is kept in LRU order
    and trimmed to ``_CACHE_MAX`` entries.
    """
    info = path.stat()
    cache_key = (str(path), info.st_mtime_ns)

    cached = _INDEX_CACHE.get(cache_key)
    if cached is not None:
        # Hit: mark as most recently used.
        _INDEX_CACHE.move_to_end(cache_key)
        return cached, info.st_size

    # Miss: a new key lands at the most-recent end of the OrderedDict.
    built = _build_index(path)
    _INDEX_CACHE[cache_key] = built
    while len(_INDEX_CACHE) > _CACHE_MAX:
        _INDEX_CACHE.popitem(last=False)
    return built, info.st_size
|
||||
|
||||
|
||||
def find_shard_with_sid(decky: str, service: str, sid: str) -> Path | None:
    """Locate the transcript shard that contains session ``sid``.

    Shards named ``sessions-YYYY-MM-DD.jsonl`` under the decky's
    transcripts directory are tried in descending path order, so the
    newest date is checked first. Lookups go through :func:`get_index`
    and therefore reuse its per-shard cache.

    Returns ``None`` when no shard claims the sid, when the transcripts
    directory is missing, unreadable or resolves outside the artifacts
    root, and skips any shard whose index cannot be read. Invalid names
    still raise :class:`ValueError` via :func:`validate_names`.
    """
    validate_names(decky, service)
    artifacts_root = ARTIFACTS_ROOT.resolve()
    shard_dir = artifacts_root.joinpath(decky, service, "transcripts").resolve()
    if artifacts_root not in shard_dir.parents:
        return None

    try:
        if not shard_dir.is_dir():
            return None
        candidates = [
            entry
            for entry in shard_dir.iterdir()
            if _SHARD_BASENAME_RE.fullmatch(entry.name)
        ]
    except OSError:  # PermissionError is a subclass of OSError
        return None

    candidates.sort(reverse=True)
    for shard in candidates:
        try:
            sid_index, _ = get_index(shard)
        except OSError:
            continue
        if sid in sid_index:
            return shard
    return None
|
||||
@@ -1,93 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
IP-to-ASN enrichment — maps attacker IPs to BGP-announced AS numbers and
|
||||
org names for attacker intelligence.
|
||||
|
||||
Public surface mirrors :mod:`decnet.geoip` so callers can compose them:
|
||||
|
||||
* :func:`get_lookup` — returns the singleton :class:`AsnLookup`.
|
||||
* :func:`enrich_ip` — takes an IP string, returns
|
||||
``(asn_int, asn_name, bgp_prefix, provider_name)`` or ``(None, None, None, None)``.
|
||||
|
||||
Provider selection goes through :func:`~decnet.asn.factory.get_provider`
|
||||
(env ``DECNET_ASN_PROVIDER``, default ``iptoasn``). Direct imports of
|
||||
concrete providers are forbidden — mirrors the ``get_bus`` /
|
||||
``get_repository`` rule.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import time
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from decnet.asn.factory import get_provider
|
||||
from decnet.asn.lookup import AsnLookup
|
||||
from decnet.asn.paths import ASN_ROOT
|
||||
|
||||
# 24 h — iptoasn refreshes daily.
|
||||
REFRESH_INTERVAL_S = 86_400
|
||||
|
||||
_lookup: Optional[AsnLookup] = None
_provider_name: Optional[str] = None

# Minimum gap between *automatic* refresh attempts. ``provider.refresh()``
# may fail without raising (the fetcher logs and keeps the old file), which
# leaves the data stale; without this throttle every subsequent call would
# retry the download and rebuild the lookup.
_REFRESH_RETRY_S = 300
_last_refresh_attempt = float("-inf")


def get_lookup(*, force_refresh: bool = False) -> AsnLookup:
    """Return the cached :class:`AsnLookup`, building it on first use.

    If the provider's data files are missing or older than
    ``REFRESH_INTERVAL_S`` seconds, refresh before building. Automatic
    refreshes are attempted at most once per ``_REFRESH_RETRY_S`` seconds,
    so a failing download does not get retried on every call. Pass
    ``force_refresh=True`` to bypass both the age check and the throttle
    (used by a future ``decnet asn refresh`` CLI command).
    """
    global _lookup, _provider_name, _last_refresh_attempt
    provider = get_provider()
    _provider_name = provider.name

    now = time.monotonic()
    retry_due = now - _last_refresh_attempt >= _REFRESH_RETRY_S
    if force_refresh or (retry_due and _files_stale(provider)):
        # Stamp before refreshing so a raising refresh is throttled too.
        _last_refresh_attempt = now
        provider.refresh()
        _lookup = None  # rebuild below

    if _lookup is None:
        _lookup = provider.build_lookup()
    return _lookup
|
||||
|
||||
|
||||
def enrich_ip(ip: str) -> Tuple[Optional[int], Optional[str], Optional[str], Optional[str]]:
    """Return ``(asn, as_name, bgp_prefix, provider_name)`` for *ip*.

    Yields ``(None, None, None, None)`` when the lookup has no answer,
    when anything in the lookup path raises, or when
    ``DECNET_ASN_ENABLED`` is set to ``false`` (case-insensitive). Never
    raises, so the caller (profiler) can upsert the attacker row
    regardless. An empty AS name is reported as ``None``; a missing
    provider name is reported as ``"unknown"``.
    """
    nothing = (None, None, None, None)
    enabled_flag = os.environ.get("DECNET_ASN_ENABLED", "true")
    if enabled_flag.lower() == "false":
        return nothing
    try:
        info = get_lookup().asn(ip)
        if info is None:
            return nothing
        as_name = info.name or None
        source = _provider_name or "unknown"
        return (info.asn, as_name, info.prefix, source)
    except Exception:
        return nothing
|
||||
|
||||
|
||||
def _files_stale(provider) -> bool:
|
||||
"""True when the provider has no fresh data on disk.
|
||||
|
||||
Same semantics as :func:`decnet.geoip._files_stale`: a partial
|
||||
cache still produces correct answers for the ranges it covers.
|
||||
"""
|
||||
paths = provider.data_paths()
|
||||
if not paths:
|
||||
return True
|
||||
now = time.time()
|
||||
for p in paths:
|
||||
if p.exists() and now - p.stat().st_mtime <= REFRESH_INTERVAL_S:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
__all__ = ["get_lookup", "enrich_ip", "ASN_ROOT", "REFRESH_INTERVAL_S"]
|
||||
@@ -1,34 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""ASN provider protocol — mirror of :mod:`decnet.geoip.base`.
|
||||
|
||||
Concrete providers (e.g. :mod:`decnet.asn.iptoasn`) implement this.
|
||||
Callers must go through :func:`decnet.asn.factory.get_provider`; never
|
||||
import a concrete provider class directly.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
from decnet.asn.lookup import AsnLookup
|
||||
|
||||
|
||||
class Provider(ABC):
    """Interface every IP→ASN data source has to implement."""

    #: Short tag written to ``Attacker.asn_source`` (e.g. ``'iptoasn'``).
    name: str

    @abstractmethod
    def refresh(self) -> None:
        """Fetch or regenerate this provider's raw data files."""

    @abstractmethod
    def build_lookup(self) -> AsnLookup:
        """Turn the on-disk data files into a ready-to-query lookup."""

    @abstractmethod
    def data_paths(self) -> Sequence[Path]:
        """List the files this provider manages.

        Used for staleness detection; the order carries no meaning.
        """
|
||||
@@ -1,40 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""ASN provider factory — mirror of :mod:`decnet.geoip.factory`.
|
||||
|
||||
Dispatch key: ``DECNET_ASN_PROVIDER`` (default ``iptoasn``). Lazy
|
||||
singleton.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from decnet.asn.base import Provider
|
||||
|
||||
_cached: Optional[Provider] = None
|
||||
_cached_key: Optional[str] = None
|
||||
|
||||
|
||||
def get_provider() -> Provider:
    """Return the :class:`Provider` selected by ``DECNET_ASN_PROVIDER``.

    The env value is lower-cased and defaults to ``iptoasn``. The instance
    is cached and reused while the key stays the same. Raises
    :class:`ValueError` for an unknown key.
    """
    global _cached, _cached_key
    wanted = os.environ.get("DECNET_ASN_PROVIDER", "iptoasn").lower()
    if _cached is not None and _cached_key == wanted:
        return _cached

    if wanted != "iptoasn":
        raise ValueError(f"Unsupported ASN provider: {wanted!r}")

    # Imported lazily, only when this provider is actually selected.
    from decnet.asn.iptoasn.provider import IptoasnProvider

    _cached, _cached_key = IptoasnProvider(), wanted
    return _cached
|
||||
|
||||
|
||||
def reset_cache() -> None:
    """Drop the cached provider and its key — tests swap providers via the env var."""
    global _cached, _cached_key
    _cached = _cached_key = None
|
||||
@@ -1,10 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""iptoasn.com IP→ASN provider.
|
||||
|
||||
Daily-refreshed gzipped TSV dump of the global BGP table, derived from
|
||||
RIPE RIS. Released into the public domain by upstream — no attribution
|
||||
required, no UA mandate, no terms to violate.
|
||||
|
||||
Direct imports of :class:`IptoasnProvider` are discouraged — go through
|
||||
:func:`decnet.asn.factory.get_provider`.
|
||||
"""
|
||||
@@ -1,64 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""iptoasn.com bulk dump download.
|
||||
|
||||
One file: ``ip2asn-v4.tsv.gz``, ~5 MB compressed, refreshed daily.
|
||||
Pulled over HTTPS with the same generic UA the geoip RIR fetcher uses
|
||||
(stealth: never identify as DECNET — public-data scrapers correlated to
|
||||
honeypot operator egress is the threat model).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Tuple
|
||||
|
||||
logger = logging.getLogger("decnet.asn.iptoasn.fetch")
|
||||
|
||||
# Mirror the (name, url) tuple shape of geoip.rir.fetch so test
|
||||
# harnesses can swap one for the other.
|
||||
IPTOASN_SOURCES: Tuple[Tuple[str, str], ...] = (
|
||||
("ip2asn-v4", "https://iptoasn.com/data/ip2asn-v4.tsv.gz"),
|
||||
)
|
||||
|
||||
# Generic UA — matches geoip.rir.fetch. iptoasn.com explicitly releases
|
||||
# the data into the public domain and does NOT require an identifying UA,
|
||||
# so we keep DECNET stealth instead of advertising.
|
||||
_USER_AGENT = "Mozilla/5.0 (compatible; fetch/1.0)"
|
||||
_TIMEOUT_S = 60
|
||||
|
||||
|
||||
def fetch_all(dest: Path) -> list[Path]:
    """Download every entry of ``IPTOASN_SOURCES`` into *dest*.

    Each file is first written to ``{name}.tsv.gz.tmp`` and then renamed
    over ``{name}.tsv.gz``, so a failed download leaves the previous
    generation intact. Failures are logged, the temp file is removed, and
    the loop moves on. Returns the paths that were written.
    """
    dest.mkdir(parents=True, exist_ok=True)
    fetched: list[Path] = []
    for name, url in IPTOASN_SOURCES:
        final_path = dest / f"{name}.tsv.gz"
        partial_path = final_path.with_suffix(".gz.tmp")
        try:
            _download(url, partial_path)
            partial_path.replace(final_path)
            fetched.append(final_path)
            logger.info(
                "asn.iptoasn: fetched %s (%d bytes)",
                name, final_path.stat().st_size,
            )
        except Exception as exc:
            logger.error(
                "asn.iptoasn: fetch failed for %s (%s): %s", name, url, exc
            )
            if partial_path.exists():
                partial_path.unlink(missing_ok=True)
            # Any stale previous file is kept — better outdated than empty.
    return fetched
|
||||
|
||||
|
||||
def _download(url: str, dest: Path) -> None:
    """Stream the response body of *url* into the file *dest*.

    The request carries the module's generic ``_USER_AGENT`` header and
    uses ``_TIMEOUT_S`` as its timeout.
    """
    request = urllib.request.Request(url, headers={"User-Agent": _USER_AGENT})
    with urllib.request.urlopen(request, timeout=_TIMEOUT_S) as response:  # nosec B310 — fixed https iptoasn URL
        with dest.open("wb") as sink:
            shutil.copyfileobj(response, sink)
|
||||
@@ -1,79 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Parser for the iptoasn.com ``ip2asn-v4.tsv`` dump.
|
||||
|
||||
Line shape (gzipped, one row per BGP-announced prefix)::
|
||||
|
||||
1.0.0.0\\t1.0.0.255\\t13335\\tUS\\tCLOUDFLARENET
|
||||
|
||||
Fields: ``range_start``, ``range_end``, ``as_number``, ``country_code``,
|
||||
``as_description``. Both range columns are dotted IPv4 strings (the dump
|
||||
is IPv4-only — there's a separate ``ip2asn-v6.tsv.gz`` we don't pull).
|
||||
|
||||
Rows skipped:
|
||||
|
||||
* ``as_number == 0`` — iptoasn's sentinel for "unannounced" / private
|
||||
/ reserved space. Country may still be present (``"None"`` / two-letter
|
||||
CC) but we don't care: the geoip module owns country, ASN owns BGP.
|
||||
* Rows where either range column won't parse as IPv4.
|
||||
* Rows with fewer than 3 tab-separated columns.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import gzip
|
||||
import ipaddress
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Iterator
|
||||
|
||||
from decnet.asn.lookup import AsnInfo, Range
|
||||
|
||||
logger = logging.getLogger("decnet.asn.iptoasn.parse")
|
||||
|
||||
|
||||
def parse_file(path: Path) -> Iterator[Range]:
    """Yield ``(start_int, end_int_inclusive, AsnInfo)`` for every BGP row.

    A path ending in ``.gz`` is opened with gzip; anything else is read
    as plain TSV, which lets test harnesses hand-craft small fixtures.
    Skipped silently or with a debug log: empty lines, rows with fewer
    than 3 columns, a non-integer ASN, ASN 0, unparseable IPv4 bounds,
    and ranges whose end precedes their start.
    """
    open_fn = gzip.open if path.suffix == ".gz" else open
    with open_fn(path, "rt", encoding="utf-8", errors="replace") as rows:
        for lineno, raw in enumerate(rows, 1):
            line = raw.rstrip("\n")
            if not line:
                continue
            cols = line.split("\t")
            if len(cols) < 3:
                continue
            start_s, end_s, asn_s = cols[:3]
            # The description is the 5th column; iptoasn quotes nothing,
            # but the field can carry stray whitespace. ``""`` when
            # missing or unknown.
            name = cols[4].strip() if len(cols) >= 5 else ""

            try:
                asn = int(asn_s)
            except ValueError:
                logger.debug(
                    "asn.iptoasn: skipping malformed asn line %d in %s",
                    lineno, path.name,
                )
                continue
            # ASN 0 is iptoasn's sentinel for unannounced space. Skip —
            # there's no useful enrichment to attach.
            if asn == 0:
                continue

            try:
                start_int = int(ipaddress.IPv4Address(start_s))
                end_int = int(ipaddress.IPv4Address(end_s))
            except ValueError:  # AddressValueError is a ValueError subclass
                logger.debug(
                    "asn.iptoasn: skipping malformed addr line %d in %s",
                    lineno, path.name,
                )
                continue
            if end_int < start_int:
                continue

            yield (start_int, end_int, AsnInfo(asn=asn, name=name))
|
||||
@@ -1,84 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""iptoasn provider — orchestrates fetch + parse into an :class:`AsnLookup`.
|
||||
|
||||
Mirrors :class:`decnet.geoip.rir.provider.RirProvider` exactly: fetch,
|
||||
build a pickled cache, invalidate when raw files are newer than the
|
||||
cache.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
from decnet.asn.base import Provider
|
||||
from decnet.asn.iptoasn.fetch import IPTOASN_SOURCES, fetch_all
|
||||
from decnet.asn.iptoasn.parse import parse_file
|
||||
from decnet.asn.lookup import AsnLookup, Range
|
||||
from decnet.asn.paths import ensure_root
|
||||
|
||||
logger = logging.getLogger("decnet.asn.iptoasn.provider")
|
||||
|
||||
# Pickled lookup cache — skips re-parsing the ~580k-row gz dump on every
|
||||
# profiler restart. Rebuilt whenever any raw file is newer than the
|
||||
# cache, see ``_cache_fresh``.
|
||||
_CACHE_NAME = ".iptoasn_index.pkl"
|
||||
|
||||
|
||||
class IptoasnProvider(Provider):
    """:class:`Provider` backed by the iptoasn.com dump.

    Raw files live under the directory returned by ``ensure_root()``; a
    pickled index named ``_CACHE_NAME`` sits next to them.
    """

    name = "iptoasn"

    def __init__(self) -> None:
        self._root = ensure_root()

    # ---------- Provider interface ----------

    def refresh(self) -> None:
        """Re-download the dump and invalidate the pickled index.

        The index is dropped only when at least one file was actually
        written. If every download failed, the raw files are unchanged and
        the existing index is still valid, so it is kept rather than
        forcing a full re-parse.
        """
        logger.info("asn.iptoasn: refreshing dump into %s", self._root)
        written = fetch_all(self._root)
        if written:
            (self._root / _CACHE_NAME).unlink(missing_ok=True)

    def build_lookup(self) -> AsnLookup:
        """Return a lookup, from the pickled index when it is fresh.

        Falls back to parsing every existing raw file when the index is
        missing, stale, or fails to load. A rebuilt lookup is saved back
        to the index; a failed save is logged and otherwise ignored.
        """
        cache = self._root / _CACHE_NAME
        if self._cache_fresh(cache):
            try:
                lookup = AsnLookup.load(cache)
                logger.debug(
                    "asn.iptoasn: loaded cached index (%d ranges)",
                    len(lookup),
                )
                return lookup
            except Exception as exc:
                logger.warning(
                    "asn.iptoasn: cache load failed, rebuilding: %s", exc
                )

        ranges: list[Range] = []
        for path in self.data_paths():
            if not path.exists():
                continue
            ranges.extend(parse_file(path))
        lookup = AsnLookup.from_ranges(ranges)
        try:
            lookup.save(cache)
        except Exception as exc:
            logger.warning("asn.iptoasn: cache save failed: %s", exc)
        logger.info("asn.iptoasn: built index with %d ranges", len(lookup))
        return lookup

    def data_paths(self) -> Sequence[Path]:
        """Return one ``{name}.tsv.gz`` path per ``IPTOASN_SOURCES`` entry."""
        return [self._root / f"{name}.tsv.gz" for name, _url in IPTOASN_SOURCES]

    # ---------- internals ----------

    def _cache_fresh(self, cache: Path) -> bool:
        """True when the pickle exists and is at least as new as every raw file."""
        if not cache.exists():
            return False
        cache_mtime = cache.stat().st_mtime
        for path in self.data_paths():
            if path.exists() and path.stat().st_mtime > cache_mtime:
                return False
        return True
|
||||
@@ -1,143 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Provider-agnostic IP→ASN lookup.
|
||||
|
||||
A :class:`AsnLookup` is a frozen, sorted array of ``(start_ip,
|
||||
end_ip_inclusive, AsnInfo)`` ranges queried via :mod:`bisect`.
|
||||
O(log n) on ~600k ranges (a current iptoasn dump is ~580k rows).
|
||||
|
||||
Private/loopback/invalid IPv4 and all IPv6 addresses resolve to
|
||||
``None`` — the same policy :mod:`decnet.geoip.lookup` uses.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import bisect
|
||||
import ipaddress
|
||||
import pickle # nosec B403 — self-produced cache under /var/lib/decnet, never deserialized from untrusted input
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Optional, Tuple
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AsnInfo:
    """Origin metadata for one BGP-announced prefix (immutable)."""

    # Autonomous system number.
    asn: int
    # AS description / organisation name; "" when the source data has none.
    name: str
    # Covering CIDR synthesized at lookup time; not stored at rest.
    prefix: Optional[str] = None
|
||||
|
||||
|
||||
Range = Tuple[int, int, AsnInfo]
|
||||
|
||||
|
||||
def _synthesize_prefix(start_int: int, end_int: int, queried_int: int) -> Optional[str]:
|
||||
"""Return the most-specific CIDR from [start, end] that contains queried_int."""
|
||||
try:
|
||||
for net in ipaddress.summarize_address_range(
|
||||
ipaddress.IPv4Address(start_int), ipaddress.IPv4Address(end_int)
|
||||
):
|
||||
if queried_int >= int(net.network_address) and queried_int <= int(net.broadcast_address):
|
||||
return str(net)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
@dataclass
class AsnLookup:
    """Indexed AS lookup over IPv4 ranges."""

    # Parallel arrays for bisect: _starts[i] is the start-IP of the i-th
    # range, _ends[i] its inclusive end, _infos[i] its AsnInfo.
    _starts: List[int]
    _ends: List[int]
    _infos: List[AsnInfo]

    @classmethod
    def from_ranges(cls, ranges: Iterable[Range]) -> "AsnLookup":
        """Build a lookup from ``(start, end_inclusive, AsnInfo)`` triples.

        Ranges are sorted by ``(start, end)``. When several ranges share a
        start only the one that sorts last is kept, i.e. the one with the
        largest end; among exact ``(start, end)`` duplicates the later
        input wins because the sort is stable. Ranges with distinct starts
        are all kept, adjacent or not.

        NOTE(review): if strict "last writer wins" on equal starts is the
        intended policy, the sort key should be the start alone — confirm.
        """
        sorted_ranges = sorted(ranges, key=lambda r: (r[0], r[1]))
        starts: List[int] = []
        ends: List[int] = []
        infos: List[AsnInfo] = []
        for start, end, info in sorted_ranges:
            if starts and starts[-1] == start:
                # Same start as the previous entry: overwrite in place.
                ends[-1] = end
                infos[-1] = info
                continue
            starts.append(start)
            ends.append(end)
            infos.append(info)
        return cls(starts, ends, infos)

    def asn(self, ip: str) -> Optional[AsnInfo]:
        """Return the :class:`AsnInfo` for ``ip`` or ``None``.

        ``None`` on: IPv6, private/loopback/link-local/multicast/reserved/
        unspecified addresses, malformed strings, and IPs not covered by
        the indexed range with the greatest start ``<=`` the address.
        The returned object is a fresh ``AsnInfo`` carrying the
        synthesized covering ``prefix``.
        """
        try:
            addr = ipaddress.ip_address(ip)
        except ValueError:
            return None
        if isinstance(addr, ipaddress.IPv6Address):
            return None
        if (
            addr.is_private
            or addr.is_loopback
            or addr.is_link_local
            or addr.is_multicast
            or addr.is_reserved
            or addr.is_unspecified
        ):
            return None

        n = int(addr)
        # Rightmost range whose start is <= n.
        idx = bisect.bisect_right(self._starts, n) - 1
        if idx < 0:
            return None
        if n <= self._ends[idx]:
            info = self._infos[idx]
            prefix = _synthesize_prefix(self._starts[idx], self._ends[idx], n)
            return AsnInfo(asn=info.asn, name=info.name, prefix=prefix)
        return None

    def __len__(self) -> int:
        return len(self._starts)

    # ---------- persistence ----------

    def save(self, path: Path) -> None:
        """Pickle the lookup to *path* via a temp file and atomic rename.

        If writing or renaming fails, the partially written temp file is
        removed before the original exception is re-raised, so failed
        saves do not leave ``*.tmp`` residue next to the index.
        """
        tmp = path.with_suffix(path.suffix + ".tmp")
        tmp.parent.mkdir(parents=True, exist_ok=True)
        try:
            with tmp.open("wb") as fh:
                pickle.dump(
                    {
                        "version": 1,
                        "starts": self._starts,
                        "ends": self._ends,
                        # Only (asn, name) is persisted; prefix is per-query.
                        "infos": [(i.asn, i.name) for i in self._infos],
                    },
                    fh,
                    protocol=pickle.HIGHEST_PROTOCOL,
                )
            tmp.replace(path)
        except Exception:
            try:
                tmp.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup; the original error matters more.
                pass
            raise

    @classmethod
    def load(cls, path: Path) -> "AsnLookup":
        """Load a pickled lookup from *path*.

        Raises:
            ValueError: the payload is not a dict, has an unsupported
                ``version``, or its parallel arrays differ in length.
            KeyError: a required key is missing from the payload.
        """
        with path.open("rb") as fh:
            data = pickle.load(fh)  # nosec B301 — self-produced file under /var/lib/decnet
        if not isinstance(data, dict):
            raise ValueError(
                f"unsupported asn-lookup index payload: {type(data).__name__}"
            )
        if data.get("version") != 1:
            raise ValueError(
                f"unsupported asn-lookup index version: {data.get('version')!r}"
            )
        starts, ends, raw_infos = data["starts"], data["ends"], data["infos"]
        # A length mismatch would otherwise only surface as an IndexError
        # in asn(); fail here so callers can rebuild the index instead.
        if not (len(starts) == len(ends) == len(raw_infos)):
            raise ValueError("corrupt asn-lookup index: array lengths differ")
        infos = [AsnInfo(asn=a, name=n) for a, n in raw_infos]
        return cls(starts, ends, infos)
|
||||
@@ -1,19 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Filesystem layout for ASN data — mirror of :mod:`decnet.geoip.paths`.
|
||||
|
||||
``ASN_ROOT`` is where providers drop their raw files and cache indexes.
|
||||
Default ``/var/lib/decnet/asn``. Override with ``DECNET_ASN_ROOT`` for
|
||||
test harnesses.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
ASN_ROOT = Path(os.environ.get("DECNET_ASN_ROOT", "/var/lib/decnet/asn"))
|
||||
|
||||
|
||||
def ensure_root() -> Path:
    """Return ``ASN_ROOT``, creating it (and missing parents) when absent."""
    root = ASN_ROOT
    # exist_ok=True makes this a no-op when the directory is already there.
    root.mkdir(parents=True, exist_ok=True)
    return root
|
||||
@@ -1,19 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""DECNET ServiceBus — pub/sub notification substrate.
|
||||
|
||||
The bus is the notification layer for DECNET's worker constellation. The DB
|
||||
remains the source of truth for anything durable; the bus carries "something
|
||||
happened, go look" events. Delivery is at-most-once, fire-and-forget.
|
||||
|
||||
Consumers call :func:`get_bus` from :mod:`decnet.bus.factory`; never import
|
||||
transport implementations directly. The factory selects the backend via
|
||||
``DECNET_BUS_TYPE`` (``unix`` or ``fake``) and honors ``DECNET_BUS_ENABLED``.
|
||||
|
||||
Topic hierarchy is defined in :mod:`decnet.bus.topics` and locked early so
|
||||
consumers can subscribe with stable wildcard patterns.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.bus.base import BaseBus, Event, Subscription
|
||||
|
||||
__all__ = ["BaseBus", "Event", "Subscription"]
|
||||
@@ -1,93 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Process-wide bus singleton for request-serving workers (API, SSE routes).
|
||||
|
||||
A single connected :class:`~decnet.bus.base.BaseBus` shared across request
|
||||
handlers — opening a UNIX socket per request would be wasteful and add
|
||||
latency to the hot path. The API lifespan is responsible for calling
|
||||
:func:`close_app_bus` on shutdown; connect is lazy so tests and
|
||||
contract-test mode that never hit a publish/subscribe code path don't
|
||||
pay for a bus connection they'll never use.
|
||||
|
||||
Failures during :meth:`BaseBus.connect` are swallowed and logged — a
|
||||
dead bus must never break request serving. Publishers should treat a
|
||||
``None`` return from :func:`get_app_bus` as "skip this notification",
|
||||
same as ``DECNET_BUS_ENABLED=false``.
|
||||
|
||||
Connect is **retried with a short backoff** (not one-shot): a startup
|
||||
race where the API lifespan hits :func:`get_app_bus` before ``decnet
|
||||
bus`` is ready would otherwise poison the singleton for the entire
|
||||
process lifetime. Instead we remember the last failure timestamp and
|
||||
let callers retry once ``_RETRY_BACKOFF`` seconds have passed.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
from decnet.bus.base import BaseBus
|
||||
from decnet.bus.factory import get_bus
|
||||
from decnet.logging import get_logger
|
||||
|
||||
log = get_logger("bus.app")
|
||||
|
||||
# Publishers in the hot path shouldn't pay connect-retry latency on every
# call; the dashboard's own 5 s poll interval recovers within one tick
# once the bus comes up. A persistently-dead bus only gets a connect
# attempt every 2 s, not once per request.
_RETRY_BACKOFF: float = 2.0

# Held around the connect attempt in get_app_bus() so concurrent callers
# do not each open their own connection.
_lock = asyncio.Lock()
# The connected process-wide bus, or None while nothing is cached.
_shared: BaseBus | None = None
# time.monotonic() reading taken at the most recent failed connect; reset
# to 0.0 after a successful connect and by close_app_bus().
_last_failure_ts: float = 0.0
|
||||
|
||||
|
||||
async def get_app_bus() -> BaseBus | None:
    """Return the shared connected bus, or ``None`` when it is unavailable.

    The first successful call builds a client through :func:`get_bus`,
    awaits ``connect()`` and caches it; later calls hand back the cached
    instance. A failed connect records a timestamp, and calls made within
    ``_RETRY_BACKOFF`` seconds of it return ``None`` without trying again.
    Once the window has elapsed the next call retries, which lets the API
    recover when ``decnet bus`` starts after it.
    """
    global _shared, _last_failure_ts

    def _backing_off() -> bool:
        return (time.monotonic() - _last_failure_ts) < _RETRY_BACKOFF

    # Lock-free fast path.
    if _shared is not None:
        return _shared
    if _backing_off():
        return None

    async with _lock:
        # Re-check: another task may have connected (or failed) while we
        # were waiting for the lock.
        if _shared is not None:
            return _shared
        if _backing_off():
            return None
        try:
            bus = get_bus(client_name="api")
            await bus.connect()
        except Exception as exc:  # noqa: BLE001
            log.warning("app bus unavailable: %s", exc)
            _last_failure_ts = time.monotonic()
            return None
        _shared = bus
        _last_failure_ts = 0.0
        return _shared
|
||||
|
||||
|
||||
async def close_app_bus() -> None:
    """Close the shared bus, if any, and reset the retry backoff.

    Meant for the API lifespan shutdown; calling it repeatedly is safe.
    Clearing ``_last_failure_ts`` lets a ``get_app_bus()`` issued later in
    the same process (tests do this) retry straight away rather than
    honouring a stale backoff window. Errors raised by ``close()`` are
    logged and swallowed.
    """
    global _shared, _last_failure_ts
    bus = _shared
    _shared = None
    _last_failure_ts = 0.0
    if bus is None:
        return
    try:
        await bus.close()
    except Exception as exc:  # noqa: BLE001
        log.warning("app bus close raised: %s", exc)
|
||||
@@ -1,206 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Bus abstractions: the :class:`Event` envelope and the :class:`BaseBus` ABC.
|
||||
|
||||
Every transport (UNIX socket, in-process fake, null) speaks this contract. The
|
||||
envelope is versioned (``v``) so future evolution never breaks deployed
|
||||
consumers that happen to see a newer event shape.
|
||||
|
||||
Subscription model: :meth:`BaseBus.subscribe` returns a :class:`Subscription`
|
||||
that is an async context manager AND an async iterator. The expected usage is:
|
||||
|
||||
async with bus.subscribe("topology.*.mutation.*") as sub:
|
||||
async for event in sub:
|
||||
handle(event)
|
||||
|
||||
Leaving the ``async with`` releases the underlying subscription handle; the
|
||||
transport is free to drop any buffered events after that point.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
import asyncio
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, AsyncIterator
|
||||
|
||||
EVENT_SCHEMA_VERSION = 1
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Event:
    """Immutable envelope carried over the bus.

    * ``topic`` / ``payload`` — where the event was published and its data.
    * ``type`` — short discriminator (``"mutation.applied"``,
      ``"decky.state"``) for consumers that subscribe broadly and dispatch
      in Python; redundant with the tail of ``topic`` but cheaper to read.
    * ``v`` — envelope schema version, bumped on incompatible changes.
    * ``ts`` — epoch seconds as a float.
    * ``id`` — random UUID hex so consumers can de-duplicate should they
      ever see an event twice.
    """

    topic: str
    payload: dict[str, Any]
    type: str = ""
    v: int = EVENT_SCHEMA_VERSION
    ts: float = field(default_factory=time.time)
    id: str = field(default_factory=lambda: uuid.uuid4().hex)

    def to_dict(self) -> dict[str, Any]:
        """Return the wire-format dict for this event."""
        wire_fields = ("v", "id", "topic", "type", "ts", "payload")
        return {key: getattr(self, key) for key in wire_fields}

    @classmethod
    def from_dict(cls, topic: str, data: dict[str, Any]) -> "Event":
        """Rebuild an event from a wire-format dict.

        *topic* comes from the caller, not from *data*: the transport
        knows which subject the message arrived on, and trusting a wire
        ``topic`` field would let a misbehaving publisher spoof events on
        topics it does not publish to. Missing or falsy ``payload``,
        ``type`` and ``id`` fall back to ``{}``, ``""`` and a fresh UUID;
        missing ``v`` / ``ts`` fall back to the current schema version and
        the current time.
        """
        payload = data.get("payload", {}) or {}
        event_type = data.get("type", "") or ""
        version = int(data.get("v", EVENT_SCHEMA_VERSION))
        stamp = float(data.get("ts", time.time()))
        event_id = data.get("id") or uuid.uuid4().hex
        return cls(
            topic=topic,
            payload=payload,
            type=event_type,
            v=version,
            ts=stamp,
            id=event_id,
        )
|
||||
|
||||
|
||||
class Subscription(abc.ABC):
    """Base class for an open subscription.

    Works both as an async context manager and as an async iterator of
    events. Transports subclass it and supply :meth:`__anext__` and
    :meth:`_aclose`; callers obtain instances from
    :meth:`BaseBus.subscribe` instead of constructing them.
    """

    def __init__(self, pattern: str) -> None:
        self.pattern = pattern
        # Flipped by aclose() so _aclose() runs at most once.
        self._closed = False

    async def __aenter__(self) -> "Subscription":
        return self

    async def __aexit__(self, *exc: Any) -> None:
        await self.aclose()

    def __aiter__(self) -> AsyncIterator[Event]:
        return self

    async def aclose(self) -> None:
        """Close the subscription; repeated calls are no-ops."""
        if not self._closed:
            self._closed = True
            await self._aclose()

    @abc.abstractmethod
    async def __anext__(self) -> Event:  # pragma: no cover - abstract
        raise NotImplementedError

    @abc.abstractmethod
    async def _aclose(self) -> None:  # pragma: no cover - abstract
        raise NotImplementedError
|
||||
|
||||
|
||||
class BaseBus(abc.ABC):
    """Contract every pub/sub transport implements.

    ``connect()`` and ``close()`` MUST both tolerate being awaited more
    than once. Publishing or subscribing on a closed bus raises
    :class:`RuntimeError`. The bus is also an async context manager that
    connects on entry and closes on exit.
    """

    @abc.abstractmethod
    async def connect(self) -> None:
        """Acquire whatever network/transport resources are needed. Idempotent."""

    @abc.abstractmethod
    async def publish(
        self,
        topic: str,
        payload: dict[str, Any],
        *,
        event_type: str = "",
    ) -> None:
        """Send *payload* on *topic* without waiting for delivery.

        Delivery is at-most-once. Transport errors are logged and
        swallowed by the implementation rather than raised, because a bus
        loss must not cascade into worker failure (the DB is the source
        of truth).
        """

    @abc.abstractmethod
    def subscribe(self, pattern: str) -> Subscription:
        """Open a :class:`Subscription` for events whose topic matches *pattern*.

        Patterns follow NATS wildcard semantics: ``*`` matches one topic
        token, ``>`` matches one-or-more trailing tokens. Examples:

        * ``topology.*.mutation.applied`` — all ``applied`` events for any
          topology.
        * ``topology.abc123.mutation.*`` — all mutation states for one
          topology.
        * ``topology.>`` — every event under the ``topology`` root.
        """

    @abc.abstractmethod
    async def close(self) -> None:
        """Release transport resources. Idempotent."""

    async def __aenter__(self) -> "BaseBus":
        await self.connect()
        return self

    async def __aexit__(self, *exc: Any) -> None:
        await self.close()
|
||||
|
||||
|
||||
# ─── Wildcard matching shared across in-process transports ───────────────────
|
||||
|
||||
def matches(pattern: str, topic: str) -> bool:
    """Tell whether *topic* satisfies *pattern* under NATS wildcard rules.

    Both strings are split on ``.``. ``*`` stands for exactly one
    non-empty token. ``>`` stands for one-or-more remaining tokens, so
    ``topology.>`` accepts ``topology.abc.x`` but rejects bare
    ``topology``. Any other pattern token must equal the topic token at
    the same position, and without ``>`` the token counts must agree.
    """
    wanted = pattern.split(".")
    actual = topic.split(".")
    topic_len = len(actual)
    for pos, token in enumerate(wanted):
        if token == ">":
            # Needs at least one topic token at or after this position.
            return pos < topic_len
        if pos >= topic_len:
            return False
        current = actual[pos]
        if token == "*":
            if not current:
                return False
        elif token != current:
            return False
    return len(wanted) == topic_len
|
||||
|
||||
|
||||
# Sentinel used by the in-process transports to signal "no more events"
|
||||
# through the asyncio.Queue fan-out without inventing a separate control
|
||||
# channel. Not part of the wire protocol.
|
||||
_CLOSE_SENTINEL: Any = object()
|
||||
|
||||
|
||||
async def _next_or_stop(queue: "asyncio.Queue[Any]") -> Event:
    """Await the next queue item; raise ``StopAsyncIteration`` on the close sentinel."""
    received = await queue.get()
    if received is not _CLOSE_SENTINEL:
        return received
    raise StopAsyncIteration
|
||||
@@ -1,86 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Bus factory — selects a :class:`~decnet.bus.base.BaseBus` implementation.
|
||||
|
||||
Dispatch key: the ``DECNET_BUS_TYPE`` environment variable.
|
||||
|
||||
* ``unix`` (default) → :class:`~decnet.bus.unix_client.UnixSocketBus`
|
||||
* ``fake`` → :class:`~decnet.bus.fake.FakeBus` (in-process)
|
||||
|
||||
If ``DECNET_BUS_ENABLED`` is ``"false"`` the factory short-circuits to
|
||||
:class:`~decnet.bus.fake.NullBus` regardless of ``DECNET_BUS_TYPE`` — a
|
||||
cheap way for dev environments to run workers without a bus daemon.
|
||||
|
||||
Mirrors :mod:`decnet.web.db.factory` (lazy imports inside each branch,
|
||||
env-driven dispatch, optional telemetry wrapping). Callers MUST use
|
||||
:func:`get_bus` rather than instantiating transports directly.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
from decnet.bus.base import BaseBus
|
||||
|
||||
|
||||
def get_bus(**kwargs: Any) -> BaseBus:
    """Instantiate the bus implementation selected by environment.

    Keyword arguments are forwarded to the concrete transport:

    * ``UnixSocketBus`` accepts ``socket_path`` (overrides
      ``DECNET_BUS_SOCKET``) and ``client_name``.
    * ``FakeBus`` accepts ``queue_size``. The UNIX-only keywords
      ``socket_path`` and ``client_name`` are dropped for it, so callers
      that always pass them (for example ``get_bus(client_name="api")``)
      keep working when ``DECNET_BUS_TYPE=fake``.

    ``DECNET_BUS_ENABLED=false`` short-circuits to ``NullBus`` and ignores
    every keyword argument.

    Raises:
        ValueError: ``DECNET_BUS_TYPE`` names an unknown transport.
    """
    if os.environ.get("DECNET_BUS_ENABLED", "true").lower() == "false":
        from decnet.bus.fake import NullBus
        return NullBus()

    bus_type = os.environ.get("DECNET_BUS_TYPE", "unix").lower()

    if bus_type == "unix":
        from decnet.bus.unix_client import UnixSocketBus
        socket_path = kwargs.pop("socket_path", None) or _default_socket_path()
        bus: BaseBus = UnixSocketBus(socket_path=socket_path, **kwargs)
    elif bus_type == "fake":
        from decnet.bus.fake import FakeBus
        # FakeBus.__init__ only takes queue_size; forwarding the UNIX-only
        # keywords would raise TypeError and make the fake transport
        # unusable for callers written against the default transport.
        unix_only = ("socket_path", "client_name")
        fake_kwargs = {k: v for k, v in kwargs.items() if k not in unix_only}
        bus = FakeBus(**fake_kwargs)
    else:
        raise ValueError(f"Unsupported bus type: {bus_type}")

    return _maybe_wrap_telemetry(bus)
|
||||
|
||||
|
||||
def _default_socket_path() -> str:
|
||||
"""Return the bus socket path honoring ``DECNET_BUS_SOCKET`` and falling
|
||||
back to ``/run/decnet/bus.sock`` → ``~/.decnet/bus.sock``.
|
||||
|
||||
The runtime path (``/run/decnet``) is preferred because systemd
|
||||
``RuntimeDirectory=decnet`` sets it up with the right perms; the home
|
||||
fallback keeps dev boxes usable without systemd.
|
||||
"""
|
||||
explicit = os.environ.get("DECNET_BUS_SOCKET")
|
||||
if explicit:
|
||||
return explicit
|
||||
|
||||
runtime_dir = "/run/decnet"
|
||||
if os.path.isdir(runtime_dir) and os.access(runtime_dir, os.W_OK):
|
||||
return f"{runtime_dir}/bus.sock"
|
||||
return os.path.expanduser("~/.decnet/bus.sock")
|
||||
|
||||
|
||||
def _maybe_wrap_telemetry(bus: BaseBus) -> BaseBus:
|
||||
"""Wrap *bus* in a tracing proxy if OTEL is enabled, else return as-is.
|
||||
|
||||
Uses :func:`decnet.telemetry.wrap_repository` as the underlying proxy —
|
||||
its implementation is generic (wraps any async method in a span), so we
|
||||
reuse it with a bus-appropriate tracer name. If telemetry isn't wired
|
||||
up at all we no-op.
|
||||
"""
|
||||
try:
|
||||
from decnet.telemetry import wrap_repository
|
||||
except ImportError:
|
||||
return bus
|
||||
try:
|
||||
return wrap_repository(bus)
|
||||
except Exception: # pragma: no cover - defensive
|
||||
return bus
|
||||
@@ -1,184 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""In-process bus transports.
|
||||
|
||||
* :class:`FakeBus` — real pub/sub semantics without touching a socket. Used
|
||||
by unit tests and anywhere ``DECNET_BUS_TYPE=fake`` is set. Lets code
|
||||
that depends on the bus be exercised entirely inside a single event loop,
|
||||
matching the DECNET testing convention of not opening real network
|
||||
sockets from unit tests.
|
||||
* :class:`NullBus` — no-op. Returned by :func:`~decnet.bus.factory.get_bus`
|
||||
when ``DECNET_BUS_ENABLED=false`` so workers can start cleanly in dev
|
||||
environments where no bus daemon is running. Publishes are dropped;
|
||||
subscriptions yield nothing and close cleanly.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Any
|
||||
|
||||
from decnet.bus.base import (
|
||||
BaseBus,
|
||||
Event,
|
||||
Subscription,
|
||||
_CLOSE_SENTINEL,
|
||||
matches,
|
||||
)
|
||||
from decnet.logging import get_logger
|
||||
|
||||
log = get_logger("bus.fake")
|
||||
|
||||
# Per-subscriber bounded queue: backpressure policy is drop-oldest so a slow
|
||||
# consumer cannot stall publishers (the invariant — DB is the source of
|
||||
# truth — makes dropped events acceptable).
|
||||
_DEFAULT_QUEUE_SIZE = 1024
|
||||
|
||||
|
||||
# ─── FakeBus ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class _FakeSubscription(Subscription):
    """Queue-backed subscription fed by :meth:`FakeBus.publish`.

    Closing it removes it from the owning bus.
    """

    def __init__(self, bus: "FakeBus", pattern: str, queue: "asyncio.Queue[Any]") -> None:
        super().__init__(pattern)
        self._queue = queue
        self._bus = bus

    async def __anext__(self) -> Event:
        if self._closed:
            raise StopAsyncIteration
        received = await self._queue.get()
        if received is not _CLOSE_SENTINEL:
            return received
        raise StopAsyncIteration

    async def _aclose(self) -> None:
        self._bus._unregister(self)
        try:
            # Wakes a consumer currently parked in __anext__.
            self._queue.put_nowait(_CLOSE_SENTINEL)
        except asyncio.QueueFull:
            pass
|
||||
|
||||
|
||||
class FakeBus(BaseBus):
    """In-process pub/sub.

    Publishes iterate every active subscription and enqueue the event on
    the ones whose pattern matches the topic. If a subscriber's queue is
    full, the oldest event is discarded to make room — same at-most-once
    semantics as the real UNIX-socket transport.
    """

    def __init__(self, queue_size: int = _DEFAULT_QUEUE_SIZE) -> None:
        # Maximum number of buffered events per subscriber queue.
        self._queue_size = queue_size
        self._subs: list[_FakeSubscription] = []
        self._connected = False
        self._closed = False
        self._lock = asyncio.Lock()

    async def connect(self) -> None:
        """Mark the bus connected; there is no real transport to open."""
        self._connected = True

    async def publish(
        self,
        topic: str,
        payload: dict[str, Any],
        *,
        event_type: str = "",
    ) -> None:
        """Enqueue an event on every subscription whose pattern matches *topic*.

        Raises:
            RuntimeError: the bus has been closed.
        """
        if self._closed:
            raise RuntimeError("publish on closed bus")
        event = Event(topic=topic, payload=payload, type=event_type)
        async with self._lock:
            # Snapshot the targets under the lock, deliver outside it.
            targets = [s for s in self._subs if matches(s.pattern, topic)]
        for sub in targets:
            _enqueue_drop_oldest(sub._queue, event)

    def subscribe(self, pattern: str) -> Subscription:
        """Register and return a new subscription for *pattern*.

        Raises:
            RuntimeError: the bus has been closed.
        """
        if self._closed:
            raise RuntimeError("subscribe on closed bus")
        queue: asyncio.Queue[Any] = asyncio.Queue(maxsize=self._queue_size)
        sub = _FakeSubscription(self, pattern, queue)
        self._subs.append(sub)
        return sub

    def _unregister(self, sub: _FakeSubscription) -> None:
        """Forget *sub*; unknown subscriptions are ignored."""
        try:
            self._subs.remove(sub)
        except ValueError:
            pass

    async def close(self) -> None:
        """Close the bus and wake every open subscription. Idempotent.

        The close sentinel must reach each subscriber: a subscription is
        not marked closed here, so a consumer that never sees the sentinel
        would drain its backlog and then block forever. When a queue is
        full, the oldest buffered event is therefore dropped to make room
        (the same drop-oldest policy used for publishes).
        """
        if self._closed:
            return
        self._closed = True
        for sub in list(self._subs):
            queue = sub._queue
            if queue.full():
                try:
                    queue.get_nowait()
                except asyncio.QueueEmpty:
                    pass
            try:
                queue.put_nowait(_CLOSE_SENTINEL)
            except asyncio.QueueFull:
                # Only reachable if the queue refilled in between.
                pass
        self._subs.clear()
|
||||
|
||||
|
||||
def _enqueue_drop_oldest(queue: "asyncio.Queue[Any]", event: Event) -> None:
|
||||
"""Put *event* on *queue*, dropping the oldest item if the queue is full.
|
||||
|
||||
Factored out so both FakeBus and the real UNIX server share the exact
|
||||
same backpressure policy.
|
||||
"""
|
||||
while True:
|
||||
try:
|
||||
queue.put_nowait(event)
|
||||
return
|
||||
except asyncio.QueueFull:
|
||||
try:
|
||||
dropped = queue.get_nowait()
|
||||
log.warning(
|
||||
"bus.fake: subscriber queue full, dropped %s", getattr(dropped, "topic", "?")
|
||||
)
|
||||
except asyncio.QueueEmpty:
|
||||
return
|
||||
|
||||
|
||||
# ─── NullBus ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class _NullSubscription(Subscription):
    """Empty subscription: iteration ends at once and closing does nothing."""

    async def __anext__(self) -> Event:
        raise StopAsyncIteration

    async def _aclose(self) -> None:
        return None
|
||||
|
||||
|
||||
class NullBus(BaseBus):
    """Do-nothing bus selected when ``DECNET_BUS_ENABLED=false``.

    Every publish is discarded and every subscription is empty. It exists
    for dev environments without a bus daemon: the process starts cleanly,
    publishing code needs no feature flags, and nothing ever blocks on a
    subscriber.
    """

    async def connect(self) -> None:
        return None

    async def publish(
        self,
        topic: str,
        payload: dict[str, Any],
        *,
        event_type: str = "",
    ) -> None:
        return None

    def subscribe(self, pattern: str) -> Subscription:
        return _NullSubscription(pattern)

    async def close(self) -> None:
        return None
|
||||
@@ -1,145 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Wire protocol for the DECNET bus UNIX-socket transport.
|
||||
|
||||
Frame layout:
|
||||
|
||||
<VERB> [<args ...>]\\n # ASCII header, single line, no trailing space
|
||||
<4-byte big-endian body length>
|
||||
<body> # orjson-serialized dict, or empty (length 0)
|
||||
|
||||
Verbs:
|
||||
|
||||
* ``HELLO <client-name>`` — optional greeting, logged by server. Body empty.
|
||||
* ``PUB <topic>`` — publisher → server. Body = payload dict.
|
||||
* ``SUB <pattern>`` — subscriber → server. Body empty.
|
||||
* ``UNSUB <pattern>`` — subscriber → server. Body empty.
|
||||
* ``EVT <topic>`` — server → subscriber. Body = payload dict (wrapped
|
||||
in an :class:`~decnet.bus.base.Event` envelope).
|
||||
* ``BYE`` — either direction. Body empty. Graceful shutdown.
|
||||
|
||||
Parsing rules:
|
||||
|
||||
* The header is a single line terminated by ``\\n`` (LF). ``\\r`` is tolerated
|
||||
but not required.
|
||||
* Header tokens are whitespace-separated. The first token is the verb;
|
||||
everything after is verb-specific. We split on the first space only so
|
||||
topics / patterns with quoted content are not supported (they are not
|
||||
needed — topic segments forbid whitespace per :mod:`decnet.bus.topics`).
|
||||
* Maximum header length is 4096 bytes; maximum body length is 1 MiB. Beyond
|
||||
those, the connection is dropped with a logged error. This is a honeypot
|
||||
framework, not a general-purpose message broker; a malformed frame is
|
||||
treated as hostile.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import struct
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
import orjson
|
||||
|
||||
# Frame size caps. encode() refuses to build, and read_frame() refuses to
# accept, a header or body larger than these.
MAX_HEADER_BYTES = 4096
MAX_BODY_BYTES = 1 * 1024 * 1024  # 1 MiB

# Verb constants (callers should reference these, not bare strings).
HELLO = "HELLO"
PUB = "PUB"
SUB = "SUB"
UNSUB = "UNSUB"
EVT = "EVT"
BYE = "BYE"

# The full verb set; encode() and read_frame() raise ProtocolError for
# any verb outside it.
_VALID_VERBS = frozenset({HELLO, PUB, SUB, UNSUB, EVT, BYE})
|
||||
|
||||
|
||||
class ProtocolError(Exception):
    """Raised for a malformed or oversized frame; the connection should be closed."""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Frame:
    """One parsed frame.

    ``body`` holds the raw, still-encoded bytes: the protocol layer cannot
    know whether a verb carries a dict body or none, so decoding with
    orjson is left to the caller.
    """

    # Protocol verb taken from the first header token.
    verb: str
    # Remainder of the header line after the verb, trimmed.
    args: str
    # Undecoded body bytes.
    body: bytes
|
||||
|
||||
|
||||
def encode(verb: str, args: str = "", body: dict[str, Any] | None = None) -> bytes:
    """Serialize a frame.

    *body* is a dict that will be orjson-encoded, or ``None`` for an empty
    body. The header line is written verbatim — callers must supply args
    that are ASCII-only and free of ``\\n`` / ``\\r``.

    Returns:
        Header line, 4-byte big-endian body length, then the body bytes.

    Raises:
        ProtocolError: unknown verb, newline or non-ASCII characters in
            *args*, or a header/body exceeding the size caps.
    """
    if verb not in _VALID_VERBS:
        raise ProtocolError(f"unknown verb {verb!r}")
    if "\n" in args or "\r" in args:
        raise ProtocolError("args must not contain newline characters")

    body_bytes = b"" if body is None else orjson.dumps(body)
    if len(body_bytes) > MAX_BODY_BYTES:
        raise ProtocolError(
            f"body {len(body_bytes)} bytes exceeds max {MAX_BODY_BYTES}"
        )

    # rstrip() removes the trailing space left behind when args is empty.
    header = f"{verb} {args}".rstrip() + "\n"
    try:
        header_bytes = header.encode("ascii")
    except UnicodeEncodeError as exc:
        # Keep the single documented failure type instead of leaking a
        # codec error to callers that only handle ProtocolError.
        raise ProtocolError("header must contain only ASCII characters") from exc
    if len(header_bytes) > MAX_HEADER_BYTES:
        raise ProtocolError(
            f"header {len(header_bytes)} bytes exceeds max {MAX_HEADER_BYTES}"
        )
    return header_bytes + struct.pack(">I", len(body_bytes)) + body_bytes
|
||||
|
||||
|
||||
async def read_frame(reader: asyncio.StreamReader) -> Frame | None:
    """Read one frame from *reader*.

    Reads the newline-terminated header, validates it, then reads the
    4-byte big-endian body length and that many body bytes.

    Returns:
        The parsed :class:`Frame`, or ``None`` on clean EOF before a new
        frame starts (no header bytes were received).

    Raises:
        ProtocolError: malformed input — connection closed mid-header,
            header over the stream buffer limit or ``MAX_HEADER_BYTES``,
            non-ASCII or empty header, unknown verb, or a declared body
            length over ``MAX_BODY_BYTES``. The caller should close the
            connection.
        asyncio.IncompleteReadError: the stream ended inside the length
            prefix or the body (propagated from ``readexactly``).
    """
    try:
        header = await reader.readuntil(b"\n")
    except asyncio.IncompleteReadError as exc:
        if not exc.partial:
            # EOF on a frame boundary: the peer simply went away.
            return None
        raise ProtocolError("connection closed mid-header") from exc
    except asyncio.LimitOverrunError as exc:
        raise ProtocolError("header exceeded buffer limit") from exc

    if len(header) > MAX_HEADER_BYTES:
        raise ProtocolError(f"header {len(header)} bytes exceeds max")

    try:
        line = header.rstrip(b"\r\n").decode("ascii", errors="strict")
    except UnicodeDecodeError as exc:
        # Header bytes come straight from the peer; report bad bytes as a
        # protocol violation instead of leaking a codec error.
        raise ProtocolError("header is not valid ASCII") from exc
    if not line:
        raise ProtocolError("empty header line")

    verb, _, args = line.partition(" ")
    if verb not in _VALID_VERBS:
        raise ProtocolError(f"unknown verb {verb!r}")

    length_bytes = await reader.readexactly(4)
    (body_len,) = struct.unpack(">I", length_bytes)
    # Check the declared length before reading so an oversized claim never
    # makes us buffer the body.
    if body_len > MAX_BODY_BYTES:
        raise ProtocolError(f"body length {body_len} exceeds max")

    body = await reader.readexactly(body_len) if body_len else b""
    return Frame(verb=verb, args=args.strip(), body=body)
|
||||
|
||||
|
||||
def decode_body(body: bytes) -> dict[str, Any]:
    """Parse a frame body into a dict.

    An empty body yields ``{}``. Anything else must be valid JSON whose
    top-level value is an object; otherwise :class:`ProtocolError` is
    raised.
    """
    if not body:
        return {}

    try:
        parsed = orjson.loads(body)
    except orjson.JSONDecodeError as exc:
        raise ProtocolError(f"body is not valid JSON: {exc}") from exc

    if isinstance(parsed, dict):
        return parsed
    raise ProtocolError(f"body must be a JSON object, got {type(parsed).__name__}")
|
||||
@@ -1,212 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fire-and-forget publish helpers shared across every worker.
|
||||
|
||||
Lifted out of ``decnet/mutator/engine.py`` once a second caller showed up
|
||||
(DEBT-031). Keeping one implementation means the "never break the worker
|
||||
loop" guarantee is audited in exactly one place.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import os
|
||||
import signal
|
||||
import time
|
||||
from typing import Any, Callable
|
||||
|
||||
from decnet.bus import topics as _topics
|
||||
from decnet.bus.base import BaseBus
|
||||
from decnet.logging import get_logger
|
||||
|
||||
log = get_logger("bus.publish")
|
||||
|
||||
|
||||
async def publish_safely(
    bus: BaseBus | None,
    topic: str,
    payload: dict[str, Any],
    event_type: str = "",
) -> None:
    """Best-effort publish: forward to ``bus.publish`` and never raise.

    Workers call this after their real side effect (the DB row or similar)
    is already committed, so the bus is only a notification channel. A lost
    publish costs a little UI latency until the next poll; an exception
    escaping into the worker loop would be far worse. Any ``Exception``
    from the bus is therefore logged at WARNING and dropped.

    A ``None`` bus makes the call a no-op.
    """
    if bus is not None:
        try:
            await bus.publish(topic, payload, event_type=event_type)
        except Exception as err:  # noqa: BLE001
            log.warning("bus publish failed topic=%s: %s", topic, err)
|
||||
|
||||
|
||||
def make_thread_safe_publisher(
    bus: BaseBus | None,
    loop: asyncio.AbstractEventLoop,
) -> Callable[[str, dict[str, Any], str], None]:
    """Build a sync callable that marshals publishes back to *loop*.

    Workers that run their hot paths in a worker thread (scapy sniff loop,
    ``asyncio.to_thread`` probes, blocking socket reads) cannot ``await``
    the bus directly. The returned function schedules ``publish_safely`` on
    *loop* via ``run_coroutine_threadsafe`` and returns immediately — it
    never waits on the resulting future.

    Args:
        bus: Bus to publish on, or ``None`` for degraded mode.
        loop: Event loop that owns the bus.

    Returns:
        ``publish(topic, payload, event_type="")``. With a ``None`` bus it
        is a no-op that accepts the same positional and keyword call shapes
        as the real publisher, so call sites behave the same in both modes.
    """
    if bus is None:
        def _noop(topic: str, payload: dict[str, Any], event_type: str = "") -> None:
            """Degraded-mode publisher: same signature, does nothing."""

        return _noop

    def _publish(topic: str, payload: dict[str, Any], event_type: str = "") -> None:
        # Stream threads may keep draining after the bus owner closed it
        # (shutdown race). Short-circuit here so we don't marshal a
        # coroutine onto a dead loop just to have publish_safely swallow
        # it. A flip of _closed between this check and the coroutine
        # actually running is left for the bus itself to handle.
        if getattr(bus, "_closed", False):
            return
        coro = None
        try:
            coro = publish_safely(bus, topic, payload, event_type=event_type)
            asyncio.run_coroutine_threadsafe(coro, loop)
        except Exception as exc:  # noqa: BLE001
            # Scheduling failed (e.g. the loop is already closed), so the
            # coroutine will never run. Close it explicitly to avoid a
            # "coroutine ... was never awaited" RuntimeWarning.
            if asyncio.iscoroutine(coro):
                coro.close()
            log.debug("cross-thread bus publish failed topic=%s: %s", topic, exc)

    return _publish
|
||||
|
||||
|
||||
async def run_health_heartbeat(
    bus: BaseBus | None,
    worker: str,
    *,
    interval: float = 30.0,
    extra: Callable[[], dict[str, Any]] | None = None,
) -> None:
    """Emit a heartbeat on ``system.<worker>.health`` every *interval* seconds.

    Each tick publishes ``{"worker": <name>, "ts": <unix-ts>}`` merged with
    whatever ``extra()`` returns. A failing ``extra()`` is logged at DEBUG
    and the bare heartbeat is still sent. Publishing goes through
    ``publish_safely``, so with a ``None`` bus the loop only sleeps.

    The loop runs until the task is cancelled; the ``CancelledError`` is
    suppressed so the coroutine finishes normally on cancellation.
    """
    topic = _topics.system_health(worker)

    def _snapshot() -> dict[str, Any]:
        """Assemble the payload for one tick."""
        beat: dict[str, Any] = {"worker": worker, "ts": time.time()}
        if extra is None:
            return beat
        try:
            beat.update(extra())
        except Exception as exc:  # noqa: BLE001
            log.debug("heartbeat extra() failed worker=%s: %s", worker, exc)
        return beat

    with contextlib.suppress(asyncio.CancelledError):
        while True:
            await publish_safely(
                bus, topic, _snapshot(), event_type=_topics.SYSTEM_HEALTH
            )
            await asyncio.sleep(interval)
|
||||
|
||||
|
||||
async def run_control_listener(
    bus: BaseBus | None,
    worker: str,
    shutdown: asyncio.Event,
) -> None:
    """Subscribe to ``system.<worker>.control`` and honour stop intents.

    On a ``{"action": "stop", ...}`` message the function sets *shutdown*
    and returns — the worker's main loop is expected to check the event and
    unwind cleanly.

    Messages with a missing or unknown action, and payloads that are not
    mapping-like, are logged at DEBUG and skipped; the listener keeps
    running. An exception from the transport is logged at WARNING and ends
    the listener without setting *shutdown*.

    A ``None`` bus yields a coroutine that simply awaits *shutdown*, so
    callers can ``create_task`` this unconditionally regardless of bus
    state.

    Cancellation-safe: ``CancelledError`` is suppressed.
    """
    if bus is None:
        with contextlib.suppress(asyncio.CancelledError):
            await shutdown.wait()
        return

    topic = _topics.system_control(worker)
    with contextlib.suppress(asyncio.CancelledError):
        try:
            async with bus.subscribe(topic) as sub:
                async for event in sub:
                    payload = event.payload or {}
                    # One malformed message must not disable bus-driven
                    # shutdown: without this guard the missing ``.get``
                    # raises AttributeError and the handler below ends the
                    # listener for good.
                    if not callable(getattr(payload, "get", None)):
                        log.debug(
                            "control: ignoring non-mapping payload worker=%s type=%s",
                            worker, type(payload).__name__,
                        )
                        continue
                    action = payload.get("action")
                    requested_by = payload.get("requested_by", "<unknown>")
                    if action == _topics.WORKER_CONTROL_STOP:
                        log.info(
                            "control: stop requested worker=%s by=%s",
                            worker, requested_by,
                        )
                        shutdown.set()
                        return
                    log.debug(
                        "control: ignoring unknown action worker=%s action=%r",
                        worker, action,
                    )
        except Exception as exc:  # noqa: BLE001
            log.warning(
                "control listener failed worker=%s: %s — shutdown via bus disabled",
                worker, exc,
            )
|
||||
|
||||
|
||||
async def run_control_listener_signal(
    bus: BaseBus | None,
    worker: str,
) -> None:
    """Like :func:`run_control_listener` but signals the process on stop.

    Preferred for workers whose main loop is a blocking thread
    (container-log tail, PTY read, scapy sniff) — wiring an
    ``asyncio.Event`` through the thread boundary is error-prone. On a stop
    intent this sends SIGTERM to the current process so the stop travels
    through the worker's existing SIGTERM cleanup path, then returns.

    Messages with a missing or unknown action, and payloads that are not
    mapping-like, are logged at DEBUG and skipped. A ``None`` bus returns
    immediately.

    Cancellation-safe. Never raises: a failed self-signal or a transport
    error is logged at WARNING and the listener exits (admin can fall back
    to ``systemctl``).
    """
    if bus is None:
        return

    topic = _topics.system_control(worker)
    with contextlib.suppress(asyncio.CancelledError):
        try:
            async with bus.subscribe(topic) as sub:
                async for event in sub:
                    payload = event.payload or {}
                    # One malformed message must not disable bus-driven
                    # shutdown: without this guard the missing ``.get``
                    # raises AttributeError and the handler below ends the
                    # listener for good.
                    if not callable(getattr(payload, "get", None)):
                        log.debug(
                            "control: ignoring non-mapping payload worker=%s type=%s",
                            worker, type(payload).__name__,
                        )
                        continue
                    action = payload.get("action")
                    requested_by = payload.get("requested_by", "<unknown>")
                    if action == _topics.WORKER_CONTROL_STOP:
                        log.info(
                            "control: stop requested worker=%s by=%s → SIGTERM self",
                            worker, requested_by,
                        )
                        try:
                            os.kill(os.getpid(), signal.SIGTERM)
                        except Exception as exc:  # noqa: BLE001
                            log.warning(
                                "control: self-signal failed worker=%s: %s",
                                worker, exc,
                            )
                        return
                    log.debug(
                        "control: ignoring unknown action worker=%s action=%r",
                        worker, action,
                    )
        except Exception as exc:  # noqa: BLE001
            log.warning(
                "control signal listener failed worker=%s: %s",
                worker, exc,
            )
|
||||
@@ -1,653 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Canonical topic hierarchy for the DECNET ServiceBus.
|
||||
|
||||
Locked early so consumers can subscribe with stable wildcard patterns.
|
||||
Adding new topic families is fine; **renaming** existing ones is a breaking
|
||||
change for every subscriber and requires a coordinated rollout.
|
||||
|
||||
Token structure (NATS-style, dot-separated):
|
||||
|
||||
topology.{topology_id}.mutation.{state}
|
||||
topology.{topology_id}.status
|
||||
decky.{decky_id}.state
|
||||
decky.{decky_id}.traffic
|
||||
orchestrator.traffic.{decky_id}
|
||||
orchestrator.file.{decky_id}
|
||||
orchestrator.email.{decky_id}
|
||||
attacker.observed
|
||||
attacker.scored
|
||||
attacker.session.started
|
||||
attacker.session.ended
|
||||
attacker.observation.{primitive}
|
||||
identity.formed
|
||||
identity.observation.linked
|
||||
identity.merged
|
||||
identity.unmerged
|
||||
identity.campaign.assigned
|
||||
campaign.formed
|
||||
campaign.identity.assigned
|
||||
campaign.merged
|
||||
campaign.unmerged
|
||||
credential.captured
|
||||
credential.reuse.detected
|
||||
attribution.profile.state_changed
|
||||
attribution.profile.multi_actor_suspected
|
||||
canary.{token_id}.triggered
|
||||
canary.{token_id}.placed
|
||||
canary.{token_id}.revoked
|
||||
system.log
|
||||
system.bus.health
|
||||
system.{worker}.health
|
||||
email.received
|
||||
ttp.tagged
|
||||
ttp.rule.fired.{technique_id}
|
||||
ttp.rule.suppressed
|
||||
|
||||
Wildcards (per :func:`decnet.bus.base.matches`):
|
||||
|
||||
* ``*`` matches exactly one token.
|
||||
* ``>`` matches one-or-more trailing tokens (so ``topology.>`` matches
|
||||
``topology.abc.status`` but not the bare root ``topology``).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
# ─── Root prefixes ───────────────────────────────────────────────────────────
|
||||
|
||||
TOPOLOGY = "topology"
|
||||
DECKY = "decky"
|
||||
ATTACKER = "attacker"
|
||||
IDENTITY = "identity"
|
||||
CAMPAIGN = "campaign"
|
||||
SYSTEM = "system"
|
||||
CREDENTIAL = "credential"
|
||||
ATTRIBUTION = "attribution"
|
||||
ORCHESTRATOR = "orchestrator"
|
||||
CANARY = "canary"
|
||||
SMTP = "smtp"
|
||||
EMAIL = "email"
|
||||
TTP = "ttp"
|
||||
|
||||
|
||||
# ─── Leaf event-type constants (the last segment of each topic) ──────────────
|
||||
|
||||
# Topology mutation lifecycle states — keep in sync with TopologyMutation.state
|
||||
# in decnet/web/db/models.py; the bus topic mirrors the DB state machine.
|
||||
MUTATION_ENQUEUED = "enqueued"
|
||||
MUTATION_APPLYING = "applying"
|
||||
MUTATION_APPLIED = "applied"
|
||||
MUTATION_FAILED = "failed"
|
||||
|
||||
# Topology-level status transitions (topology.{id}.status): fires when the
|
||||
# topology row's status column changes (pending/deploying/active/degraded/failed).
|
||||
TOPOLOGY_STATUS = "status"
|
||||
|
||||
# Decky-level event types (third token: ``decky.<id>.<event_type>``).
|
||||
DECKY_STATE = "state"
|
||||
DECKY_TRAFFIC = "traffic"
|
||||
# On-demand mutation request — published by the API/CLI/UI, consumed by
|
||||
# the mutator's watch loop to force an immediate mutation of one decky
|
||||
# without waiting for its scheduled interval. Underscored (not dotted)
|
||||
# to stay a single NATS token so the builder's validator accepts it.
|
||||
DECKY_MUTATE_REQUEST = "mutate_request"
|
||||
# Mutation transition event — distinct from DECKY_STATE ("current
|
||||
# shape") because a mutation is a *transition* that carries old/new
|
||||
# services + trigger + timing. Correlator consumes these (via the
|
||||
# syslog sidechannel too) to interleave substrate-change markers into
|
||||
# attacker traversals.
|
||||
DECKY_MUTATION = "mutation"
|
||||
# Per-service add/remove on a deployed decky (live; no full redeploy).
|
||||
# Payload carries ``decky_name``, ``service_name``, optional
|
||||
# ``topology_id``, and ``services`` (the post-mutation list). Consumers
|
||||
# that watch substrate shape (correlator, dashboard, profiler) reconcile
|
||||
# off these without waiting for the next decnet-state.json snapshot.
|
||||
DECKY_SERVICE_ADDED = "service_added"
|
||||
DECKY_SERVICE_REMOVED = "service_removed"
|
||||
# Per-service config change (the schema-driven Inspector form). Payload
|
||||
# carries ``decky_name``, ``service_name``, optional ``topology_id``,
|
||||
# ``service_config`` (the new validated dict), and ``recreated`` — true
|
||||
# when the operator hit Apply (container was force-recreated to pick up
|
||||
# the new env), false when they only hit Save (DB-only).
|
||||
DECKY_SERVICE_CONFIG_CHANGED = "service_config_changed"
|
||||
# Async deploy/mutate operation transitions
|
||||
# (pending/running/succeeded/failed). Payload: {lifecycle_id, operation,
|
||||
# status, error?}. UI polling endpoint is the source of truth; this
|
||||
# fires for live subscribers (dashboard, mutator-side audit, etc).
|
||||
DECKY_LIFECYCLE = "lifecycle"
|
||||
|
||||
# Attacker event types (second token under the ``attacker`` root). First
|
||||
# sighting, session boundary transitions, and score-threshold crossings
|
||||
# published by correlator + profiler. Consumers typically subscribe to
|
||||
# the wildcard ``attacker.>``.
|
||||
ATTACKER_OBSERVED = "observed"
|
||||
ATTACKER_SCORED = "scored"
|
||||
# Published once per successful active probe result (JARM/HASSH/TCPfp/ipv6_leak).
|
||||
# Distinct from ``observed`` which is the correlator's first-sight signal —
|
||||
# a fingerprint is additional evidence about an already-observed attacker.
|
||||
# Known payload ``kind`` discriminators carried in this topic:
|
||||
# "jarm" — JARM TLS server hash (prober)
|
||||
# "hassh" — HASSHServer SSH key-exchange hash (prober)
|
||||
# "tcpfp" — TCP/IP stack fingerprint hash (prober)
|
||||
# "tls_cert" — leaf TLS certificate SHA-256 (prober)
|
||||
# "ipv6_leak" — fe80:: link-local address observed via passive sniffer
|
||||
# or active ICMPv6 solicitation (prober + sniffer);
|
||||
# payload: {attacker_ip, addr, iid_kind, mac_oui, vector,
|
||||
# on_iface, observed_at}
|
||||
ATTACKER_FINGERPRINTED = "fingerprinted"
|
||||
# Published when the prober observes a NEW hash for an
|
||||
# (attacker_ip, port, probe_type) triple it has seen before — i.e. the
|
||||
# attacker rotated their VPS, rebuilt their SSH server, swapped their
|
||||
# TLS cert. Distinct from ``fingerprinted`` which fires on every probe
|
||||
# result; ``fingerprint_rotated`` fires only on diff and carries both
|
||||
# old_hash + new_hash. Producer: prober (via the rotation library);
|
||||
# consumers: dashboard, forensics, attribution clustering.
|
||||
ATTACKER_FINGERPRINT_ROTATED = "fingerprint_rotated"
|
||||
ATTACKER_SESSION_STARTED = "session.started"
|
||||
ATTACKER_SESSION_ENDED = "session.ended"
|
||||
# Published by the ``decnet enrich`` worker after an enrichment pass
|
||||
# succeeds for an attacker IP (one or more 3rd-party intel providers
|
||||
# returned a verdict). Payload carries the aggregate verdict + per-
|
||||
# provider summary so SIEM-bound webhooks don't need to re-query the DB.
|
||||
ATTACKER_INTEL_ENRICHED = "intel.enriched"
|
||||
# Per-primitive BEHAVE-SHELL observation. Full topic shape:
|
||||
# attacker.observation.<primitive>
|
||||
# e.g. ``attacker.observation.motor.input_modality``. Producer:
|
||||
# ``decnet/profiler/behave_shell/`` (extractor library called from the
|
||||
# profiler worker on ``attacker.session.ended``); consumers: dashboard
|
||||
# SSE relay, attribution engine state machine, federation gossip
|
||||
# (post-v0). See development/BEHAVE-INTEGRATION.md §"Bus topics" for
|
||||
# the wire-format contract — the prefix is documentation + pattern
|
||||
# match only; bus auth is socket file perms (DEBT-029 §2), not
|
||||
# topic-level. The ``primitive`` segment MAY contain dots
|
||||
# (``motor.shell_mastery.tab_completion``) — the same dotted-leaf
|
||||
# rule that ``attacker.session.ended`` uses.
|
||||
ATTACKER_OBSERVATION_PREFIX = "observation"
|
||||
|
||||
# Identity-resolution event types (second/third tokens under ``identity``).
|
||||
# Published by the (future) clusterer worker — see
|
||||
# development/IDENTITY_RESOLUTION.md. Constants ship in this commit;
|
||||
# no publishers exist yet, but consumers (webhook worker, dashboard
|
||||
# SSE relay) can subscribe to ``identity.>`` from day one and receive
|
||||
# events the instant the clusterer comes online.
|
||||
#
|
||||
# identity.formed — clusterer creates a new identity from
|
||||
# one or more observations
|
||||
# identity.observation.linked — observation attached to an existing
|
||||
# identity (or reattached from another)
|
||||
# identity.merged — two identities collapsed; loser gets
|
||||
# ``merged_into_uuid`` set, subscribers
|
||||
# re-key cached references to the winner
|
||||
# identity.unmerged — revocable-merge undo: contradicting
|
||||
# evidence cleared ``merged_into_uuid``
|
||||
# and re-split observations. The
|
||||
# resurrected side's UUID is the same
|
||||
# as the prior loser, so subscribers
|
||||
# that cached references to the loser
|
||||
# during the merged interval can
|
||||
# re-attach without a new lookup.
|
||||
#
|
||||
# ``identity.campaign.assigned`` is a campaign-clusterer event; it is
|
||||
# documented with IDENTITY_CAMPAIGN_ASSIGNED below.
|
||||
IDENTITY_FORMED = "formed"
|
||||
IDENTITY_OBSERVATION_LINKED = "observation.linked"
|
||||
IDENTITY_MERGED = "merged"
|
||||
IDENTITY_UNMERGED = "unmerged"
|
||||
# Campaign-clusterer cross-family event — fires under ``identity.>`` so
|
||||
# identity-stream subscribers (e.g. the IdentityDetail SSE client) get
|
||||
# notified the moment an identity's ``campaign_id`` changes without
|
||||
# having to subscribe to the campaign topic family. The same event
|
||||
# fires under ``campaign.identity.assigned`` for campaign-side
|
||||
# subscribers.
|
||||
IDENTITY_CAMPAIGN_ASSIGNED = "campaign.assigned"
|
||||
|
||||
# Campaign-clusterer event types (second/third tokens under
|
||||
# ``campaign``). Mirror of the identity family at the layer above:
|
||||
# campaigns group identities into operations, and the clusterer
|
||||
# publishes the same form / link / merge / unmerge lifecycle.
|
||||
#
|
||||
# campaign.formed — clusterer creates a new campaign from
|
||||
# one or more identities
|
||||
# campaign.identity.assigned — identity attached to an existing
|
||||
# campaign (or reassigned from another)
|
||||
# campaign.merged — two campaigns collapsed; loser gets
|
||||
# ``merged_into_uuid`` set, subscribers
|
||||
# re-key cached references to the winner
|
||||
# campaign.unmerged — revocable-merge undo: contradicting
|
||||
# evidence cleared ``merged_into_uuid``
|
||||
# and re-split identities
|
||||
CAMPAIGN_FORMED = "formed"
|
||||
CAMPAIGN_IDENTITY_ASSIGNED = "identity.assigned"
|
||||
CAMPAIGN_MERGED = "merged"
|
||||
CAMPAIGN_UNMERGED = "unmerged"
|
||||
|
||||
# Credential event types (second/third tokens under ``credential``).
|
||||
# ``credential.captured`` fires once per upserted Credential row — the
|
||||
# correlator listens for it and runs the cred-reuse query in response,
|
||||
# so reuse detection latency is sub-second after a fresh capture.
|
||||
# ``credential.reuse.detected`` fires when the correlator inserts a new
|
||||
# CredentialReuse row or grows an existing one (added decky/service/IP).
|
||||
CREDENTIAL_CAPTURED = "captured"
|
||||
CREDENTIAL_REUSE_DETECTED = "reuse.detected"
|
||||
|
||||
# Attribution-engine event types (second/third tokens under
|
||||
# ``attribution``). Published by the v0 attribution worker
|
||||
# (``decnet.correlation.attribution_worker``) which subscribes to
|
||||
# ``attacker.observation.>`` and runs the per-(identity, primitive)
|
||||
# state machine. See ``development/ATTRIBUTION-ENGINE.md``.
|
||||
#
|
||||
# attribution.profile.state_changed — per-primitive state
|
||||
# transition (e.g.
|
||||
# stable → drifting).
|
||||
# Payload: identity_uuid,
|
||||
# primitive, old_state,
|
||||
# new_state, current_value,
|
||||
# confidence,
|
||||
# observation_count, ts.
|
||||
# attribution.profile.multi_actor_suspected — fires when ≥ 2
|
||||
# primitives flag the same
|
||||
# identity as multi_actor
|
||||
# concurrently. Cross-
|
||||
# primitive correlator;
|
||||
# single-primitive
|
||||
# multi_actor is too noisy
|
||||
# on its own. Payload:
|
||||
# identity_uuid, primitives,
|
||||
# evidence_summary,
|
||||
# confidence, ts.
|
||||
#
|
||||
# These are *derived* signals — distinct from
|
||||
# ``identity.*`` (clusterer lifecycle, IDENTITY_RESOLUTION.md) and
|
||||
# ``attacker.observation.*`` (raw extractor envelopes,
|
||||
# BEHAVE-INTEGRATION.md). The three families compose: observations feed
|
||||
# the attribution engine, the engine emits derived state, the clusterer
|
||||
# reads observations + state to form / merge identities.
|
||||
ATTRIBUTION_PROFILE_PREFIX = "profile"
|
||||
ATTRIBUTION_PROFILE_STATE_CHANGED = "profile.state_changed"
|
||||
ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED = "profile.multi_actor_suspected"
|
||||
|
||||
# Canary-token event types (third token under ``canary``).
|
||||
#
|
||||
# canary.{token_id}.placed — orchestrator/API successfully planted a
|
||||
# canary artifact inside a decky's
|
||||
# filesystem (or persisted a passive token
|
||||
# that has no callback wiring). Lets
|
||||
# dashboards reflect baseline coverage in
|
||||
# real time without a DB poll.
|
||||
# canary.{token_id}.triggered — ``decnet canary`` worker observed a
|
||||
# callback hit (HTTP slug or DNS subdomain
|
||||
# lookup) for the token. Payload carries
|
||||
# ``src_ip``, ``user_agent``, ``request_path``
|
||||
# and any DNS qname so downstream
|
||||
# consumers (correlator, webhook fanout)
|
||||
# can attribute and forward without a
|
||||
# follow-up DB read.
|
||||
# canary.{token_id}.revoked — operator removed a token; planter unlinked
|
||||
# the file (best-effort) and the row was
|
||||
# marked ``revoked``. Subscribers may
|
||||
# evict cached lookups by token id.
|
||||
CANARY_PLACED = "placed"
|
||||
CANARY_TRIGGERED = "triggered"
|
||||
CANARY_REVOKED = "revoked"
|
||||
|
||||
# Orchestrator event types (second token under ``orchestrator``). The
|
||||
# orchestrator worker publishes one of these per synthetic action it
|
||||
# drives against a decky — cheap inter-decky traffic and filesystem
|
||||
# mutations whose role is to keep the honeypot from looking suspiciously
|
||||
# static. Always nested with the destination decky uuid as the third
|
||||
# token, so consumers can subscribe to a single decky's life-injection
|
||||
# stream via ``orchestrator.*.<decky_uuid>``.
|
||||
ORCHESTRATOR_TRAFFIC = "traffic"
|
||||
ORCHESTRATOR_FILE = "file"
|
||||
# Emailgen — published by the ``decnet emailgen`` worker once per generated
|
||||
# fake email delivered into a mail decky's maildir. Third token is the
|
||||
# destination mail-decky uuid (the IMAP/POP3 host serving the mailbox),
|
||||
# matching the ``orchestrator.*.<decky_uuid>`` subscription pattern.
|
||||
ORCHESTRATOR_EMAIL = "email"
|
||||
|
||||
# System event types.
|
||||
SYSTEM_LOG = "log"
|
||||
SYSTEM_BUS_HEALTH = "bus.health"
|
||||
# Worker-health leaf — built per-worker as ``system.<worker>.health`` via
|
||||
# :func:`system_health`. The leaf constant stays the same across workers;
|
||||
# the worker name goes in the middle token.
|
||||
SYSTEM_HEALTH = "health"
|
||||
# Worker-control leaf — built per-worker as ``system.<worker>.control`` via
|
||||
# :func:`system_control`. Admin-originated stop intents travel on this
|
||||
# topic; each worker subscribes to its own.
|
||||
SYSTEM_CONTROL = "control"
|
||||
|
||||
# Control payload ``action`` values — the wire vocabulary. Only ``stop`` is
|
||||
# handled in v1; ``start`` is reserved because a stopped worker has no
|
||||
# subscriber, so starting requires external supervision (systemd).
|
||||
WORKER_CONTROL_STOP = "stop"
|
||||
WORKER_CONTROL_START = "start"
|
||||
|
||||
# Webhook subscription-set changed — published by the CRUD router after any
|
||||
# create / update / delete on WebhookSubscription so the webhook worker can
|
||||
# reload its in-memory subscription list and re-subscribe to the new union
|
||||
# of patterns. Payload is currently empty; consumers only need the signal.
|
||||
WEBHOOK_SUBSCRIPTIONS_CHANGED = "system.webhook.subscriptions_changed"
|
||||
|
||||
# Email-receipt event — fired by smtp / smtp-relay services on full-message
|
||||
# receipt (envelope + headers + body + attachments captured). Single-token
|
||||
# leaf so the bus tokenizer accepts it directly under the ``email`` root.
|
||||
# Consumed by the TTP ``email_lifter`` for header / body-pattern / attachment
|
||||
# rules. PII rule (TTP_TAGGING.md "Hard parts §6"): payload carries hashes,
|
||||
# counts, header names, and rcpt-domain sets — never rcpt addresses or body
|
||||
# bytes.
|
||||
EMAIL_RECEIVED = "received"
|
||||
|
||||
# TTP-tagging event types (second/third tokens under ``ttp``).
|
||||
#
|
||||
# ttp.tagged — one or more new tags written. Published
|
||||
# only when ``INSERT OR IGNORE`` wrote at
|
||||
# least one new row; idempotent
|
||||
# re-evaluations publish nothing
|
||||
# (loop-prevention invariant — see
|
||||
# TTP_TAGGING.md).
|
||||
# ttp.rule.fired.{technique_id} — per-technique fan-out for SIEM
|
||||
# consumers that subscribe to a single
|
||||
# technique. Topic key is the parent
|
||||
# technique; sub_technique is in the
|
||||
# payload. Built via :func:`ttp_rule_fired`.
|
||||
# ttp.rule.suppressed — rule fired but the tag was dropped
|
||||
# (confidence below floor, rate-limited,
|
||||
# or the rule's RuleState was disabled).
|
||||
# Observability signal for the dashboard.
|
||||
#
|
||||
# Per-rule reload + state-change topics. Built via
|
||||
# :func:`ttp_rule_reloaded` / :func:`ttp_rule_state`; SIEM consumers
|
||||
# subscribe to ``ttp.rule.reloaded.>`` (every rule) or
|
||||
# ``ttp.rule.reloaded.R0001`` (one rule) at their preferred granularity.
|
||||
TTP_TAGGED = "tagged"
|
||||
TTP_RULE_FIRED = "rule.fired"
|
||||
TTP_RULE_SUPPRESSED = "rule.suppressed"
|
||||
TTP_RULE_RELOADED = "rule.reloaded"
|
||||
TTP_RULE_STATE = "rule.state"
|
||||
|
||||
|
||||
# ─── Builders ────────────────────────────────────────────────────────────────
|
||||
|
||||
def topology_mutation(topology_id: str, state: str) -> str:
    """Return the topic ``topology.<id>.mutation.<state>``.

    *state* is expected to be one of the ``MUTATION_*`` constants. Both
    arguments are passed through ``_reject_tokens`` before the topic is
    assembled.
    """
    _reject_tokens(topology_id, state)
    return "{}.{}.mutation.{}".format(TOPOLOGY, topology_id, state)
|
||||
|
||||
|
||||
def topology_status(topology_id: str) -> str:
    """Return the topic ``topology.<id>.status``.

    *topology_id* is passed through ``_reject_tokens`` first.
    """
    _reject_tokens(topology_id)
    return "{}.{}.{}".format(TOPOLOGY, topology_id, TOPOLOGY_STATUS)
|
||||
|
||||
|
||||
def decky(decky_id: str, event_type: str) -> str:
    """Return the topic ``decky.<id>.<event_type>``.

    *event_type* is typically ``DECKY_STATE`` or ``DECKY_TRAFFIC``. Both
    arguments are passed through ``_reject_tokens`` before the topic is
    assembled.
    """
    _reject_tokens(decky_id, event_type)
    return "{}.{}.{}".format(DECKY, decky_id, event_type)
|
||||
|
||||
|
||||
def decky_mutation(decky_id: str) -> str:
    """Return the topic ``decky.<id>.mutation``.

    *decky_id* is passed through ``_reject_tokens`` first.
    """
    _reject_tokens(decky_id)
    return "{}.{}.{}".format(DECKY, decky_id, DECKY_MUTATION)
|
||||
|
||||
|
||||
def decky_lifecycle(decky_id: str) -> str:
    """Return the topic ``decky.<id>.lifecycle``.

    *decky_id* is passed through ``_reject_tokens`` first.
    """
    _reject_tokens(decky_id)
    return "{}.{}.{}".format(DECKY, decky_id, DECKY_LIFECYCLE)
|
||||
|
||||
|
||||
def system(event_type: str) -> str:
    """Return the topic ``system.<event_type>``.

    *event_type* may contain dots (e.g. ``bus.health``); it is only
    prefixed, not re-validated. An empty value raises ``ValueError``.
    """
    if event_type:
        return "{}.{}".format(SYSTEM, event_type)
    raise ValueError("system topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def credential(event_type: str) -> str:
    """Return the topic ``credential.<event_type>``.

    *event_type* is typically :data:`CREDENTIAL_CAPTURED` or
    :data:`CREDENTIAL_REUSE_DETECTED`. Dotted leaves such as
    ``reuse.detected`` are accepted as-is. An empty value raises
    ``ValueError``.
    """
    if event_type:
        return "{}.{}".format(CREDENTIAL, event_type)
    raise ValueError("credential topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def attacker(event_type: str) -> str:
    """Return the topic ``attacker.<event_type>``.

    *event_type* is typically one of ``ATTACKER_OBSERVED``,
    ``ATTACKER_SCORED``, ``ATTACKER_SESSION_STARTED`` or
    ``ATTACKER_SESSION_ENDED``. Dotted leaves such as ``session.started``
    are accepted as-is. An empty value raises ``ValueError``.
    """
    if event_type:
        return "{}.{}".format(ATTACKER, event_type)
    raise ValueError("attacker topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def attacker_observation(primitive: str) -> str:
    """Return the topic ``attacker.observation.<primitive>``.

    *primitive* is the fully-qualified BEHAVE-SHELL primitive path, for
    example ``motor.input_modality`` or
    ``motor.shell_mastery.tab_completion``. Dots inside it are kept
    verbatim; the value is only prefixed.

    An empty string raises ``ValueError`` so a caller typo cannot ship as
    the dangling topic ``attacker.observation.``.
    """
    if primitive:
        return "{}.{}.{}".format(ATTACKER, ATTACKER_OBSERVATION_PREFIX, primitive)
    raise ValueError(
        "attacker_observation topic requires a non-empty primitive",
    )
|
||||
|
||||
|
||||
def attribution(event_type: str) -> str:
    """Return the bus topic ``attribution.<event_type>``.

    Callers normally pass
    :data:`ATTRIBUTION_PROFILE_STATE_CHANGED` or
    :data:`ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED`.  Both contain a
    dot (``profile.state_changed``), which is allowed under the same
    "trailing dotted leaf" rule that ``attacker.session.started`` uses.

    Raises :class:`ValueError` when *event_type* is empty.
    """
    if event_type:
        return f"{ATTRIBUTION}.{event_type}"
    raise ValueError("attribution topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def campaign(event_type: str) -> str:
    """Return the bus topic ``campaign.<event_type>``.

    Callers normally pass :data:`CAMPAIGN_FORMED`,
    :data:`CAMPAIGN_IDENTITY_ASSIGNED`, :data:`CAMPAIGN_MERGED` or
    :data:`CAMPAIGN_UNMERGED`.  A dotted leaf such as
    ``identity.assigned`` is accepted — same rationale as :func:`system`.

    Raises :class:`ValueError` when *event_type* is empty.
    """
    if event_type:
        return f"{CAMPAIGN}.{event_type}"
    raise ValueError("campaign topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def identity(event_type: str) -> str:
    """Return the bus topic ``identity.<event_type>``.

    Callers normally pass :data:`IDENTITY_FORMED`,
    :data:`IDENTITY_OBSERVATION_LINKED`, :data:`IDENTITY_MERGED` or
    :data:`IDENTITY_UNMERGED`.  A dotted leaf such as
    ``observation.linked`` is accepted — same rationale as
    :func:`system`.

    Raises :class:`ValueError` when *event_type* is empty.
    """
    if event_type:
        return f"{IDENTITY}.{event_type}"
    raise ValueError("identity topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def orchestrator(event_type: str, decky_id: str) -> str:
    """Return the bus topic ``orchestrator.<event_type>.<decky_id>``.

    *event_type* should be :data:`ORCHESTRATOR_TRAFFIC` or
    :data:`ORCHESTRATOR_FILE`.  The destination decky always occupies the
    third token, which lets per-decky subscribers listen on
    ``orchestrator.*.<decky_uuid>``.

    Both arguments are checked by :func:`_reject_tokens`, which raises
    :class:`ValueError` for a segment that is empty or contains ``.``,
    ``*``, ``>`` or whitespace.
    """
    # Validate first so a malformed segment never reaches the wire.
    _reject_tokens(event_type, decky_id)
    topic = f"{ORCHESTRATOR}.{event_type}.{decky_id}"
    return topic
|
||||
|
||||
|
||||
def canary(token_id: str, event_type: str) -> str:
    """Return the bus topic ``canary.<token_id>.<event_type>``.

    *event_type* should be :data:`CANARY_PLACED`,
    :data:`CANARY_TRIGGERED` or :data:`CANARY_REVOKED`.  The token id
    always occupies the second token, so per-token subscribers can listen
    on ``canary.<token_id>.>`` while fleet-wide consumers (webhook
    fanout, correlator) use ``canary.>``.

    Both arguments are checked by :func:`_reject_tokens`, which raises
    :class:`ValueError` for a segment that is empty or contains ``.``,
    ``*``, ``>`` or whitespace.
    """
    # Validate first so a malformed segment never reaches the wire.
    _reject_tokens(token_id, event_type)
    topic = f"{CANARY}.{token_id}.{event_type}"
    return topic
|
||||
|
||||
|
||||
def system_health(worker: str) -> str:
    """Return the bus topic ``system.<worker>.health``.

    Worker-health heartbeats are a nested leaf under ``system``: a
    consumer can watch every worker with ``system.*.health`` or a single
    one with, e.g., ``system.mutator.health``.

    *worker* is validated as a regular segment by :func:`_reject_tokens`
    — no dots, wildcards, or whitespace, and not empty — and a violation
    raises :class:`ValueError`.
    """
    _reject_tokens(worker)
    topic = f"{SYSTEM}.{worker}.{SYSTEM_HEALTH}"
    return topic
|
||||
|
||||
|
||||
def system_control(worker: str) -> str:
    """Return the bus topic ``system.<worker>.control``.

    Admin-originated stop (and, eventually, start) intents are published
    on this topic; the worker concerned subscribes to its own address and
    reacts.  Payload shape::

        {"action": "stop", "requested_by": "<username>", "ts": <unix>}

    *action* must be :data:`WORKER_CONTROL_STOP` or
    :data:`WORKER_CONTROL_START`; the listener ignores any other value.

    *worker* follows the same segment rules as :func:`system_health`:
    it is checked by :func:`_reject_tokens`, which raises
    :class:`ValueError` on an invalid segment.
    """
    _reject_tokens(worker)
    topic = f"{SYSTEM}.{worker}.{SYSTEM_CONTROL}"
    return topic
|
||||
|
||||
|
||||
def smtp(event_type: str) -> str:
    """Return the bus topic ``smtp.<event_type>``.

    *event_type* may contain dots (e.g. ``probe.pending``); it only has
    to be non-empty.

    Raises :class:`ValueError` when *event_type* is empty.
    """
    if event_type:
        return f"{SMTP}.{event_type}"
    raise ValueError("smtp topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def email_topic(event_type: str) -> str:
    """Return the bus topic ``email.<event_type>``.

    The builder is called ``email_topic`` rather than ``email`` so it
    does not shadow the Python ``email`` stdlib package at import sites
    that pull in both.  *event_type* is typically :data:`EMAIL_RECEIVED`.

    Raises :class:`ValueError` when *event_type* is empty.
    """
    if event_type:
        return f"{EMAIL}.{event_type}"
    raise ValueError("email topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def ttp(event_type: str) -> str:
    """Return the bus topic ``ttp.<event_type>``.

    Callers normally pass :data:`TTP_TAGGED`, :data:`TTP_RULE_FIRED` or
    :data:`TTP_RULE_SUPPRESSED`.  A dotted leaf such as ``rule.fired``
    is accepted — same rationale as :func:`system`.  For per-technique
    fan-out use :func:`ttp_rule_fired` instead.

    Raises :class:`ValueError` when *event_type* is empty.
    """
    if event_type:
        return f"{TTP}.{event_type}"
    raise ValueError("ttp topic requires a non-empty event_type")
|
||||
|
||||
|
||||
def ttp_rule_fired(technique_id: str) -> str:
    """Return the bus topic ``ttp.rule.fired.<technique_id>``.

    Per-technique fan-out: a SIEM subscriber can listen on
    ``ttp.rule.fired.>`` for everything or on ``ttp.rule.fired.T1110``
    for a single technique.

    *technique_id* must be a single segment (checked by
    :func:`_reject_tokens`, which raises :class:`ValueError`).
    Sub-techniques such as ``T1110.001`` are therefore rejected — the dot
    would split them into two tokens.  The topic key is the parent
    technique; ``sub_technique_id`` lives in the payload.
    """
    _reject_tokens(technique_id)
    topic = f"{TTP}.rule.fired.{technique_id}"
    return topic
|
||||
|
||||
|
||||
def ttp_rule_reloaded(rule_id: str) -> str:
    """Return the bus topic ``ttp.rule.reloaded.<rule_id>``.

    Per-rule fan-out fired by the :class:`~decnet.ttp.store.base.RuleStore`
    when a rule's *definition* changes (a YAML edit on the filesystem
    backend, a ``ttp_rule`` row update on the database backend).  There
    is one event per per-rule edit — never batched (the "incremental,
    never batched" property in TTP_TAGGING.md §"Bus topics" inherits its
    granularity from :meth:`RuleStore.subscribe_changes`).

    Subscribers use ``ttp.rule.reloaded.>`` for every rule or
    ``ttp.rule.reloaded.R0001`` for one.

    *rule_id* must be a single segment; :func:`_reject_tokens` raises
    :class:`ValueError` otherwise.
    """
    _reject_tokens(rule_id)
    topic = f"{TTP}.{TTP_RULE_RELOADED}.{rule_id}"
    return topic
|
||||
|
||||
|
||||
def ttp_rule_state(rule_id: str) -> str:
    """Return the bus topic ``ttp.rule.state.<rule_id>``.

    Per-rule fan-out fired by the :class:`~decnet.ttp.store.base.RuleStore`
    when a rule's *operational state* changes (an operator hits the
    disable button, or an ``expires_at`` TTL fires and auto-reverts the
    state).

    *rule_id* must be a single segment; :func:`_reject_tokens` raises
    :class:`ValueError` otherwise.
    """
    _reject_tokens(rule_id)
    topic = f"{TTP}.{TTP_RULE_STATE}.{rule_id}"
    return topic
|
||||
|
||||
|
||||
def _reject_tokens(*parts: str) -> None:
    """Validate that every argument is a well-formed single topic segment.

    A dot, a wildcard (``*`` or ``>``), whitespace, or an empty string
    inside a *segment* would silently corrupt the NATS-style hierarchy
    (e.g. ``topology.a.b.status`` for a ``topology_id`` of ``"a.b"``).
    Failing early at the builder keeps a malformed topic off the wire.

    Segments are checked in order; the first offending one raises
    :class:`ValueError`.
    """
    for segment in parts:
        if not segment:
            raise ValueError("topic segment must not be empty")
        # One pass over the characters covers the three reserved
        # tokenizer characters and any kind of whitespace.
        has_forbidden = any(ch in ".*>" or ch.isspace() for ch in segment)
        if has_forbidden:
            raise ValueError(
                f"topic segment {segment!r} may not contain '.', '*', '>', or whitespace"
            )
|
||||
@@ -1,258 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""UNIX-socket client — :class:`UnixSocketBus` implementation of :class:`BaseBus`.
|
||||
|
||||
Holds one open socket to the local :class:`~decnet.bus.unix_server.BusServer`.
|
||||
Operations:
|
||||
|
||||
* :meth:`publish` writes a single ``PUB`` frame and returns; no ack.
|
||||
* :meth:`subscribe` writes a ``SUB`` frame and returns a
|
||||
:class:`~decnet.bus.base.Subscription` backed by an :class:`asyncio.Queue`
|
||||
that the background reader task feeds.
|
||||
|
||||
One background reader task per bus instance dispatches incoming ``EVT``
|
||||
frames to every registered subscription whose pattern matches the topic.
|
||||
On connection drop or close, every subscription is woken via a sentinel so
|
||||
iterators unblock cleanly; callers see :class:`StopAsyncIteration` from the
|
||||
``async for`` loop.
|
||||
|
||||
No auto-reconnect in MVP. If the server restarts, callers must
|
||||
:meth:`close` the bus and construct a new one. This mirrors how other
|
||||
DECNET workers handle their dependencies — the systemd ``Restart=on-failure``
|
||||
supervision above us is the retry loop.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import os
|
||||
import pathlib
|
||||
from typing import Any
|
||||
|
||||
from decnet.bus import protocol
|
||||
from decnet.bus.base import (
|
||||
BaseBus,
|
||||
Event,
|
||||
Subscription,
|
||||
_CLOSE_SENTINEL,
|
||||
matches,
|
||||
)
|
||||
from decnet.bus.fake import _enqueue_drop_oldest as _enqueue_event_drop_oldest
|
||||
from decnet.logging import get_logger
|
||||
|
||||
log = get_logger("bus.client")
|
||||
|
||||
_INBOUND_QUEUE_SIZE = 1024
|
||||
|
||||
|
||||
class _UnixSubscription(Subscription):
    """Subscription fed through a queue owned by a :class:`UnixSocketBus`.

    The bus enqueues matching events (and, on shutdown, the close
    sentinel) into ``_queue``; iteration pops from it.
    """

    def __init__(
        self,
        bus: "UnixSocketBus",
        pattern: str,
        queue: "asyncio.Queue[Any]",
    ) -> None:
        super().__init__(pattern)
        self._queue = queue
        self._bus = bus

    async def __anext__(self) -> Event:
        """Return the next queued event, or stop once closed.

        Iteration ends when ``self._closed`` is already set (presumably
        maintained by the :class:`Subscription` base — confirm there) or
        when the close sentinel is dequeued.
        """
        if not self._closed:
            nxt = await self._queue.get()
            if nxt is not _CLOSE_SENTINEL:
                return nxt
        raise StopAsyncIteration

    async def _aclose(self) -> None:
        """Detach from the bus and try to wake a pending ``__anext__``."""
        await self._bus._unregister(self)
        # Best effort: if the queue is full the sentinel is not enqueued.
        with contextlib.suppress(asyncio.QueueFull):
            self._queue.put_nowait(_CLOSE_SENTINEL)
|
||||
|
||||
|
||||
class UnixSocketBus(BaseBus):
    """Client handle for a local :class:`BusServer`.

    One instance per process typically; multiple instances simply open
    multiple sockets to the same server.  Connection is lazy — the first
    :meth:`connect` (or any publish/subscribe call via ``async with``)
    opens the socket.

    Concurrency notes:

    * :meth:`connect` is serialized by ``self._lock`` so coroutines racing
      to make the first connection (e.g. two ``SUB`` tasks scheduled back
      to back by :meth:`subscribe`) open exactly one socket and start
      exactly one reader task.
    * Frame writes are serialized by ``self._write_lock``.
    * Tasks scheduled by :meth:`subscribe` are kept in ``self._sub_tasks``
      until they finish, because the event loop itself only holds weak
      references to tasks.
    """

    def __init__(
        self,
        socket_path: pathlib.Path | str,
        *,
        client_name: str | None = None,
    ) -> None:
        self._path = pathlib.Path(socket_path)
        self._client_name = client_name or f"decnet-bus-client[{os.getpid()}]"
        self._reader: asyncio.StreamReader | None = None
        self._writer: asyncio.StreamWriter | None = None
        self._reader_task: asyncio.Task[None] | None = None
        self._subs: list[_UnixSubscription] = []
        # Guards connection establishment (see connect()).
        self._lock = asyncio.Lock()
        # Guards writes so frames from concurrent senders never interleave.
        self._write_lock = asyncio.Lock()
        self._closed = False
        # Strong references to in-flight SUB tasks (see subscribe()).
        self._sub_tasks: set[asyncio.Future[None]] = set()
        # Sticky flag: the first publish-on-closed-bus call logs at
        # WARNING so operators see that a publish was dropped; subsequent
        # calls on the same instance log at DEBUG only to prevent a
        # log flood when stream threads drain after close. The bus is
        # critical infra, so the first warning is non-negotiable.
        self._closed_publish_warned = False

    # ─── Lifecycle ──────────────────────────────────────────────────────────

    async def connect(self) -> None:
        """Open the socket, send ``HELLO`` and start the reader task.

        A no-op when already connected.  Concurrent callers are
        serialized so only one connection is ever opened.

        Raises :class:`RuntimeError` if the bus has been closed; errors
        from opening the socket propagate unchanged.
        """
        if self._writer is not None:
            return
        if self._closed:
            raise RuntimeError("connect on closed bus")
        async with self._lock:
            # Re-check under the lock: another coroutine may have finished
            # connecting, or close() may have run, while we were waiting.
            if self._writer is not None:
                return
            if self._closed:
                raise RuntimeError("connect on closed bus")
            reader, writer = await asyncio.open_unix_connection(str(self._path))
            if self._closed:
                # close() ran while the socket was being opened; it could
                # not see this writer, so release it here instead of
                # leaking a connection on a closed bus.
                writer.close()
                raise RuntimeError("connect on closed bus")
            self._reader, self._writer = reader, writer
            await self._send(protocol.encode(protocol.HELLO, args=self._client_name))
            self._reader_task = asyncio.create_task(self._reader_loop())
        log.debug("bus.client: connected to %s as %s", self._path, self._client_name)

    async def close(self) -> None:
        """Tear the connection down and wake every subscription.

        Safe to call more than once; later calls return immediately.
        """
        if self._closed:
            return
        self._closed = True

        # Best-effort BYE — we don't care if it fails.
        if self._writer is not None and not self._writer.is_closing():
            with contextlib.suppress(Exception):
                await self._send(protocol.encode(protocol.BYE))

        if self._reader_task is not None:
            self._reader_task.cancel()
            with contextlib.suppress(asyncio.CancelledError):
                await self._reader_task
            self._reader_task = None

        if self._writer is not None:
            with contextlib.suppress(Exception):
                self._writer.close()
                await self._writer.wait_closed()
            self._writer = None
            self._reader = None

        # Wake every subscription so `async for` exits.
        for sub in list(self._subs):
            self._wake(sub)
        self._subs.clear()

    @staticmethod
    def _wake(sub: _UnixSubscription) -> None:
        """Deliver the close sentinel to *sub*, evicting one event if full.

        Dropping the sentinel on a full queue would leave the consumer
        blocked forever once it had drained the backlog, so the oldest
        queued item is discarded to make room — the same drop-oldest
        policy that applies to ordinary events.
        """
        queue = sub._queue
        try:
            queue.put_nowait(_CLOSE_SENTINEL)
        except asyncio.QueueFull:
            with contextlib.suppress(asyncio.QueueEmpty):
                queue.get_nowait()
            with contextlib.suppress(asyncio.QueueFull):
                queue.put_nowait(_CLOSE_SENTINEL)

    # ─── Pub/Sub ────────────────────────────────────────────────────────────

    async def publish(
        self,
        topic: str,
        payload: dict[str, Any],
        *,
        event_type: str = "",
    ) -> None:
        """Send one ``PUB`` frame for *topic*; there is no acknowledgement.

        Connects lazily if needed.  Publishing on a closed bus, or a
        connection error while sending, is logged and swallowed rather
        than raised.  Errors raised while establishing the lazy
        connection are not caught here.
        """
        if self._closed:
            # Degrade gracefully: the DB is the source of truth, the bus
            # is only the notification layer. Raising here made every
            # caller via publish_safely flood the logs once per stream
            # line during shutdown races. First drop warns loudly;
            # subsequent drops on the same instance are DEBUG-only.
            if not self._closed_publish_warned:
                self._closed_publish_warned = True
                log.warning(
                    "bus.client: publish on closed bus dropped topic=%s "
                    "(further drops on this instance logged at DEBUG)",
                    topic,
                )
            else:
                log.debug("bus.client: publish on closed bus dropped topic=%s", topic)
            return
        if self._writer is None:
            await self.connect()
        body = Event(topic=topic, payload=payload, type=event_type).to_dict()
        try:
            await self._send(protocol.encode(protocol.PUB, args=topic, body=body))
        except ConnectionError as exc:  # includes BrokenPipeError
            # Bus loss is a logged warning, never a publisher crash. The
            # DB-as-source-of-truth invariant means the work is already
            # persisted; the missing event is just a missed notification.
            log.warning("bus.client: publish failed: %s", exc)

    def subscribe(self, pattern: str) -> Subscription:
        """Register a local subscription and schedule the ``SUB`` frame.

        Raises :class:`RuntimeError` if the bus has been closed.
        """
        if self._closed:
            raise RuntimeError("subscribe on closed bus")
        queue: asyncio.Queue[Any] = asyncio.Queue(maxsize=_INBOUND_QUEUE_SIZE)
        sub = _UnixSubscription(self, pattern, queue)
        self._subs.append(sub)
        # Schedule the SUB frame asynchronously so subscribe() stays sync,
        # matching the BaseBus signature. The caller will shortly `async
        # with` / `async for` the subscription, which will run the event
        # loop and pick this task up.
        task = asyncio.ensure_future(self._send_sub(pattern))
        # Keep a strong reference until the task completes; otherwise it
        # may be garbage-collected before (or while) it runs.
        self._sub_tasks.add(task)
        task.add_done_callback(self._sub_tasks.discard)
        return sub

    async def _send_sub(self, pattern: str) -> None:
        """Connect if necessary and send ``SUB``; failures are logged only."""
        try:
            if self._writer is None:
                await self.connect()
            await self._send(protocol.encode(protocol.SUB, args=pattern))
        except Exception as exc:  # pragma: no cover - network paths in live tests
            log.warning("bus.client: SUB %s failed: %s", pattern, exc)

    async def _unregister(self, sub: _UnixSubscription) -> None:
        """Drop *sub* locally and ``UNSUB`` its pattern when unused."""
        try:
            self._subs.remove(sub)
        except ValueError:
            return
        # Tell the server we no longer want events for this pattern if no
        # other local subscription still wants it.
        if not any(s.pattern == sub.pattern for s in self._subs):
            with contextlib.suppress(Exception):
                await self._send(protocol.encode(protocol.UNSUB, args=sub.pattern))

    # ─── Internal I/O ───────────────────────────────────────────────────────

    async def _send(self, frame_bytes: bytes) -> None:
        """Write one encoded frame and wait for the buffer to drain.

        Raises :class:`ConnectionError` when there is no open connection.
        """
        if self._writer is None:
            raise ConnectionError("bus.client: not connected")
        async with self._write_lock:
            self._writer.write(frame_bytes)
            await self._writer.drain()

    async def _reader_loop(self) -> None:
        """Read frames until EOF, ``BYE``, an error, or cancellation.

        ``EVT`` frames are dispatched to matching subscriptions.  However
        the loop ends, every registered subscription is woken.
        """
        if self._reader is None:
            return
        try:
            while True:
                frame = await protocol.read_frame(self._reader)
                if frame is None:
                    break
                if frame.verb != protocol.EVT:
                    # Clients only ever legitimately receive EVT (or BYE).
                    if frame.verb == protocol.BYE:
                        break
                    log.warning("bus.client: unexpected verb from server: %s", frame.verb)
                    continue
                topic = frame.args
                data = protocol.decode_body(frame.body) if frame.body else {}
                event = Event.from_dict(topic, data)
                self._dispatch(event)
        except protocol.ProtocolError as exc:
            log.warning("bus.client: protocol error: %s", exc)
        except (asyncio.IncompleteReadError, ConnectionError):
            pass
        except asyncio.CancelledError:
            raise
        except Exception:  # pragma: no cover
            log.exception("bus.client: reader loop crashed")
        finally:
            # Server-side close — wake every subscription.
            for sub in list(self._subs):
                self._wake(sub)

    def _dispatch(self, event: Event) -> None:
        """Enqueue *event* on every subscription whose pattern matches."""
        for sub in self._subs:
            if matches(sub.pattern, event.topic):
                _enqueue_event_drop_oldest(sub._queue, event)
|
||||
@@ -1,310 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""UNIX-socket server for the DECNET bus.
|
||||
|
||||
One :class:`BusServer` per host. Accepts local connections on a UNIX-domain
|
||||
socket; each connection may:
|
||||
|
||||
* publish events (``PUB`` frames) that the server fans out to all matching
|
||||
subscribers on other connections, and
|
||||
* subscribe to patterns (``SUB`` frames) and receive matching events as
|
||||
``EVT`` frames.
|
||||
|
||||
Authorization is socket file permissions (0660, group=``decnet`` if that
|
||||
POSIX group exists, else the server process's own group). Anything the
|
||||
kernel lets ``connect()`` is trusted — there is no verb-level auth. This
|
||||
matches the "local processes on the same host" threat model; cross-host
|
||||
federation is out of scope (see DEBT-029).
|
||||
|
||||
Backpressure is per-connection, drop-oldest: if a subscriber can't drain its
|
||||
outbound queue fast enough, the server discards the oldest pending event
|
||||
rather than blocking publishers. The bus is at-most-once by contract, so
|
||||
drops are acceptable; stalled publishers are not.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import grp
|
||||
import os
|
||||
import pathlib
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from decnet.bus import protocol
|
||||
from decnet.bus.base import Event, matches
|
||||
from decnet.logging import get_logger
|
||||
|
||||
log = get_logger("bus.server")
|
||||
|
||||
_SOCKET_MODE = 0o660
|
||||
_DEFAULT_GROUP = "decnet"
|
||||
_OUTBOUND_QUEUE_SIZE = 1024
|
||||
|
||||
|
||||
@dataclass(eq=False)
class _Connection:
    """Per-connection server state."""

    # Stream used to send frames to this peer.
    writer: asyncio.StreamWriter
    # Display name for log lines; placeholder until the server records one.
    peer_name: str = "<unknown>"
    # Topic patterns this peer is currently subscribed to.
    patterns: set[str] = field(default_factory=set)
    # Bounded queue of encoded frames waiting to be written to the peer.
    outbound: asyncio.Queue[bytes] = field(
        default_factory=lambda: asyncio.Queue(maxsize=_OUTBOUND_QUEUE_SIZE),
    )
    # Set once the connection has been torn down.
    closed: bool = False
|
||||
|
||||
|
||||
class BusServer:
    """Serve a UNIX-socket bus on *socket_path*.

    Lifecycle: construct → :meth:`start` → :meth:`serve_forever` (or rely
    on :meth:`start` returning once bound) → :meth:`close` for teardown.
    Safe to :meth:`close` multiple times.
    """

    def __init__(
        self,
        socket_path: pathlib.Path | str,
        *,
        group: str | None = _DEFAULT_GROUP,
        mode: int = _SOCKET_MODE,
    ) -> None:
        self._path = pathlib.Path(socket_path)
        self._group = group
        self._mode = mode
        self._server: asyncio.base_events.Server | None = None
        self._connections: set[_Connection] = set()
        self._closed = False

    # ─── Lifecycle ──────────────────────────────────────────────────────────

    async def start(self) -> None:
        """Bind the socket and begin accepting connections.

        Removes any stale socket file at *socket_path* first (common case:
        the previous worker crashed without cleaning up). The parent
        directory must already exist; we do NOT create it blindly because
        the chosen directory (typically ``/run/decnet``) may require
        systemd ``RuntimeDirectory=`` to set up.

        A no-op when the server is already started.  Raises
        :class:`FileNotFoundError` when the parent directory is missing.
        """
        if self._server is not None:
            return

        parent = self._path.parent
        if not parent.exists():
            raise FileNotFoundError(
                f"bus socket parent directory {parent} does not exist; "
                f"create it with systemd RuntimeDirectory= or mkdir"
            )

        # Clean up a stale socket from a previous crash. If a live server
        # is actually listening there, ``bind()`` below will fail — we do
        # not try to detect live vs. stale ourselves.
        with contextlib.suppress(FileNotFoundError):
            if self._path.is_socket():
                self._path.unlink()

        self._server = await asyncio.start_unix_server(
            self._handle_connection, path=str(self._path),
        )
        _chmod_and_chown(self._path, self._mode, self._group)
        log.info("bus.server: listening on %s (mode=%o group=%s)",
                 self._path, self._mode, self._group or "<inherit>")

    async def serve_forever(self) -> None:
        """Accept connections until cancelled.

        Raises :class:`RuntimeError` if :meth:`start` has not been called.
        """
        if self._server is None:
            raise RuntimeError("BusServer not started")
        async with self._server:
            await self._server.serve_forever()

    async def close(self) -> None:
        """Stop listening, close every connection and remove the socket file.

        Safe to call more than once; later calls return immediately.
        """
        if self._closed:
            return
        self._closed = True

        server = self._server
        if server is not None:
            # Stop accepting new connections; existing ones stay open
            # until they are closed below.
            server.close()

        # Drain every live connection BEFORE waiting on the listener:
        # since Python 3.12 ``Server.wait_closed()`` also waits for active
        # connections to finish, so awaiting it first would block for as
        # long as any client stays connected.
        for conn in list(self._connections):
            await self._close_connection(conn)
        self._connections.clear()

        if server is not None:
            with contextlib.suppress(Exception):
                await server.wait_closed()
            self._server = None

        with contextlib.suppress(FileNotFoundError):
            self._path.unlink()
        log.info("bus.server: closed")

    # ─── Internal publish fan-out ───────────────────────────────────────────

    async def publish(self, topic: str, payload: dict[str, Any], event_type: str = "") -> None:
        """Server-side publish helper — used by the worker to emit
        ``system.bus.health`` heartbeats without opening a client loop."""
        event = Event(topic=topic, payload=payload, type=event_type)
        self._fanout(event)

    # ─── Connection handler ─────────────────────────────────────────────────

    async def _handle_connection(
        self,
        reader: asyncio.StreamReader,
        writer: asyncio.StreamWriter,
    ) -> None:
        """Run one client connection from accept to teardown.

        Starts the per-connection writer task, runs the reader loop, and
        always closes the connection and stops the writer task on exit.
        """
        conn = _Connection(writer=writer)
        self._connections.add(conn)
        writer_task = asyncio.create_task(self._writer_loop(conn))
        try:
            await self._reader_loop(conn, reader)
        except protocol.ProtocolError as exc:
            log.warning("bus.server: protocol error from %s: %s", conn.peer_name, exc)
        except (asyncio.IncompleteReadError, ConnectionError) as exc:
            log.debug("bus.server: %s disconnected: %s", conn.peer_name, exc)
        except Exception:  # pragma: no cover - defensive
            log.exception("bus.server: unhandled error in connection")
        finally:
            await self._close_connection(conn)
            self._connections.discard(conn)
            writer_task.cancel()
            with contextlib.suppress(asyncio.CancelledError):
                await writer_task

    async def _reader_loop(
        self, conn: _Connection, reader: asyncio.StreamReader,
    ) -> None:
        """Read and dispatch frames until EOF or a ``BYE`` frame."""
        while True:
            frame = await protocol.read_frame(reader)
            if frame is None:
                return
            await self._dispatch(conn, frame)
            if frame.verb == protocol.BYE:
                return

    async def _dispatch(self, conn: _Connection, frame: protocol.Frame) -> None:
        """Apply one client frame to *conn*.

        Raises :class:`protocol.ProtocolError` for ``SUB`` without a
        pattern, ``PUB`` without a topic, or any verb a client may not
        send.
        """
        if frame.verb == protocol.HELLO:
            conn.peer_name = frame.args or conn.peer_name
            log.debug("bus.server: HELLO from %s", conn.peer_name)
            return
        if frame.verb == protocol.SUB:
            pattern = frame.args
            if not pattern:
                raise protocol.ProtocolError("SUB requires a pattern")
            conn.patterns.add(pattern)
            log.debug("bus.server: %s SUB %s", conn.peer_name, pattern)
            return
        if frame.verb == protocol.UNSUB:
            conn.patterns.discard(frame.args)
            return
        if frame.verb == protocol.PUB:
            topic = frame.args
            if not topic:
                raise protocol.ProtocolError("PUB requires a topic")
            data = protocol.decode_body(frame.body) if frame.body else {}
            event = Event(
                topic=topic,
                payload=data.get("payload", {}) or {},
                type=data.get("type", "") or "",
            )
            self._fanout(event, origin=conn)
            return
        if frame.verb == protocol.BYE:
            return
        # EVT is server-to-client only; receiving one is a protocol violation.
        raise protocol.ProtocolError(f"unexpected verb {frame.verb!r} from client")

    def _fanout(self, event: Event, *, origin: _Connection | None = None) -> None:
        """Enqueue *event* as an EVT frame on every matching connection.

        We do NOT deliver back to the originating connection (a publisher
        does not receive its own event). Encoding happens once per event,
        not once per subscriber.
        """
        try:
            frame_bytes = protocol.encode(
                protocol.EVT, args=event.topic, body=event.to_dict(),
            )
        except protocol.ProtocolError:
            log.exception("bus.server: failed to encode EVT for topic=%s", event.topic)
            return

        for conn in self._connections:
            if conn is origin or conn.closed:
                continue
            if not any(matches(p, event.topic) for p in conn.patterns):
                continue
            _enqueue_drop_oldest(conn.outbound, frame_bytes, event.topic)

    async def _writer_loop(self, conn: _Connection) -> None:
        """Serialize writes onto *conn*'s socket.

        One writer task per connection so a slow peer only blocks its own
        queue, not the fan-out loop. The queue is bounded with drop-oldest
        policy applied at enqueue time (see :func:`_enqueue_drop_oldest`).
        """
        try:
            while not conn.closed:
                data = await conn.outbound.get()
                conn.writer.write(data)
                await conn.writer.drain()
        except (ConnectionError, BrokenPipeError):
            log.debug("bus.server: %s writer: peer closed", conn.peer_name)
        except asyncio.CancelledError:
            pass
        except Exception:  # pragma: no cover - defensive
            log.exception("bus.server: writer loop crashed for %s", conn.peer_name)

    async def _close_connection(self, conn: _Connection) -> None:
        """Mark *conn* closed and close its stream; idempotent, best-effort."""
        if conn.closed:
            return
        conn.closed = True
        with contextlib.suppress(Exception):
            conn.writer.close()
            await conn.writer.wait_closed()
|
||||
|
||||
|
||||
# ─── Helpers ─────────────────────────────────────────────────────────────────
|
||||
|
||||
def _chmod_and_chown(path: pathlib.Path, mode: int, group: str | None) -> None:
    """Set permission bits on *path* and, best effort, its group owner.

    Nothing is raised for the handled failures; they are logged instead.
    When *group* is ``None``/empty or names a group that does not exist,
    the group ownership is left untouched, which keeps the server usable
    on dev boxes that have no ``decnet`` group.
    """
    try:
        os.chmod(path, mode)
    except OSError as exc:
        log.warning("bus.server: chmod(%s, %o) failed: %s", path, mode, exc)

    if group:
        try:
            gid = grp.getgrnam(group).gr_gid
        except KeyError:
            log.debug("bus.server: group %r not found, leaving socket group unchanged", group)
        else:
            try:
                # uid -1 leaves the owner unchanged; only the group moves.
                os.chown(path, -1, gid)
            except PermissionError:
                # Dev box running as an unprivileged user can't chown. Log
                # at debug and move on — the socket is still usable by the
                # owner.
                log.debug("bus.server: chown(%s, gid=%d) denied; leaving as-is", path, gid)
            except OSError as exc:
                log.warning("bus.server: chown(%s, gid=%d) failed: %s", path, gid, exc)
|
||||
|
||||
|
||||
def _enqueue_drop_oldest(
    queue: "asyncio.Queue[bytes]", data: bytes, topic: str,
) -> None:
    """Drop-oldest backpressure — mirrors :func:`decnet.bus.fake._enqueue_drop_oldest`.

    Tries to enqueue *data* without blocking.  Each time the queue is
    full, the oldest entry is removed (and a warning naming *topic* is
    logged) before retrying.  If the queue reports full but nothing can
    be removed, the function gives up and *data* is not enqueued.
    """
    enqueued = False
    while not enqueued:
        try:
            queue.put_nowait(data)
        except asyncio.QueueFull:
            try:
                queue.get_nowait()
            except asyncio.QueueEmpty:
                return
            log.warning("bus.server: subscriber queue full, dropped event topic=%s", topic)
        else:
            enqueued = True
|
||||
@@ -1,122 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""``decnet bus`` worker entrypoint.
|
||||
|
||||
Starts a :class:`~decnet.bus.unix_server.BusServer` on the configured UNIX
|
||||
socket and serves forever, emitting a ``system.bus.health`` heartbeat on
|
||||
its own bus every :data:`HEARTBEAT_INTERVAL_SEC` seconds so liveness-aware
|
||||
consumers (dashboards, watchdogs) can tell the bus is up without polling
|
||||
the filesystem.
|
||||
|
||||
Cross-host federation is **out of scope** for the MVP; each host runs its
|
||||
own bus independently. See DEBT-029 for the deferred ``--bridge-tcp``
|
||||
mode that would proxy the socket over the swarm mTLS channel.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import pathlib
|
||||
import signal
|
||||
import time
|
||||
|
||||
from decnet.bus import topics
|
||||
from decnet.bus.unix_server import BusServer
|
||||
from decnet.logging import get_logger
|
||||
|
||||
log = get_logger("bus.worker")
|
||||
|
||||
HEARTBEAT_INTERVAL_SEC = 10
|
||||
|
||||
|
||||
async def bus_worker(
    socket_path: str | pathlib.Path,
    *,
    group: str | None = "decnet",
    heartbeat_interval: int = HEARTBEAT_INTERVAL_SEC,
) -> None:
    """Run the bus server until cancelled or SIGTERM/SIGINT is received.

    The parent directory of *socket_path* is expected to exist already
    (systemd's ``RuntimeDirectory=decnet`` handles this in prod; dev code
    is expected to ``mkdir`` first); :func:`_ensure_parent` decides
    whether a missing parent is created or reported as an error.

    On the way out — stop signal, cancellation or error — the heartbeat
    and serve tasks are cancelled and awaited, then the server is closed.
    """
    sock = pathlib.Path(socket_path)
    _ensure_parent(sock)

    server = BusServer(sock, group=group)
    await server.start()
    log.info("bus.worker: pid=%d socket=%s", os.getpid(), sock)

    stop_event = asyncio.Event()
    _install_signal_handlers(stop_event)

    background = (
        asyncio.create_task(_heartbeat_loop(server, heartbeat_interval)),
        asyncio.create_task(server.serve_forever()),
    )

    try:
        await stop_event.wait()
        log.info("bus.worker: shutdown signal received")
    finally:
        # Request cancellation of both tasks before awaiting either.
        for running in background:
            running.cancel()
        for running in background:
            try:
                await running
            except (asyncio.CancelledError, Exception):  # noqa: BLE001 - draining shutdown
                pass
        await server.close()
        log.info("bus.worker: stopped")
|
||||
|
||||
|
||||
async def _heartbeat_loop(server: BusServer, interval: int) -> None:
    """Publish ``system.bus.health`` on the server's own fan-out, forever.

    Each heartbeat carries the worker's pid, its uptime in seconds
    (rounded to milliseconds) and the current wall-clock timestamp.  The
    loop only ends when the task is cancelled.

    :param server: bus server whose ``publish`` coroutine is used.
    :param interval: seconds to sleep between heartbeats.
    """
    # Uptime is an elapsed duration, so measure it on the monotonic clock:
    # a wall-clock step (NTP correction, manual change) must not make the
    # reported uptime jump or go negative.  ``ts`` stays wall-clock.
    started = time.monotonic()
    while True:
        try:
            await server.publish(
                topics.system(topics.SYSTEM_BUS_HEALTH),
                {
                    "pid": os.getpid(),
                    "uptime_sec": round(time.monotonic() - started, 3),
                    "ts": time.time(),
                },
                event_type=topics.SYSTEM_BUS_HEALTH,
            )
        except Exception:  # pragma: no cover - heartbeat must never kill the worker
            log.exception("bus.worker: heartbeat publish failed")
        # Sleep after both success and failure so a failing publish cannot
        # turn into a busy loop.
        await asyncio.sleep(interval)
|
||||
|
||||
|
||||
def _install_signal_handlers(stop_event: asyncio.Event) -> None:
|
||||
loop = asyncio.get_running_loop()
|
||||
for sig in (signal.SIGTERM, signal.SIGINT):
|
||||
try:
|
||||
loop.add_signal_handler(sig, stop_event.set)
|
||||
except (NotImplementedError, RuntimeError):
|
||||
# add_signal_handler is not supported on Windows / in some
|
||||
# test harnesses where the loop is running in a non-main thread.
|
||||
# The worker still exits via KeyboardInterrupt bubbling up.
|
||||
pass
|
||||
|
||||
|
||||
def _ensure_parent(path: pathlib.Path) -> None:
|
||||
parent = path.parent
|
||||
if parent.exists():
|
||||
return
|
||||
# Dev-box convenience: if the parent is the user's ``~/.decnet`` dir,
|
||||
# create it. We do not auto-mkdir ``/run/decnet`` — that's systemd's job
|
||||
# and silently creating it as the wrong user would cause permission
|
||||
# confusion later.
|
||||
home_prefix = pathlib.Path.home() / ".decnet"
|
||||
try:
|
||||
parent.relative_to(home_prefix.parent)
|
||||
except ValueError:
|
||||
raise FileNotFoundError(
|
||||
f"bus socket parent {parent} does not exist; create it first"
|
||||
)
|
||||
parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
__all__ = ["bus_worker", "HEARTBEAT_INTERVAL_SEC"]
|
||||
@@ -1,38 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Canary tokens — decoy artifacts planted in decky filesystems.
|
||||
|
||||
Public surface is exported here so callers can ``from decnet.canary
|
||||
import CanaryArtifact, get_generator, get_instrumenter`` without
|
||||
knowing the submodule layout. Concrete generators / instrumenters
|
||||
live under :mod:`decnet.canary.generators` and
|
||||
:mod:`decnet.canary.instrumenters` respectively; the factory keeps
|
||||
import-time cost down by deferring those imports until first use
|
||||
(same pattern as :mod:`decnet.intel.factory`).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import (
|
||||
CanaryArtifact,
|
||||
CanaryContext,
|
||||
CanaryGenerator,
|
||||
CanaryInstrumenter,
|
||||
)
|
||||
from decnet.canary.factory import (
|
||||
KNOWN_GENERATORS,
|
||||
KNOWN_INSTRUMENTERS,
|
||||
get_generator,
|
||||
get_instrumenter,
|
||||
pick_instrumenter_for_mime,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"CanaryArtifact",
|
||||
"CanaryContext",
|
||||
"CanaryGenerator",
|
||||
"CanaryInstrumenter",
|
||||
"KNOWN_GENERATORS",
|
||||
"KNOWN_INSTRUMENTERS",
|
||||
"get_generator",
|
||||
"get_instrumenter",
|
||||
"pick_instrumenter_for_mime",
|
||||
]
|
||||
@@ -1,19 +0,0 @@
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
// Node helper invoked by decnet.canary.obfuscator.
|
||||
// Reads {code, options} JSON from stdin, writes obfuscated JS to stdout.
|
||||
// Kept dependency-light on purpose: only javascript-obfuscator.
|
||||
const JsObf = require('javascript-obfuscator');

// The request is one JSON document, so buffer stdin completely and only
// parse once the stream has ended.
const pieces = [];
process.stdin.setEncoding('utf8');
process.stdin.on('data', (piece) => {
  pieces.push(piece);
});
process.stdin.on('end', () => {
  const raw = pieces.join('');
  try {
    // Expected shape: {code: string, options?: object}.
    const { code, options } = JSON.parse(raw);
    const obfuscation = JsObf.obfuscate(code, options || {});
    process.stdout.write(obfuscation.getObfuscatedCode());
  } catch (e) {
    // Any failure (bad JSON, obfuscator error) is reported on stderr and
    // signalled to the caller with exit status 2.
    process.stderr.write(String(e && e.stack || e));
    process.exit(2);
  }
});
|
||||
@@ -1,152 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Canary generator / instrumenter ABCs and the artifact dataclass.
|
||||
|
||||
Two flavors of producer share the same return shape:
|
||||
|
||||
* :class:`CanaryGenerator` synthesises a fake artifact from scratch
|
||||
(e.g. a plausible ``~/.aws/credentials`` block, a ``.git/config``
|
||||
pointing at an attacker-bait remote URL). Operators don't supply
|
||||
any input.
|
||||
|
||||
* :class:`CanaryInstrumenter` mutates an operator-uploaded blob to
|
||||
embed the callback (HTTP slug + DNS host). The original blob bytes
|
||||
are passed in; the instrumenter returns the mutated version.
|
||||
|
||||
Both return a :class:`CanaryArtifact` — the planter doesn't care
|
||||
which path produced it. Same dataclass keeps the planter's
|
||||
docker-exec injector trivial.
|
||||
|
||||
ABCs intentionally do not include I/O — generators and instrumenters
|
||||
are pure functions of (slug, host, blob?). All filesystem work
|
||||
happens in :mod:`decnet.canary.planter` and :mod:`decnet.canary.storage`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
class CanaryContext:
    """Callback coordinates handed to every generator and instrumenter.

    ``callback_token`` is the unique slug.  It is used verbatim in HTTP
    URLs (``https://<host>/c/<callback_token>``) and as the leftmost DNS
    label (``<callback_token>.canary.<dns_zone>``), so one slug maps to
    one :class:`CanaryToken` row whichever path the attacker trips.

    ``http_base`` and ``dns_zone`` are taken from the canary worker's
    public-facing configuration (``DECNET_CANARY_HTTP_BASE``,
    ``DECNET_CANARY_DNS_ZONE``).  An empty ``dns_zone`` means DNS is not
    deployed; instrumenters whose only realistic embed point is a
    hostname raise in that case.
    """

    #: Unique slug identifying the token.
    callback_token: str
    #: e.g. "https://canary.example.test" — no trailing slash
    http_base: str
    #: e.g. "canary.example.test"; "" disables DNS embeds
    dns_zone: str = ""
    #: "linux" | "windows" — drives default username, path style
    persona: str = "linux"
|
||||
|
||||
|
||||
@dataclass
class CanaryArtifact:
    """Bytes-and-placement bundle produced by a generator/instrumenter."""

    #: Absolute path inside the target container.
    path: str

    #: Final bytes that hit the decky filesystem.  Always raw bytes — the
    #: planter base64-encodes for the wire so binary blobs (DOCX/PNG/PDF)
    #: survive ``docker exec sh -c`` safely.
    content: bytes

    #: Unix file mode.  Defaults to ``0600`` because most realistic canary
    #: placements (``~/.aws/credentials``, ``.env``, ``id_rsa``) are
    #: operator-only.  Honeydocs in user docs folders should pass ``0o644``.
    mode: int = 0o600

    #: Seconds relative to *now* for the planted file's mtime.  Negative
    #: values backdate the file so it doesn't look like it appeared the
    #: moment the decky was deployed.  ``-86400 * 90`` (90 days ago) is a
    #: common choice for ``honeydoc`` artifacts; ``0`` means "stamp it now",
    #: which is fine for ``aws_creds``-like files that would plausibly be
    #: touched recently.
    mtime_offset: int = 0

    #: Identifier of the instrumenter that produced this artifact (for
    #: upload-driven tokens).  Mirrored into ``CanaryToken.instrumenter``.
    #: Mutually exclusive with :attr:`generator`.
    instrumenter: Optional[str] = None

    #: Identifier of the generator that produced this artifact (for
    #: synthesised tokens).  Mirrored into ``CanaryToken.generator``.
    #: Mutually exclusive with :attr:`instrumenter`.
    generator: Optional[str] = None

    #: Human-readable notes about the embedding (e.g. "DOCX: injected 1×1
    #: remote image at relsId rId99").  Surfaced in the API ``preview``
    #: response so the operator sees what we did before planting.  Never
    #: leaked to the attacker-facing surface.
    notes: list[str] = field(default_factory=list)

    #: Per-mint HMAC nonce for fingerprint canaries; ``None`` for
    #: everything else.  Cultivator reads this and persists it on
    #: ``CanaryToken.fingerprint_nonce`` so the worker can validate
    #: incoming ``?k=`` params.
    fingerprint_nonce: Optional[str] = None
|
||||
|
||||
|
||||
class CanaryGenerator(ABC):
    """Abstract producer that synthesises a fake artifact from scratch."""

    #: short tag — matches ``CanaryToken.generator``
    name: str

    @abstractmethod
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build and return the artifact for *ctx*.

        Contract for implementations:

        * MUST NOT do I/O.
        * MUST be deterministic for the same ``(callback_token,
          http_base, dns_zone, persona)``, so that re-seeding from
          :attr:`CanaryToken.secret_seed` yields byte-identical output
          and the planter is naturally idempotent.
        """
|
||||
|
||||
|
||||
class CanaryInstrumenter(ABC):
    """Abstract mutator that embeds a callback into an operator-uploaded blob."""

    #: short tag — matches ``CanaryToken.instrumenter``
    name: str

    #: MIME prefixes this instrumenter handles, e.g.
    #: ``("application/pdf",)`` or ``("text/",)``.  Empty by default.
    #: Intended for dispatch by sniffed content-type.
    #: NOTE(review): ``decnet.canary.factory.pick_instrumenter_for_mime``
    #: dispatches with ``str.startswith`` against its own table; confirm
    #: whether this attribute is consulted anywhere.
    mime_prefixes: tuple[str, ...] = ()

    @abstractmethod
    def instrument(
        self,
        blob: bytes,
        ctx: CanaryContext,
        *,
        target_path: str,
    ) -> CanaryArtifact:
        """Return an artifact holding *blob* with the callback embedded.

        Implementations MUST raise :class:`InstrumenterRejectedError`
        when the blob cannot be mutated safely (corrupt zip, encrypted
        PDF, etc.), so the API can answer with a 400 and the specific
        reason instead of silently shipping the original bytes.
        """
|
||||
|
||||
|
||||
class InstrumenterRejectedError(ValueError):
    """Signals that an instrumenter refused a blob it cannot safely mutate."""
|
||||
@@ -1,193 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Realism contract adapter for canary generators.
|
||||
|
||||
Stage 7 of the realism migration. The orchestrator's planner picks a
|
||||
``canary_*`` :class:`~decnet.realism.taxonomy.ContentClass` 1–3% of
|
||||
the time on file ticks; this module turns that pick into a
|
||||
:class:`~decnet.canary.base.CanaryArtifact` (bytes the SSH driver
|
||||
plants) plus a persisted :class:`~decnet.web.db.models.CanaryToken`
|
||||
row so the canary worker recognises the slug when an attacker trips
|
||||
it.
|
||||
|
||||
What this is NOT: it doesn't pick *when* canaries fire — that's the
|
||||
realism planner's job. It doesn't decide *where* on the filesystem
|
||||
the canary lands beyond what realism naming + persona conventions
|
||||
already produce. It's a thin bytes-and-row factory bolted onto the
|
||||
realism contract.
|
||||
|
||||
Stealth (per ``feedback_stealth.md``): we never leak the
|
||||
``DECNET`` literal into anything that survives to the planted file.
|
||||
The underlying generators are already stealth-clean; this wrapper
|
||||
must not undo that.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import secrets as _secrets
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Optional
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext
|
||||
from decnet.canary.factory import get_generator
|
||||
from decnet.logging import get_logger
|
||||
from decnet.realism.personas import login_for
|
||||
from decnet.realism.taxonomy import ContentClass, Plan
|
||||
|
||||
log = get_logger("canary.cultivator")
|
||||
|
||||
|
||||
# Lookup from a realism ``ContentClass`` to the name of the canary
# generator that serves it.  Mirrors
# :data:`decnet.canary.factory.KNOWN_GENERATORS`.
_CLASS_TO_GENERATOR: dict[ContentClass, str] = {
    ContentClass.CANARY_AWS_CREDS: "aws_creds",
    ContentClass.CANARY_ENV_FILE: "env_file",
    ContentClass.CANARY_GIT_CONFIG: "git_config",
    ContentClass.CANARY_SSH_KEY: "ssh_key",
    ContentClass.CANARY_HONEYDOC: "honeydoc",
    ContentClass.CANARY_HONEYDOC_DOCX: "honeydoc_docx",
    ContentClass.CANARY_HONEYDOC_PDF: "honeydoc_pdf",
    ContentClass.CANARY_MYSQL_DUMP: "mysql_dump",
    ContentClass.CANARY_FINGERPRINT_HTML: "fingerprint_html",
    ContentClass.CANARY_FINGERPRINT_SVG: "fingerprint_svg",
}
|
||||
|
||||
|
||||
# Generator → CanaryKind. The trip surface (HTTP slug callback / DNS
|
||||
# resolution / passive bait) determines how the canary worker matches
|
||||
# an attacker callback to this token. Aligned with
|
||||
# :data:`decnet.web.db.models.canary.CanaryKind`.
|
||||
_GENERATOR_TO_KIND: dict[str, str] = {
|
||||
"aws_creds": "aws_passive", # no embedded callback; passive bait
|
||||
"env_file": "http",
|
||||
"git_config": "http",
|
||||
"honeydoc": "http",
|
||||
"honeydoc_docx": "http",
|
||||
"honeydoc_pdf": "http",
|
||||
"ssh_key": "dns", # trip is DNS resolution of host comment
|
||||
"mysql_dump": "dns", # trip is DNS resolution of subdomain
|
||||
"fingerprint_html": "http", # obfuscated JS beacons GET /c/<slug>
|
||||
"fingerprint_svg": "http", # same, embedded inside SVG <script>
|
||||
}
|
||||
|
||||
|
||||
# Conventional placement path per canary content class.  The realism
# planner doesn't know about decoy-realistic credential locations
# (``~/.aws/credentials``, ``~/.git/config``), so each class is mapped
# here to a spot an attacker would actually look.  ``{persona}`` is
# substituted with the persona's login name by ``_path_for``.
_DEFAULT_PATH: dict[ContentClass, str] = {
    ContentClass.CANARY_AWS_CREDS: "/home/{persona}/.aws/credentials",
    ContentClass.CANARY_ENV_FILE: "/home/{persona}/app/.env",
    ContentClass.CANARY_GIT_CONFIG: "/home/{persona}/.git/config",
    ContentClass.CANARY_SSH_KEY: "/home/{persona}/.ssh/id_rsa",
    ContentClass.CANARY_HONEYDOC: "/home/{persona}/Documents/notes.html",
    ContentClass.CANARY_HONEYDOC_DOCX: "/home/{persona}/Documents/Q3-Operations-Review.docx",
    ContentClass.CANARY_HONEYDOC_PDF: "/home/{persona}/Documents/Q3-Operations-Review.pdf",
    ContentClass.CANARY_MYSQL_DUMP: "/var/backups/db_backup.sql",
    ContentClass.CANARY_FINGERPRINT_HTML: "/home/{persona}/Documents/asset_directory.html",
    ContentClass.CANARY_FINGERPRINT_SVG: "/home/{persona}/Documents/network_topology.svg",
}
|
||||
|
||||
|
||||
def _path_for(plan: Plan) -> str:
    """Return the filesystem path where the canary for *plan* is placed.

    A template registered in :data:`_DEFAULT_PATH` for the plan's content
    class takes precedence: canary placements follow stronger conventions
    (``~/.aws/credentials``, ``~/.ssh/id_rsa``) than the realism namer's
    vocabulary.  The template's ``{persona}`` placeholder is filled with
    ``login_for(plan.persona)``.  Without a template the planner-supplied
    ``plan.target_path`` is returned unchanged.
    """
    template = _DEFAULT_PATH.get(plan.content_class)
    return (
        plan.target_path
        if template is None
        else template.format(persona=login_for(plan.persona))
    )
|
||||
|
||||
|
||||
def _new_callback_token() -> str:
|
||||
"""16 url-safe bytes — same shape canary slug fields use elsewhere."""
|
||||
return _secrets.token_urlsafe(16)
|
||||
|
||||
|
||||
async def cultivate(
    plan: Plan,
    repo: Any,
    *,
    http_base: Optional[str] = None,
    dns_zone: Optional[str] = None,
    created_by: str = "system",
) -> CanaryArtifact:
    """Realism-driven canary plant.

    Build a :class:`CanaryContext`, ask the right generator for bytes,
    persist a ``canary_tokens`` row so the canary worker can attribute
    callbacks to this token, and return the artifact for the SSH driver
    to plant.

    *http_base* and *dns_zone* default to the ``DECNET_CANARY_HTTP_BASE``
    and ``DECNET_CANARY_DNS_ZONE`` environment variables respectively,
    and to ``""`` when those are unset too.  When both are empty,
    generators that need a callback host (``ssh_key`` DNS,
    ``mysql_dump``) raise; the planner's caller logs and falls back to a
    non-canary plan.

    :param plan: realism plan; its ``content_class`` must be a canary class.
    :param repo: object exposing an awaitable ``create_canary_token(dict)``.
    :param http_base: override for the HTTP callback base URL.
    :param dns_zone: override for the DNS callback zone.
    :param created_by: value stored in the token row's ``created_by``.
    :returns: a new :class:`CanaryArtifact` carrying the generator's
        bytes and metadata, with ``path`` set to the placement path.
    :raises ValueError: *plan* does not have a canary content class.
    :raises KeyError: no generator is mapped for the content class.
    """
    if not plan.content_class.is_canary():
        raise ValueError(
            f"cultivate() called with non-canary content_class="
            f"{plan.content_class!r}"
        )
    gen_name = _CLASS_TO_GENERATOR.get(plan.content_class)
    if gen_name is None:
        raise KeyError(
            f"no canary generator mapped for content_class="
            f"{plan.content_class!r}"
        )

    callback_token = _new_callback_token()
    http_base_str: str = http_base or os.environ.get("DECNET_CANARY_HTTP_BASE") or ""
    dns_zone_str: str = dns_zone or os.environ.get("DECNET_CANARY_DNS_ZONE") or ""
    ctx = CanaryContext(
        callback_token=callback_token,
        http_base=http_base_str,
        dns_zone=dns_zone_str,
        persona="linux",  # all our deckies are POSIX in MVP
    )
    generator = get_generator(gen_name)
    artifact = generator.generate(ctx)

    # The generator returns ``path=""`` (planter fills it normally).
    # We have a realism-derived path on hand; stuff it in for the SSH
    # driver's plant_file call AND the canary_tokens row.
    placement_path = _path_for(plan)

    # Persist the token row before planting so the canary worker can
    # attribute a callback if the artifact trips during the plant
    # itself (improbable but possible — DOCX viewers can preview
    # autoplay-style).
    token_data: dict = {
        "kind": _GENERATOR_TO_KIND.get(gen_name, "http"),
        "decky_name": plan.decky_name,
        "instrumenter": None,
        "generator": gen_name,
        "placement_path": placement_path,
        "callback_token": callback_token,
        "secret_seed": callback_token,  # deterministic re-seed compatible
        "placed_at": datetime.now(timezone.utc),
        "created_by": created_by,
        "state": "planted",
    }
    if artifact.fingerprint_nonce is not None:
        token_data["fingerprint_nonce"] = artifact.fingerprint_nonce
    await repo.create_canary_token(token_data)

    # Carry the placement_path on the artifact so the orchestrator's
    # plant_file call uses it.  We don't mutate the generator's
    # original — copy with the new path.  Every field is forwarded,
    # including ``fingerprint_nonce``, which the previous copy dropped so
    # the returned artifact disagreed with the persisted token row.
    return CanaryArtifact(
        path=placement_path,
        content=artifact.content,
        mode=artifact.mode,
        mtime_offset=artifact.mtime_offset,
        instrumenter=artifact.instrumenter,
        generator=artifact.generator,
        notes=list(artifact.notes),
        fingerprint_nonce=artifact.fingerprint_nonce,
    )
|
||||
@@ -1,208 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Minimal authoritative DNS server for canary tokens (stdlib only).
|
||||
|
||||
We don't need a full resolver — only enough to:
|
||||
|
||||
1. Decode an inbound query's qname.
|
||||
2. If the qname matches ``<slug>.<canary_zone>``, log the callback,
|
||||
publish ``canary.<token_id>.triggered`` on the bus, and return a
|
||||
plausible A record (any RFC-5737 reserved address would do; we
|
||||
use 192.0.2.1) so the attacker's resolver doesn't loop on
|
||||
NXDOMAIN.
|
||||
3. For unknown qnames return NXDOMAIN.
|
||||
|
||||
DNS-over-UDP wire format is well-trodden: 12-byte header + name
|
||||
labels + qtype + qclass. We implement just the bits we need.
|
||||
|
||||
This module deliberately avoids the ``dnslib`` PyPI package so the
|
||||
canary worker has no extra dependency surface. If we ever need
|
||||
EDNS0, DNSSEC, or other niceties we'll swap to dnslib then.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import struct
|
||||
from dataclasses import dataclass
|
||||
from typing import Awaitable, Callable, Optional, Tuple
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DNSQuery:
    """Immutable view of one decoded DNS question plus header basics."""

    #: Transaction id from the header.
    txid: int
    #: Question name — lowercase, no trailing dot.
    qname: str
    #: Question type code.
    qtype: int
    #: Question class code.
    qclass: int
    #: Raw 16-bit header flags word.
    flags: int
|
||||
|
||||
|
||||
def _decode_name(buf: bytes, offset: int) -> Tuple[str, int]:
|
||||
"""Return ``(qname_lowercase_no_dot, bytes_consumed)``.
|
||||
|
||||
Supports compressed pointers (RFC 1035 §4.1.4). Doesn't recurse —
|
||||
we walk the pointer chain iteratively with a hop cap to avoid
|
||||
pointer-loop DoS.
|
||||
"""
|
||||
labels: list[str] = []
|
||||
pos = offset
|
||||
consumed = 0
|
||||
jumped = False
|
||||
hops = 0
|
||||
while True:
|
||||
if pos >= len(buf):
|
||||
raise ValueError("truncated DNS name")
|
||||
length = buf[pos]
|
||||
if length == 0:
|
||||
pos += 1
|
||||
if not jumped:
|
||||
consumed = pos - offset
|
||||
break
|
||||
if (length & 0xC0) == 0xC0:
|
||||
# Compression pointer.
|
||||
if pos + 1 >= len(buf):
|
||||
raise ValueError("truncated DNS pointer")
|
||||
ptr = ((length & 0x3F) << 8) | buf[pos + 1]
|
||||
if not jumped:
|
||||
consumed = (pos + 2) - offset
|
||||
pos = ptr
|
||||
jumped = True
|
||||
hops += 1
|
||||
if hops > 10:
|
||||
raise ValueError("DNS pointer loop")
|
||||
continue
|
||||
pos += 1
|
||||
if pos + length > len(buf):
|
||||
raise ValueError("truncated DNS label")
|
||||
labels.append(buf[pos:pos + length].decode("ascii", "replace"))
|
||||
pos += length
|
||||
return ".".join(labels).lower(), consumed
|
||||
|
||||
|
||||
def parse_query(packet: bytes) -> DNSQuery:
    """Parse the (single) question of a DNS query packet.

    :param packet: raw DNS message as received from the wire.
    :returns: the decoded question together with the header's
        transaction id and flags word.
    :raises ValueError: the packet is shorter than the 12-byte header,
        has the QR (response) bit set, does not carry exactly one
        question, or is truncated inside the question.
    """
    if len(packet) < 12:
        raise ValueError("DNS packet too short")
    txid, flags, qdcount, _ancount, _nscount, _arcount = struct.unpack(
        "!HHHHHH", packet[:12]
    )
    if flags & 0x8000:
        # QR=1 marks a response.  Answering responses lets a spoofed
        # source address bounce packets between two DNS servers, so treat
        # them as malformed input.
        raise ValueError("DNS packet is a response, not a query")
    if qdcount != 1:
        raise ValueError(f"expected 1 question, got {qdcount}")
    qname, consumed = _decode_name(packet, 12)
    # QTYPE and QCLASS (two 16-bit fields) follow the encoded name.
    pos = 12 + consumed
    if pos + 4 > len(packet):
        raise ValueError("truncated DNS qtype/qclass")
    qtype, qclass = struct.unpack("!HH", packet[pos:pos + 4])
    return DNSQuery(
        txid=txid, qname=qname, qtype=qtype, qclass=qclass, flags=flags,
    )
|
||||
|
||||
|
||||
def _encode_name(name: str) -> bytes:
|
||||
out = bytearray()
|
||||
for label in name.split("."):
|
||||
if not label:
|
||||
continue
|
||||
b = label.encode("ascii", "replace")
|
||||
out.append(len(b))
|
||||
out.extend(b)
|
||||
out.append(0)
|
||||
return bytes(out)
|
||||
|
||||
|
||||
def _build_response(
    query: DNSQuery,
    *,
    rcode: int = 0,
    answer_ip: Optional[str] = None,
) -> bytes:
    """Serialise the DNS response for *query*.

    The packet echoes the query's transaction id and question section.
    *rcode* 0 = NOERROR, 3 = NXDOMAIN.  Exactly one A record (class IN,
    TTL 60) is appended when *answer_ip* is supplied, the query asked
    for an A record (qtype 1), and *rcode* is 0; otherwise the answer
    section is empty.

    :param query: the parsed question being answered.
    :param rcode: response code OR-ed into the header flags.
    :param answer_ip: dotted-quad IPv4 address for the A record, if any.
    """
    with_answer = bool(answer_ip) and query.qtype == 1 and rcode == 0

    # response + authoritative + RA bit clear + rcode
    flags = 0x8400 | rcode
    packet = struct.pack(
        "!HHHHHH", query.txid, flags, 1, 1 if with_answer else 0, 0, 0,
    )
    packet += _encode_name(query.qname)
    packet += struct.pack("!HH", query.qtype, query.qclass)

    if with_answer and answer_ip is not None:
        # Name is a compression pointer back to the question (offset 12),
        # followed by TYPE=A, CLASS=IN, TTL=60, RDLENGTH=4.
        packet += struct.pack("!HHHIH", 0xC000 | 12, 1, 1, 60, 4)
        packet += bytes(map(int, answer_ip.split(".")))

    return packet
|
||||
|
||||
|
||||
# Hook signature: receives the matched slug, the decoded query, and the
# sender's IP address; returns nothing. The worker uses it to persist a
# CanaryTrigger row and publish the bus event.
|
||||
TriggerHook = Callable[[str, DNSQuery, str], Awaitable[None]]
|
||||
|
||||
|
||||
class CanaryDNSProtocol(asyncio.DatagramProtocol):
    """asyncio UDP server endpoint for canary DNS callbacks.

    Constructor takes the canary zone (``"canary.example.test"``) and a
    coroutine function called when a query matches ``<slug>.<zone>``.
    The hook is scheduled on the event loop; the receive path does not
    wait for it.

    :param zone: canary zone; case and leading/trailing dots are ignored.
    :param hook: called as ``hook(slug, query, sender_ip)``.
    :param answer_ip: IPv4 address returned in A answers for known names.
    """

    def __init__(
        self,
        zone: str,
        hook: TriggerHook,
        *,
        answer_ip: str = "192.0.2.1",
    ) -> None:
        # Normalise: lowercase, no leading/trailing dot.
        self._zone = zone.lower().strip(".")
        self._suffix = "." + self._zone if self._zone else ""
        self._hook = hook
        self._answer_ip = answer_ip
        self._transport: Optional[asyncio.DatagramTransport] = None
        # Strong references to in-flight hook tasks.  The event loop only
        # keeps weak references, so an otherwise unreferenced task can be
        # garbage-collected before it finishes.
        self._pending_hooks: set = set()

    def connection_made(self, transport) -> None:
        """Remember the transport so replies can be sent."""
        self._transport = transport

    def datagram_received(
        self, data: bytes, addr: Tuple[str, int],
    ) -> None:
        """Answer one inbound datagram and fire the hook on a slug match."""
        try:
            query = parse_query(data)
        except ValueError:
            # Malformed query — drop silently.  Returning a FORMERR
            # would tip off the attacker that *something* is listening
            # on this port; the stealth posture (feedback_stealth)
            # prefers radio silence on parse errors.
            return
        slug = self._slug_for(query.qname)
        if slug is None:
            # Unknown name — NXDOMAIN.
            self._send(addr, _build_response(query, rcode=3))
            return
        # Known name — answer with our sinkhole IP, then fire the hook.
        self._send(addr, _build_response(query, answer_ip=self._answer_ip))
        task = asyncio.ensure_future(self._hook(slug, query, addr[0]))
        self._pending_hooks.add(task)
        task.add_done_callback(self._pending_hooks.discard)

    def _slug_for(self, qname: str) -> Optional[str]:
        """Return the slug of ``<slug>.<zone>``, or ``None`` on no match."""
        if not self._zone or not qname.endswith(self._suffix):
            return None
        slug = qname[: -len(self._suffix)]
        # Single-label slug only; multi-label means the attacker is
        # querying a sub-resource we don't model.
        if not slug or "." in slug:
            return None
        return slug

    def _send(self, addr: Tuple[str, int], packet: bytes) -> None:
        """Send *packet* to *addr*; a no-op before ``connection_made``."""
        if self._transport is not None:
            self._transport.sendto(packet, addr)
|
||||
@@ -1,154 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Generator and instrumenter factories.
|
||||
|
||||
Same lazy-import pattern as :mod:`decnet.intel.factory` — concrete
|
||||
implementations stay un-imported until first use so importing
|
||||
:mod:`decnet.canary` from a CLI subcommand doesn't drag in
|
||||
``pikepdf`` / ``python-docx`` / ``Pillow`` for callers that only
|
||||
need the model layer.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
from decnet.canary.base import CanaryGenerator, CanaryInstrumenter
|
||||
|
||||
# Every name ``get_generator`` accepts.
KNOWN_GENERATORS: Tuple[str, ...] = (
    "git_config", "env_file", "ssh_key", "aws_creds",
    "honeydoc", "honeydoc_docx", "honeydoc_pdf",
    "mysql_dump",
    "fingerprint_html", "fingerprint_svg",
)
|
||||
|
||||
# Every name ``get_instrumenter`` accepts.
KNOWN_INSTRUMENTERS: Tuple[str, ...] = (
    "docx", "xlsx", "pdf", "html", "image", "plain", "passthrough",
)
|
||||
|
||||
|
||||
def get_generator(name: str) -> CanaryGenerator:
    """Instantiate the generator registered under ``name``.

    The implementing module is imported only when its name is requested,
    so unused generators are never imported.

    :raises ValueError: ``name`` is not a known generator — lets a typo
        in an API request surface as a 400 rather than silently
        producing nothing.
    """
    if name == "git_config":
        from decnet.canary.generators.git_config import GitConfigGenerator as impl
    elif name == "env_file":
        from decnet.canary.generators.env_file import EnvFileGenerator as impl
    elif name == "ssh_key":
        from decnet.canary.generators.ssh_key import SSHKeyGenerator as impl
    elif name == "aws_creds":
        from decnet.canary.generators.aws_creds import AWSCredsGenerator as impl
    elif name == "honeydoc":
        from decnet.canary.generators.honeydoc import HoneydocGenerator as impl
    elif name == "honeydoc_docx":
        from decnet.canary.generators.honeydoc_docx import HoneydocDocxGenerator as impl
    elif name == "honeydoc_pdf":
        from decnet.canary.generators.honeydoc_pdf import HoneydocPdfGenerator as impl
    elif name == "mysql_dump":
        from decnet.canary.generators.mysql_dump import MySQLDumpGenerator as impl
    elif name == "fingerprint_html":
        from decnet.canary.generators.fingerprint_html import (
            FingerprintHtmlGenerator as impl,
        )
    elif name == "fingerprint_svg":
        from decnet.canary.generators.fingerprint_svg import (
            FingerprintSvgGenerator as impl,
        )
    else:
        raise ValueError(
            f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}"
        )
    return impl()
|
||||
|
||||
|
||||
def get_instrumenter(name: str) -> CanaryInstrumenter:
    """Instantiate the instrumenter registered under ``name``.

    The implementing module is imported only when its name is requested.

    :raises ValueError: ``name`` is not a known instrumenter.
    """
    if name == "docx":
        from decnet.canary.instrumenters.docx import DocxInstrumenter as impl
    elif name == "xlsx":
        from decnet.canary.instrumenters.xlsx import XlsxInstrumenter as impl
    elif name == "pdf":
        from decnet.canary.instrumenters.pdf import PdfInstrumenter as impl
    elif name == "html":
        from decnet.canary.instrumenters.html import HtmlInstrumenter as impl
    elif name == "image":
        from decnet.canary.instrumenters.image import ImageInstrumenter as impl
    elif name == "plain":
        from decnet.canary.instrumenters.plain import PlainInstrumenter as impl
    elif name == "passthrough":
        from decnet.canary.instrumenters.passthrough import (
            PassthroughInstrumenter as impl,
        )
    else:
        raise ValueError(
            f"Unknown canary instrumenter: {name!r}. Known: {KNOWN_INSTRUMENTERS}"
        )
    return impl()
|
||||
|
||||
|
||||
# MIME → instrumenter dispatch. Order matters: the table is walked
# top-to-bottom and the first prefix match wins, so exact types
# (e.g. ``text/html``) must come before the generic ``text/`` prefix
# that would otherwise swallow them.
_MIME_DISPATCH: tuple[tuple[str, str], ...] = (
    # Office Open XML — DOCX/XLSX share a zip structure but expose
    # different inner trees, so dispatch by MIME alias rather than
    # zip-poking.
    ("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"),
    ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"),
    ("application/pdf", "pdf"),
    ("text/html", "html"),
    ("application/xhtml+xml", "html"),
    ("image/png", "image"),
    ("image/jpeg", "image"),
    ("image/gif", "image"),
    # Plaintext catch-alls — config files, .env, .ini, .yaml, .json,
    # source code. All handled by the same regex-substitution pass.
    ("text/", "plain"),
    ("application/json", "plain"),
    ("application/x-yaml", "plain"),
    ("application/yaml", "plain"),
    ("application/toml", "plain"),
)


def pick_instrumenter_for_mime(content_type: str) -> str:
    """Return the instrumenter name registered for a sniffed MIME.

    The value is stripped of surrounding whitespace and lower-cased, then
    matched by prefix against ``_MIME_DISPATCH`` (first match wins), so
    parameters such as ``; charset=utf-8`` do not prevent a match.

    Falls back to ``"passthrough"`` for anything we don't have an
    embedder for (binary blobs we can't mutate safely — random
    container images, archives, executables). ``passthrough`` only
    supports DNS-callback tokens (the slug ends up in the filename or
    an accompanying README), so the API surfaces that constraint to
    the operator before they pick a kind.

    Args:
        content_type: MIME type string; may be empty.

    Returns:
        The instrumenter name for the first matching prefix, or
        ``"passthrough"`` when the input is empty or nothing matches.
    """
    if not content_type:
        return "passthrough"
    # Strip before matching: a header value with leading whitespace would
    # otherwise never satisfy ``startswith`` and silently degrade to
    # passthrough.
    lowered = content_type.strip().lower()
    for prefix, name in _MIME_DISPATCH:
        if lowered.startswith(prefix):
            return name
    return "passthrough"
|
||||
@@ -1,292 +0,0 @@
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
// Canary fingerprint payload — the JS that runs inside an opened HTML/SVG
|
||||
// canary, harvests browser primitives, and beacons the result back to the
|
||||
// canary worker. Ported from canary-self-test.html with the rendering UI
|
||||
// stripped out.
|
||||
//
|
||||
// Three placeholders are substituted by the Python builder BEFORE
|
||||
// javascript-obfuscator runs:
|
||||
//
|
||||
// {{BEACON_URL}} → full URL to /c/<callback_token> (no trailing slash)
|
||||
// {{MINT_UUID}} → per-mint UUID, baked into the string-array post-obf
|
||||
// {{MINT_NONCE}} → 16-hex HMAC nonce; the worker rejects ?d=/?o= without it
|
||||
//
|
||||
// Beacon strategy (MVP): a bare GET pixel for "I was opened" reliability,
|
||||
// then a fingerprint payload sent as a base64-URL query param on a second
|
||||
// GET so the existing worker records the hit even before step-4 POST
|
||||
// support lands. Both fail-open: any error short-circuits to next step.
|
||||
|
||||
(async function () {
  // Placeholders below are substituted by the builder before obfuscation.
  var BEACON_URL = "{{BEACON_URL}}";
  var MINT_UUID = "{{MINT_UUID}}";
  var MINT_NONCE = "{{MINT_NONCE}}";
  // Accumulated fingerprint; every probe writes one key and records
  // { err: ... } instead of throwing, so one failing probe never blocks
  // the others or the final beacon.
  var fp = { mint: MINT_UUID };

  // Fire-and-forget GET by assigning an Image src. Errors are swallowed
  // on purpose: the payload must fail open.
  function fire(url) {
    try {
      var img = new Image();
      img.src = url;
    } catch (e) { /* swallow */ }
  }

  // 1) bare-open beacon — fires regardless of whether the rest succeeds
  fire(BEACON_URL + "?o=1&k=" + MINT_NONCE);

  // Resolve to the lowercase hex SHA-256 of a string (UTF-8 encoded).
  function sha256(str) {
    var buf = new TextEncoder().encode(str);
    return crypto.subtle.digest("SHA-256", buf).then(function (h) {
      return Array.from(new Uint8Array(h))
        .map(function (b) { return b.toString(16).padStart(2, "0"); })
        .join("");
    });
  }

  // navigator
  try {
    fp.nav = {
      ua: navigator.userAgent,
      pl: navigator.platform,
      lg: navigator.language,
      lgs: (navigator.languages || []).join(","),
      ck: navigator.cookieEnabled,
      dnt: navigator.doNotTrack,
      hc: navigator.hardwareConcurrency,
      dm: navigator.deviceMemory || null,
      tp: navigator.maxTouchPoints,
      wd: navigator.webdriver === true,
      // Keep an explicit `false`; only report null when the property is
      // absent. (`x || null` would collapse false into null.)
      pdf: typeof navigator.pdfViewerEnabled === "boolean"
        ? navigator.pdfViewerEnabled : null,
    };
  } catch (e) { fp.nav = { err: String(e) }; }

  // screen
  try {
    fp.scr = {
      w: screen.width, h: screen.height,
      aw: screen.availWidth, ah: screen.availHeight,
      cd: screen.colorDepth, pd: screen.pixelDepth,
      dpr: window.devicePixelRatio,
      iw: window.innerWidth, ih: window.innerHeight,
      or: (screen.orientation && screen.orientation.type) || null,
    };
  } catch (e) { fp.scr = { err: String(e) }; }

  // tz / locale
  try {
    var dtf = Intl.DateTimeFormat().resolvedOptions();
    fp.tz = {
      z: dtf.timeZone, lc: dtf.locale,
      ca: dtf.calendar, ns: dtf.numberingSystem,
      off: new Date().getTimezoneOffset(),
    };
  } catch (e) { fp.tz = { err: String(e) }; }

  // connection
  try {
    var c = navigator.connection;
    fp.cn = c ? {
      t: c.effectiveType, dl: c.downlink, rtt: c.rtt, sd: c.saveData,
    } : null;
  } catch (e) { fp.cn = { err: String(e) }; }

  // canvas
  try {
    var cv = document.createElement("canvas");
    cv.width = 280; cv.height = 60;
    var ctx = cv.getContext("2d");
    ctx.textBaseline = "top";
    ctx.font = "14px Arial";
    ctx.fillStyle = "#f60";
    ctx.fillRect(125, 1, 62, 20);
    // fromCodePoint, not fromCharCode: 0x1f600 is above the BMP and
    // fromCharCode would truncate it to 16 bits (U+F600) instead of
    // producing the emoji glyph.
    var glyph = "c-" + String.fromCodePoint(0x1f600);
    ctx.fillStyle = "#069";
    ctx.fillText(glyph, 2, 15);
    ctx.fillStyle = "rgba(102,204,0,0.7)";
    ctx.fillText(glyph, 4, 17);
    var dataURL = cv.toDataURL();
    fp.cv = { h: await sha256(dataURL), n: dataURL.length };
  } catch (e) { fp.cv = { err: String(e) }; }

  // webgl
  try {
    var gc = document.createElement("canvas");
    var gl = gc.getContext("webgl") || gc.getContext("experimental-webgl");
    if (gl) {
      var ext = gl.getExtension("WEBGL_debug_renderer_info");
      fp.gl = {
        v: gl.getParameter(gl.VENDOR),
        r: gl.getParameter(gl.RENDERER),
        ver: gl.getParameter(gl.VERSION),
        sl: gl.getParameter(gl.SHADING_LANGUAGE_VERSION),
        uv: ext ? gl.getParameter(ext.UNMASKED_VENDOR_WEBGL) : null,
        ur: ext ? gl.getParameter(ext.UNMASKED_RENDERER_WEBGL) : null,
      };
    } else { fp.gl = { err: "unavailable" }; }
  } catch (e) { fp.gl = { err: String(e) }; }

  // audio
  try {
    var ACtx = window.OfflineAudioContext || window.webkitOfflineAudioContext;
    if (ACtx) {
      var actx = new ACtx(1, 44100, 44100);
      var osc = actx.createOscillator();
      var cmp = actx.createDynamicsCompressor();
      osc.type = "triangle"; osc.frequency.value = 10000;
      cmp.threshold.value = -50; cmp.knee.value = 40;
      cmp.ratio.value = 12; cmp.attack.value = 0; cmp.release.value = 0.25;
      osc.connect(cmp); cmp.connect(actx.destination);
      osc.start(0);
      var buf = await actx.startRendering();
      // Sum of absolute sample values over a fixed window of the render.
      var data = buf.getChannelData(0).slice(4500, 5000);
      var sum = 0;
      for (var i = 0; i < data.length; i++) sum += Math.abs(data[i]);
      fp.au = { h: await sha256(sum.toString()), s: sum.toFixed(8) };
    } else { fp.au = { err: "unavailable" }; }
  } catch (e) { fp.au = { err: String(e) }; }

  // fonts
  try {
    var bases = ["monospace", "sans-serif", "serif"];
    var tests = [
      "Arial", "Helvetica", "Times New Roman", "Courier New", "Verdana",
      "Georgia", "Trebuchet MS", "Comic Sans MS", "Impact",
      "Calibri", "Cambria", "Consolas", "Segoe UI", "Tahoma",
      "JetBrains Mono", "Fira Code", "Cascadia Code", "SF Mono",
      "Menlo", "Monaco", "Source Code Pro", "Inconsolata", "Hack",
      "San Francisco", "Helvetica Neue", "Lucida Grande",
      "DejaVu Sans", "DejaVu Sans Mono", "Liberation Sans",
      "Liberation Mono", "Ubuntu", "Ubuntu Mono", "Roboto",
      "Noto Sans", "Noto Mono",
      "Microsoft YaHei", "SimSun", "PingFang SC", "Hiragino Sans",
      "Hiragino Kaku Gothic Pro", "Yu Gothic", "Meiryo",
      "Malgun Gothic", "Noto Sans CJK",
      "Adobe Garamond Pro", "Myriad Pro", "Minion Pro",
      "Bahnschrift", "Cyberpunk",
    ];
    var sp = document.createElement("span");
    sp.style.fontSize = "72px";
    sp.style.position = "absolute";
    sp.style.left = "-9999px";
    sp.textContent = "mmmmmmmmmmlli";
    document.body.appendChild(sp);
    var det = [];
    try {
      // Baseline dimensions of the probe text in each generic family.
      var bs = {};
      for (var bi = 0; bi < bases.length; bi++) {
        sp.style.fontFamily = bases[bi];
        bs[bases[bi]] = { w: sp.offsetWidth, h: sp.offsetHeight };
      }
      // A font counts as detected when requesting it ahead of a generic
      // fallback changes the measured size for at least one fallback.
      for (var ti = 0; ti < tests.length; ti++) {
        for (var bj = 0; bj < bases.length; bj++) {
          sp.style.fontFamily = "'" + tests[ti] + "'," + bases[bj];
          if (sp.offsetWidth !== bs[bases[bj]].w ||
              sp.offsetHeight !== bs[bases[bj]].h) {
            det.push(tests[ti]); break;
          }
        }
      }
    } finally {
      // Always detach the probe span, even if measurement threw, so the
      // canary page is not left with a stray element.
      document.body.removeChild(sp);
    }
    fp.ft = {
      h: await sha256(det.slice().sort().join(",")),
      n: det.length, t: tests.length, d: det,
    };
  } catch (e) { fp.ft = { err: String(e) }; }

  // webrtc local ip leak
  try {
    var ips = {}; var cands = [];
    var RPC = window.RTCPeerConnection || window.webkitRTCPeerConnection ||
              window.mozRTCPeerConnection;
    if (RPC) {
      var pc = new RPC({ iceServers: [{ urls: "stun:stun.l.google.com:19302" }] });
      try {
        pc.createDataChannel("");
        pc.onicecandidate = function (e) {
          if (!e.candidate) return;
          cands.push(e.candidate.candidate);
          var m = e.candidate.candidate.match(
            /(\d+\.\d+\.\d+\.\d+|[a-f0-9:]+::[a-f0-9:]+)/);
          if (m) ips[m[1]] = 1;
        };
        var off = await pc.createOffer();
        await pc.setLocalDescription(off);
        // Give ICE gathering a fixed 1.5 s window to deliver candidates.
        await new Promise(function (r) { setTimeout(r, 1500); });
      } finally {
        // Close the connection even when offer/description setup fails.
        pc.close();
      }
      fp.rtc = { ip: Object.keys(ips), n: cands.length, c: cands.slice(0, 3) };
    } else { fp.rtc = { err: "unavailable" }; }
  } catch (e) { fp.rtc = { err: String(e) }; }

  // battery
  try {
    if (navigator.getBattery) {
      var bat = await navigator.getBattery();
      fp.bt = {
        c: bat.charging, l: bat.level,
        // Infinity does not survive JSON.stringify (it becomes null), so
        // it is encoded as the string "inf".
        ct: bat.chargingTime === Infinity ? "inf" : bat.chargingTime,
        dt: bat.dischargingTime === Infinity ? "inf" : bat.dischargingTime,
      };
    } else { fp.bt = { err: "unavailable" }; }
  } catch (e) { fp.bt = { err: String(e) }; }

  // perf timing jitter
  try {
    var samples = [];
    for (var pi = 0; pi < 1000; pi++) {
      var pa = performance.now();
      var x = 0;
      for (var pj = 0; pj < 1000; pj++) x += Math.sqrt(pj);
      samples.push(performance.now() - pa);
    }
    samples.sort(function (a, b) { return a - b; });
    fp.pf = {
      med: samples[500].toFixed(4),
      p95: samples[950].toFixed(4),
      mn: samples[0].toFixed(4),
      mx: samples[999].toFixed(4),
    };
  } catch (e) { fp.pf = { err: String(e) }; }

  // permissions
  try {
    if (navigator.permissions) {
      var names = ["geolocation", "notifications", "camera", "microphone",
                   "persistent-storage", "clipboard-read", "clipboard-write"];
      var st = {};
      for (var ni = 0; ni < names.length; ni++) {
        // Per-name try: a name the browser rejects is recorded as
        // "unsupported" without aborting the remaining queries.
        try {
          var r = await navigator.permissions.query({ name: names[ni] });
          st[names[ni]] = r.state;
        } catch (e) { st[names[ni]] = "unsupported"; }
      }
      fp.pm = st;
    } else { fp.pm = { err: "unavailable" }; }
  } catch (e) { fp.pm = { err: String(e) }; }

  // composite identity hash — stable inputs only
  try {
    var stable = [
      fp.cv && fp.cv.h, fp.au && fp.au.h, fp.ft && fp.ft.h,
      fp.gl && fp.gl.ur, fp.nav && fp.nav.pl,
      fp.nav && fp.nav.hc, fp.tz && fp.tz.z,
      fp.scr && (fp.scr.w + "x" + fp.scr.h),
    ].filter(Boolean).join("|");
    fp.id = await sha256(stable);
  } catch (e) { fp.id = { err: String(e) }; }

  // 2) ship the payload as base64url JSON on a GET query param.
  //    The current worker records the hit on /c/<slug>; step-4 worker
  //    will decode ?d= and persist the fingerprint blob.
  try {
    var json = JSON.stringify(fp);
    // encodeURIComponent + unescape turns the JSON into a UTF-8 byte
    // string that btoa accepts; the replaces convert to unpadded base64url.
    var b64 = btoa(unescape(encodeURIComponent(json)))
      .replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
    // chunk if URL would exceed safe limit (~6KB)
    var MAX = 6000;
    if (b64.length <= MAX) {
      fire(BEACON_URL + "?d=" + b64 + "&k=" + MINT_NONCE);
    } else {
      // Random per-send id (s), chunk index (i) and chunk count (n)
      // accompany each part.
      var sid = (Math.random() * 1e9 | 0).toString(36);
      var total = Math.ceil(b64.length / MAX);
      for (var ci = 0; ci < total; ci++) {
        var part = b64.slice(ci * MAX, (ci + 1) * MAX);
        fire(BEACON_URL + "?s=" + sid + "&i=" + ci + "&n=" + total + "&d=" + part + "&k=" + MINT_NONCE);
      }
    }
  } catch (e) { /* swallow */ }
})();
|
||||
@@ -1,8 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Built-in canary generators (synthesised fake artifacts).
|
||||
|
||||
Concrete classes live in sibling modules and are imported lazily by
|
||||
:func:`decnet.canary.factory.get_generator` to keep the import-time
|
||||
cost of :mod:`decnet.canary` cheap for callers that only need the
|
||||
ABCs.
|
||||
"""
|
||||
@@ -1,87 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fake ``~/.aws/credentials`` block (passive bait).
|
||||
|
||||
This is the **passive** variant — no callback wiring. An attacker
|
||||
who exfils these keys can't trip a detection unless we run a real
|
||||
AWS account with a deny-all CloudTrail listener (post-v1). The
|
||||
realism is the point: the file looks like a routinely used credentials
|
||||
file, so the rest of the decky's persona feels lived-in.
|
||||
|
||||
If the operator picks ``kind="aws_passive"`` we accept that no slug
|
||||
will be embedded. If they pick ``kind="http"`` or ``kind="dns"`` for
|
||||
this generator, the API will reject the combination with a 400 — AWS
|
||||
keys have no plausible field where a URL or hostname survives a
|
||||
``grep -E '[A-Z0-9]{20}'`` smell test.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from secrets import token_urlsafe
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
# Stable AWS-style key body derived from the slug. Keeping the
|
||||
# generator deterministic (per-slug) means re-seeding produces the
|
||||
# same bytes — the planter is naturally idempotent and an operator
|
||||
# who runs ``decnet canary verify`` can re-derive the expected file
|
||||
# without touching the DB.
|
||||
|
||||
def _fake_access_key(seed: str) -> str:
    """Return a deterministic, AWS-shaped access key ID for *seed*.

    The result is ``"AKIA"`` followed by 16 characters drawn from the
    base32 alphabet (``A-Z2-7``), 20 characters in total. The body is the
    base32 encoding of ``sha256(seed)``, so the same seed always yields
    the same key.

    The body used to be upper-cased hex, which limited it to ``0-9A-F``
    and let the digits ``0``, ``1``, ``8`` and ``9`` appear. Real access
    key IDs never contain those digits, so they were a cheap tell.

    NOTE(review): this changes the key derived for any given seed, so
    artifacts planted by an earlier build will not re-derive to the same
    bytes — confirm that is acceptable for already-deployed canaries.
    """
    # Function-scope import: the module only imports ``hashlib`` at the top.
    import base64

    digest = hashlib.sha256(seed.encode()).digest()
    # 32 digest bytes encode to 56 base32 chars; padding (if any) is at
    # the tail, so the first 16 characters are always alphabet characters.
    body = base64.b32encode(digest).decode("ascii")
    return "AKIA" + body[:16]
|
||||
|
||||
|
||||
def _fake_secret_key(seed: str) -> str:
    """Return a deterministic 40-character secret-key lookalike for *seed*.

    The value is the first 40 characters of the standard base64 encoding
    of ``sha256("secret:" + seed)``. No randomness is involved, so the
    output is stable per seed.
    """
    import base64

    digest = hashlib.sha256(f"secret:{seed}".encode()).digest()
    # A 32-byte digest encodes to 44 base64 characters; the single "="
    # pad is the last one, so the 40-character prefix never contains it.
    encoded = base64.b64encode(digest).decode()
    return encoded[:40]
|
||||
|
||||
|
||||
class AWSCredsGenerator(CanaryGenerator):
    """Generate a two-profile fake AWS credentials file (passive bait)."""

    name = "aws_creds"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the credentials artifact for ``ctx.callback_token``.

        Both profiles' keys are derived from the callback token (the
        ``prod`` profile uses the token prefixed with ``"prod-"``), so the
        same context always produces the same bytes. No callback URL is
        embedded in the file.
        """
        seed = ctx.callback_token
        prod_seed = "prod-" + seed
        lines = [
            "[default]",
            f"aws_access_key_id = {_fake_access_key(seed)}",
            f"aws_secret_access_key = {_fake_secret_key(seed)}",
            "region = us-east-1",
            "",
            "[prod]",
            f"aws_access_key_id = {_fake_access_key(prod_seed)}",
            f"aws_secret_access_key = {_fake_secret_key(prod_seed)}",
            "region = us-west-2",
        ]
        text = "\n".join(lines) + "\n"
        return CanaryArtifact(
            path="",  # caller (planter) fills this from CanaryToken.placement_path
            content=text.encode("utf-8"),
            mode=0o600,
            mtime_offset=-86400 * 14,  # 2 weeks ago — looks lived-in
            generator=self.name,
            notes=[
                "fake AWS keys; no callback embedded — passive bait only",
                f"derived deterministically from slug={seed}",
            ],
        )
|
||||
|
||||
|
||||
# Re-exported so the slug helper is reusable from the
|
||||
# instrumenters/passthrough module without an internal import path.
|
||||
__all__ = ["AWSCredsGenerator", "_fake_access_key", "_fake_secret_key"]
|
||||
|
||||
|
||||
# ``token_urlsafe`` is imported at the top of this module but is not
# called by any helper here: the key material is derived from SHA-256
# digests of the slug rather than from random bytes. Reference the name
# once so linters do not flag the import as unused.
|
||||
_ = token_urlsafe
|
||||
@@ -1,57 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fake ``.env`` with embedded callback URLs.
|
||||
|
||||
Modern web stacks read environment variables for everything from
|
||||
database DSNs to webhook URLs, so dropping a few realistic-looking
|
||||
``KEY=value`` pairs alongside the canary URL is unremarkable. The
|
||||
slug appears in two fields:
|
||||
|
||||
* ``API_BASE_URL`` — the obvious one; an attacker scripting against
|
||||
the credentials hits the worker on first invocation.
|
||||
* ``WEBHOOK_NOTIFY_URL`` — secondary, in case the attacker greps for
|
||||
``WEBHOOK`` and pivots there.
|
||||
|
||||
Other fields (``DB_PASSWORD``, ``REDIS_URL``, ``JWT_SECRET``) are
|
||||
plausible but inert — they're realism filler, not detection
|
||||
mechanisms.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
def _stable_token(seed: str, prefix: str = "") -> str:
    """Return the first 32 hex chars of ``sha256(prefix + seed)``.

    Deterministic filler for secret-looking values: the same
    ``(prefix, seed)`` pair always yields the same token.
    """
    material = f"{prefix}{seed}".encode()
    return hashlib.sha256(material).hexdigest()[:32]
|
||||
|
||||
|
||||
class EnvFileGenerator(CanaryGenerator):
    """Generate a fake ``.env`` whose URL fields carry the callback slug."""

    name = "env_file"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the ``.env`` artifact for *ctx*.

        ``API_BASE_URL`` and ``WEBHOOK_NOTIFY_URL`` embed
        ``<http_base>/c/<callback_token>``; the remaining secret-looking
        values are derived from the callback token via ``_stable_token``.
        """
        slug = ctx.callback_token
        api_url = f"{ctx.http_base.rstrip('/')}/c/{slug}"
        webhook_url = f"{api_url}/webhook"
        redis_password = _stable_token(slug, "redis:")[:16]
        env_lines = [
            "# Production environment — DO NOT COMMIT",
            f"API_BASE_URL={api_url}",
            f"WEBHOOK_NOTIFY_URL={webhook_url}",
            f"DB_PASSWORD={_stable_token(slug, 'db:')}",
            f"REDIS_URL=redis://:{redis_password}@redis.internal:6379/0",
            f"JWT_SECRET={_stable_token(slug, 'jwt:')}",
            "LOG_LEVEL=info",
            "ENVIRONMENT=production",
        ]
        text = "".join(f"{line}\n" for line in env_lines)
        return CanaryArtifact(
            path="",
            content=text.encode("utf-8"),
            mode=0o600,
            mtime_offset=-86400 * 7,  # last edited a week ago
            generator=self.name,
            notes=[
                f"API_BASE_URL embeds {api_url}",
                f"WEBHOOK_NOTIFY_URL embeds {webhook_url}",
            ],
        )
|
||||
@@ -1,141 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""HTML fingerprint canary — plausible-looking page with an obfuscated
|
||||
browser-fingerprinting payload inlined at the bottom of ``<body>``.
|
||||
|
||||
The visible content is a deliberately mundane "internal directory"
|
||||
table — the kind of file a curious attacker pulls off a decky's
|
||||
filesystem and opens locally to triage. When the file is opened in
|
||||
*any* network-connected browser the obfuscated payload runs and beacons
|
||||
to ``/c/<callback_token>``: first a bare-open pixel, then a chunked
|
||||
fingerprint dump (canvas, audio, fonts, WebGL, WebRTC local IPs,
|
||||
timing jitter, permissions, composite identity hash).
|
||||
|
||||
Determinism: the mint UUID is derived from the callback token via
|
||||
:func:`uuid.uuid5` so the same ``ctx`` always produces byte-identical
|
||||
output, satisfying the generator contract in :mod:`decnet.canary.base`.
|
||||
The obfuscator's seed and polymorphic config bits are likewise
|
||||
callback-token-derived (see :mod:`decnet.canary.obfuscator`).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import uuid
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
from decnet.canary.obfuscator import render_fingerprint_js, nonce_for
|
||||
|
||||
# Fixed UUIDv5 namespace under which every mint UUID is derived.
_MINT_NAMESPACE = uuid.UUID("a3f7c821-9d1e-4b6a-8c2d-1e4f9a7b3c5d")


def _mint_uuid_for(callback_token: str) -> str:
    """Return the UUIDv5 of *callback_token* under ``_MINT_NAMESPACE``.

    The result is the canonical string form and is deterministic: the
    same token always maps to the same mint UUID.
    """
    minted = uuid.uuid5(_MINT_NAMESPACE, callback_token)
    return str(minted)
|
||||
|
||||
|
||||
def _stable_int(callback_token: str, salt: str = "") -> int:
    """Map ``(callback_token, salt)`` to a stable non-negative integer.

    The value is the first four bytes, read big-endian, of the SHA-256
    of ``"<callback_token>|<salt>"``. ``builtins.hash`` is unsuitable
    here because it is salted per process, while generator output must
    be byte-identical across runs.
    """
    material = f"{callback_token}|{salt}".encode("utf-8")
    leading = hashlib.sha256(material).digest()[:4]
    return int.from_bytes(leading, "big")
|
||||
|
||||
|
||||
_PAGE_TEMPLATE = """<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Internal Asset Directory</title>
|
||||
<style>
|
||||
body{{font-family:Segoe UI,Arial,sans-serif;background:#fafafa;color:#222;
|
||||
margin:24px;font-size:13px}}
|
||||
h1{{font-size:18px;margin:0 0 4px 0}}
|
||||
.sub{{color:#777;font-size:11px;margin-bottom:18px}}
|
||||
table{{border-collapse:collapse;width:100%;background:#fff;
|
||||
box-shadow:0 1px 2px rgba(0,0,0,.05)}}
|
||||
th,td{{padding:6px 10px;border-bottom:1px solid #eee;text-align:left}}
|
||||
th{{background:#f4f4f4;font-weight:600;font-size:11px;
|
||||
text-transform:uppercase;letter-spacing:.5px;color:#555}}
|
||||
tr:hover td{{background:#fafbff}}
|
||||
.foot{{margin-top:16px;color:#999;font-size:11px}}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Internal Asset Directory</h1>
|
||||
<div class="sub">last sync: {sync_label} · {row_count} entries · CONFIDENTIAL</div>
|
||||
<table>
|
||||
<tr><th>Hostname</th><th>Owner</th><th>Role</th><th>VLAN</th><th>Notes</th></tr>
|
||||
{rows}
|
||||
</table>
|
||||
<div class="foot">page generated by directory-sync v2.4.1 — do not redistribute</div>
|
||||
<script>{payload}</script>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
|
||||
_ROW_POOL = (
|
||||
("ny-app-01.corp.local", "k.tanaka", "app server", "vlan20", "primary"),
|
||||
("ny-db-01.corp.local", "ops", "postgres primary", "vlan30", "backup nightly"),
|
||||
("ny-build-02.corp.local", "ci-bot", "jenkins agent", "vlan40", ""),
|
||||
("sf-vpn-01.corp.local", "netsec", "wireguard endpoint", "vlan10", "external"),
|
||||
("ldn-mail-03.corp.local", "j.weber", "exchange edge", "vlan50", ""),
|
||||
("hk-cache-01.corp.local", "ops", "redis replica", "vlan30", "lag <1s"),
|
||||
("br-dev-04.corp.local", "m.silva", "dev sandbox", "vlan60", "ephemeral"),
|
||||
("eu-bastion-02.corp.local", "secops", "ssh jump host", "vlan10", "mfa required"),
|
||||
("us-archive-01.corp.local", "compliance", "log archive", "vlan70", "retain 7y"),
|
||||
)
|
||||
|
||||
|
||||
def _build_rows(callback_token: str) -> tuple[str, int]:
    """Render a token-dependent selection of ``_ROW_POOL`` as ``<tr>`` markup.

    The start index and the row count (5 to 8) are both derived from
    *callback_token* via ``_stable_int``; selection wraps around the end
    of the pool. Cell text is HTML-escaped so pool entries containing
    markup characters (e.g. ``"lag <1s"``) produce well-formed HTML
    instead of a raw ``<`` inside ``<td>``.

    Returns:
        A ``(markup, row_count)`` tuple, where *markup* is the
        newline-joined ``<tr>...</tr>`` rows.
    """
    # Function-scope import keeps this block self-contained; the module
    # does not import ``html`` at the top.
    from html import escape

    pick = _stable_int(callback_token, "pick") % len(_ROW_POOL)
    take = 5 + (_stable_int(callback_token, "take") % 4)
    selected = [_ROW_POOL[(pick + i) % len(_ROW_POOL)] for i in range(take)]
    cells = "\n".join(
        "<tr>" + "".join(f"<td>{escape(c)}</td>" for c in row) + "</tr>"
        for row in selected
    )
    return cells, len(selected)
|
||||
|
||||
|
||||
def _sync_label(callback_token: str) -> str:
    """Return the token-derived "last sync" timestamp shown on the page.

    The day (01-28) and hour (00-23) come from ``_stable_int``; the year,
    month and minute are fixed.
    """
    day_of_month = 1 + _stable_int(callback_token, "day") % 28
    hour_of_day = _stable_int(callback_token, "hour") % 24
    return f"2026-04-{day_of_month:02d} {hour_of_day:02d}:14 UTC"
|
||||
|
||||
|
||||
class FingerprintHtmlGenerator(CanaryGenerator):
    """Synthesise an HTML page that fingerprints the browser opening it."""

    name = "fingerprint_html"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Render the directory page with the fingerprint script inlined.

        The mint UUID, nonce, table rows and sync label are all derived
        from ``ctx.callback_token``. The nonce is also returned on the
        artifact as ``fingerprint_nonce``.
        """
        token = ctx.callback_token
        mint_uuid = _mint_uuid_for(token)
        nonce = nonce_for(token, mint_uuid)
        script = render_fingerprint_js(
            callback_token=token,
            http_base=ctx.http_base,
            mint_uuid=mint_uuid,
            nonce=nonce,
        )
        table_rows, n_rows = _build_rows(token)
        page = _PAGE_TEMPLATE.format(
            sync_label=_sync_label(token),
            row_count=n_rows,
            rows=table_rows,
            payload=script,
        )
        beacon = f"{ctx.http_base.rstrip('/')}/c/{token}"
        artifact_notes = [
            f"obfuscated fingerprinter beacons={beacon}",
            f"mint_uuid={mint_uuid}",
        ]
        return CanaryArtifact(
            path="",
            content=page.encode("utf-8"),
            mode=0o644,
            mtime_offset=-86400 * 14,
            generator=self.name,
            fingerprint_nonce=nonce,
            notes=artifact_notes,
        )
|
||||
@@ -1,89 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""SVG fingerprint canary — standalone SVG with an embedded ``<script>``
|
||||
that runs the obfuscated fingerprinter when the file is opened directly
|
||||
in a browser.
|
||||
|
||||
SVG ``<script>`` only fires when the SVG is loaded as a top-level
|
||||
document (or via ``<object>``/``<iframe>``); it's *blocked* when the
|
||||
SVG is referenced from another page's ``<img>``. That's the right
|
||||
posture for canary use: an attacker browsing the decky filesystem and
|
||||
double-clicking a stray ``network_diagram.svg`` triggers it; rendering
|
||||
inside a sandboxed CMS preview does not.
|
||||
|
||||
Same determinism guarantees as :mod:`fingerprint_html`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
from decnet.canary.generators.fingerprint_html import _mint_uuid_for, _stable_int
|
||||
from decnet.canary.obfuscator import render_fingerprint_js, nonce_for
|
||||
|
||||
|
||||
_DIAGRAM_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 600 360" width="600" height="360">
|
||||
<style>
|
||||
.box{{fill:#f7f9fb;stroke:#7a93ad;stroke-width:1.2}}
|
||||
.lbl{{font:12px Segoe UI,Arial,sans-serif;fill:#2a3a4a}}
|
||||
.edge{{stroke:#7a93ad;stroke-width:1.2;fill:none}}
|
||||
.title{{font:bold 14px Segoe UI,Arial,sans-serif;fill:#1a2a3a}}
|
||||
.cap{{font:10px Segoe UI,Arial,sans-serif;fill:#6a7a8a}}
|
||||
</style>
|
||||
<text class="title" x="20" y="28">Network Topology — {region} segment</text>
|
||||
<text class="cap" x="20" y="44">draft v{ver} · last reviewed {review}</text>
|
||||
<rect class="box" x="40" y="80" width="120" height="50" rx="4"/>
|
||||
<text class="lbl" x="100" y="110" text-anchor="middle">edge gw</text>
|
||||
<rect class="box" x="240" y="80" width="120" height="50" rx="4"/>
|
||||
<text class="lbl" x="300" y="110" text-anchor="middle">core sw</text>
|
||||
<rect class="box" x="440" y="80" width="120" height="50" rx="4"/>
|
||||
<text class="lbl" x="500" y="110" text-anchor="middle">app cluster</text>
|
||||
<rect class="box" x="240" y="220" width="120" height="50" rx="4"/>
|
||||
<text class="lbl" x="300" y="250" text-anchor="middle">db tier</text>
|
||||
<path class="edge" d="M160 105 L240 105"/>
|
||||
<path class="edge" d="M360 105 L440 105"/>
|
||||
<path class="edge" d="M300 130 L300 220"/>
|
||||
<script type="application/ecmascript"><![CDATA[
|
||||
{payload}
|
||||
]]></script>
|
||||
</svg>
|
||||
"""
|
||||
|
||||
|
||||
_REGIONS = ("us-east", "eu-central", "ap-south", "us-west", "sa-east")
|
||||
|
||||
|
||||
class FingerprintSvgGenerator(CanaryGenerator):
    """Synthesise an SVG that fingerprints the browser opening it."""

    name = "fingerprint_svg"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Render the topology diagram with the fingerprint script embedded.

        Region, draft version (1-6) and review day (01-28) are derived
        from ``ctx.callback_token`` via ``_stable_int``. The nonce is also
        returned on the artifact as ``fingerprint_nonce``.
        """
        token = ctx.callback_token
        mint_uuid = _mint_uuid_for(token)
        nonce = nonce_for(token, mint_uuid)
        script = render_fingerprint_js(
            callback_token=token,
            http_base=ctx.http_base,
            mint_uuid=mint_uuid,
            nonce=nonce,
        )
        region_index = _stable_int(token, "reg") % len(_REGIONS)
        draft_version = 1 + (_stable_int(token, "ver") % 6)
        review_day = _stable_int(token, "day") % 28 + 1
        svg = _DIAGRAM_TEMPLATE.format(
            region=_REGIONS[region_index],
            ver=draft_version,
            review=f"2026-03-{review_day:02d}",
            payload=script,
        )
        beacon = f"{ctx.http_base.rstrip('/')}/c/{token}"
        artifact_notes = [
            f"obfuscated fingerprinter beacons={beacon}",
            f"mint_uuid={mint_uuid}",
        ]
        return CanaryArtifact(
            path="",
            content=svg.encode("utf-8"),
            mode=0o644,
            mtime_offset=-86400 * 30,
            generator=self.name,
            fingerprint_nonce=nonce,
            notes=artifact_notes,
        )
|
||||
@@ -1,54 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fake ``.git/config`` with an attacker-bait remote URL.
|
||||
|
||||
The ``[remote "origin"]`` ``url`` field is the natural place to embed
|
||||
an HTTP-callback URL: it's normal for git remotes to be HTTPS, the
|
||||
URL is read by every git command an attacker runs (``git pull``,
|
||||
``git fetch``, ``git remote -v``), and the slug fits naturally as
|
||||
part of a path.
|
||||
|
||||
The generator emits a plausible private-mirror remote (``git.<org>``
|
||||
or the canary host's hostname) so an attacker doesn't immediately
|
||||
recognise it as a honeypot. The slug ends up in the URL path:
|
||||
|
||||
[remote "origin"]
|
||||
url = https://canary.example.test/c/<slug>/repo.git
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
class GitConfigGenerator(CanaryGenerator):
    """Generate a fake ``.git/config`` whose origin URL carries the slug."""

    name = "git_config"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the git-config artifact for *ctx*.

        The ``origin`` remote URL is
        ``<http_base>/c/<callback_token>/repo.git``. A trailing slash on
        ``http_base`` is stripped first, since the operator may have
        configured it either way.
        """
        # The /c/<slug>/repo.git suffix keeps the path realistic while
        # staying routable on a single ``startswith("/c/")`` check.
        remote_url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}/repo.git"
        config_lines = [
            "[core]",
            "\trepositoryformatversion = 0",
            "\tfilemode = true",
            "\tbare = false",
            "\tlogallrefupdates = true",
            '[remote "origin"]',
            f"\turl = {remote_url}",
            "\tfetch = +refs/heads/*:refs/remotes/origin/*",
            '[branch "main"]',
            "\tremote = origin",
            "\tmerge = refs/heads/main",
        ]
        text = "\n".join(config_lines) + "\n"
        return CanaryArtifact(
            path="",
            content=text.encode("utf-8"),
            mode=0o644,
            mtime_offset=-86400 * 30,  # checked out a month ago
            generator=self.name,
            notes=[f"git remote 'origin' embeds {remote_url}"],
        )
|
||||
@@ -1,62 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Built-in honeydoc — a minimal HTML "report" with a tracking pixel.
|
||||
|
||||
This is the *fallback* honeydoc used when the operator hasn't
|
||||
uploaded a real document. The HTML instrumenter handles operator
|
||||
uploads via :mod:`decnet.canary.instrumenters.html`; this generator
|
||||
exists so the deploy-time baseline can plant *something* convincing
|
||||
without first prompting the operator to drop a file.
|
||||
|
||||
The realism here is intentionally modest: a Documents-folder HTML
|
||||
page with internal-looking content and a 1×1 remote image at the
|
||||
bottom whose ``src`` is the canary callback URL. Most desktop
|
||||
HTML renderers fetch the image as soon as the file is opened in a
|
||||
browser preview, so opening the doc trips the callback.
|
||||
|
||||
Operators who want a richer artifact should upload their own DOCX
|
||||
or PDF; the corresponding instrumenter embeds the same callback in
|
||||
the appropriate format.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
class HoneydocGenerator(CanaryGenerator):
    """Generate the built-in HTML honeydoc with a 1×1 tracking pixel."""

    name = "honeydoc"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the HTML report artifact for *ctx*.

        The page ends with an ``<img>`` whose ``src`` is
        ``<http_base>/c/<callback_token>``; the rest of the markup is
        static filler content.
        """
        pixel_url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        html_lines = [
            "<!DOCTYPE html>",
            '<html lang="en">',
            "<head>",
            '<meta charset="utf-8">',
            "<title>Q3 Operations Review — DRAFT</title>",
            "</head>",
            "<body>",
            "<h1>Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)</h1>",
            "<p>Forecast and remediation timeline below. Numbers are",
            "preliminary and subject to revision before the all-hands.</p>",
            "<table>",
            "<tr><th>Region</th><th>Incidents</th><th>MTTR (h)</th></tr>",
            "<tr><td>us-east</td><td>14</td><td>3.2</td></tr>",
            "<tr><td>us-west</td><td>9</td><td>4.7</td></tr>",
            "<tr><td>eu-central</td><td>22</td><td>2.1</td></tr>",
            "</table>",
            '<p>Internal contact: <a href="mailto:secops@internal">secops@internal</a></p>',
            f'<img src="{pixel_url}" width="1" height="1" alt="">',
            "</body>",
            "</html>",
        ]
        text = "\n".join(html_lines) + "\n"
        return CanaryArtifact(
            path="",
            content=text.encode("utf-8"),
            mode=0o644,  # docs are typically world-readable
            mtime_offset=-86400 * 21,  # 3 weeks ago
            generator=self.name,
            notes=[f"tracking pixel src={pixel_url}"],
        )
|
||||
@@ -1,134 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Real-DOCX honeydoc generator.
|
||||
|
||||
Synthesises a minimal but structurally valid DOCX from scratch via
|
||||
stdlib :mod:`zipfile`, then uses the same external-image relationship
|
||||
trick that powers :mod:`decnet.canary.instrumenters.docx` to embed
|
||||
the callback URL. No python-docx dependency.
|
||||
|
||||
The output opens cleanly in Word / LibreOffice; both fetch the
|
||||
external image relationship on document load.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import zipfile
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
from decnet.canary.instrumenters.docx import _drawing, _next_rid
|
||||
|
||||
|
||||
_CONTENT_TYPES = (
|
||||
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
|
||||
'<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
|
||||
'<Default Extension="xml" ContentType="application/xml"/>'
|
||||
'<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
|
||||
'<Override PartName="/word/document.xml" '
|
||||
'ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>'
|
||||
'</Types>'
|
||||
).encode()
|
||||
|
||||
_PACKAGE_RELS = (
|
||||
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
|
||||
'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
|
||||
'<Relationship Id="rId1" '
|
||||
'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" '
|
||||
'Target="word/document.xml"/>'
|
||||
'</Relationships>'
|
||||
).encode()
|
||||
|
||||
_BODY_PARAGRAPHS = (
|
||||
"Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)",
|
||||
"",
|
||||
"Forecast and remediation timeline below. Numbers are preliminary "
|
||||
"and subject to revision before the all-hands.",
|
||||
"",
|
||||
"Region Incidents MTTR (h)",
|
||||
"us-east 14 3.2",
|
||||
"us-west 9 4.7",
|
||||
"eu-central 22 2.1",
|
||||
"",
|
||||
"Internal contact: secops@internal",
|
||||
)
|
||||
|
||||
|
||||
def _document_xml(rid_with_drawing: str | None = None) -> bytes:
    """Serialise ``word/document.xml`` for the synthesised honeydoc.

    Each entry of ``_BODY_PARAGRAPHS`` becomes one ``<w:p>``; empty entries
    become an empty paragraph.  When ``rid_with_drawing`` is truthy, the
    element returned by ``_drawing(rid_with_drawing)`` is appended after
    the text so the body references that relationship id.
    """
    rendered = "".join(
        (
            "<w:p><w:r><w:t xml:space=\"preserve\">"
            + _xml_escape(text)
            + "</w:t></w:r></w:p>"
        )
        if text
        else "<w:p/>"
        for text in _BODY_PARAGRAPHS
    )
    tail = ""
    if rid_with_drawing:
        tail = _drawing(rid_with_drawing).decode()
    pieces = [
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
        '<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">',
        "<w:body>",
        rendered,
        tail,
        "</w:body>",
        "</w:document>",
    ]
    return "".join(pieces).encode()
|
||||
|
||||
|
||||
def _xml_escape(s: str) -> str:
|
||||
return (
|
||||
s.replace("&", "&")
|
||||
.replace("<", "<")
|
||||
.replace(">", ">")
|
||||
)
|
||||
|
||||
|
||||
def _document_rels(rid: str, url: str) -> bytes:
|
||||
return (
|
||||
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
|
||||
'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
|
||||
f'<Relationship Id="{rid}" '
|
||||
f'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" '
|
||||
f'Target="{url}" TargetMode="External"/>'
|
||||
'</Relationships>'
|
||||
).encode()
|
||||
|
||||
|
||||
class HoneydocDocxGenerator(CanaryGenerator):
    """Synthesise a minimal DOCX whose body links an external callback image."""

    name = "honeydoc_docx"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the DOCX archive in memory and wrap it in a ``CanaryArtifact``.

        The callback URL is ``<http_base>/c/<callback_token>``; it is stored
        as an external image relationship that the document body references.
        """
        url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        # Pick a stable rId — there's only one relationship in the
        # synthesised file, so any unused id works.  Reuse the
        # instrumenter's allocator against the bare relationships
        # skeleton for parity with operator-uploaded DOCX flow.
        skeleton = (
            b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
            b'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
            b'</Relationships>'
        )
        rid = _next_rid(skeleton)

        parts = (
            ("[Content_Types].xml", _CONTENT_TYPES),
            ("_rels/.rels", _PACKAGE_RELS),
            ("word/document.xml", _document_xml(rid)),
            ("word/_rels/document.xml.rels", _document_rels(rid, url)),
        )

        out = io.BytesIO()
        with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as zf:
            for arcname, data in parts:
                # ``writestr(<name>, ...)`` stamps the member with the current
                # wall-clock time, which makes two runs with the same context
                # differ and records the plant time inside the archive.  Pin
                # the timestamp to the zip epoch so output is reproducible.
                info = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
                info.compress_type = zipfile.ZIP_DEFLATED
                # Same permission bits ``writestr`` assigns for a plain name.
                info.external_attr = 0o600 << 16
                zf.writestr(info, data)

        return CanaryArtifact(
            path="",
            content=out.getvalue(),
            mode=0o644,
            mtime_offset=-86400 * 21,
            generator=self.name,
            notes=[
                "synthesised DOCX with realistic Q3 review body",
                f"external-image relationship {rid} -> {url}",
            ],
        )
|
||||
@@ -1,128 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Real-PDF honeydoc generator (uses :mod:`pikepdf`).
|
||||
|
||||
Builds a one-page PDF with the same Q3-review body as the HTML/DOCX
|
||||
flavors and installs an ``/OpenAction`` ``/URI`` action on the
|
||||
catalog so most viewers fire the callback the moment the document
|
||||
opens.
|
||||
|
||||
Pikepdf is now a hard dependency for this generator (the operator
|
||||
installed it explicitly so we can use it). We still surface a
|
||||
clear :class:`InstrumenterRejectedError` when imports fail, so a
|
||||
deployment without pikepdf can fall back to the DOCX or HTML
|
||||
generators rather than crashing the API.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
|
||||
from decnet.canary.base import (
|
||||
CanaryArtifact,
|
||||
CanaryContext,
|
||||
CanaryGenerator,
|
||||
InstrumenterRejectedError,
|
||||
)
|
||||
|
||||
|
||||
_BODY_LINES = (
|
||||
("Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)", 14),
|
||||
("", 12),
|
||||
("Forecast and remediation timeline below.", 11),
|
||||
("Numbers are preliminary, subject to revision.", 11),
|
||||
("", 12),
|
||||
("Region Incidents MTTR (h)", 11),
|
||||
("us-east 14 3.2", 11),
|
||||
("us-west 9 4.7", 11),
|
||||
("eu-central 22 2.1", 11),
|
||||
("", 12),
|
||||
("Internal contact: secops@internal", 11),
|
||||
)
|
||||
|
||||
|
||||
class HoneydocPdfGenerator(CanaryGenerator):
    """Synthesise a one-page PDF whose catalog ``/OpenAction`` is a URI action."""

    name = "honeydoc_pdf"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the PDF in memory and wrap it in a ``CanaryArtifact``.

        Raises:
            InstrumenterRejectedError: if :mod:`pikepdf` cannot be imported.
        """
        try:
            from pikepdf import Pdf, Name, Dictionary, String
        except ImportError as exc:
            message = (
                "honeydoc_pdf requires pikepdf; install it (`pip install "
                "pikepdf`) or pick honeydoc / honeydoc_docx instead."
            )
            raise InstrumenterRejectedError(message) from exc

        url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"

        pdf = Pdf.new()
        # Helvetica is one of the standard base fonts, so the file
        # references it by name instead of embedding font data.
        font = pdf.make_indirect(Dictionary(
            Type=Name("/Font"),
            Subtype=Name("/Type1"),
            BaseFont=Name("/Helvetica"),
        ))

        # One text object: start at (72, 750) on a 612 x 792 page and move
        # 18 points down before every line except the first.
        ops: list[str] = ["BT /F1 12 Tf 72 750 Td"]
        for index, (text, size) in enumerate(_BODY_LINES):
            if index:
                ops.append("0 -18 Td")
            ops.extend((f"/F1 {size} Tf", f"({_pdf_escape(text)}) Tj"))
        ops.append("ET")
        content_stream = pdf.make_stream("\n".join(ops).encode("latin-1"))

        page = pdf.add_blank_page(page_size=(612, 792))
        page[Name("/Resources")] = Dictionary(
            Font=Dictionary(F1=font),
        )
        page[Name("/Contents")] = content_stream

        # OpenAction fires the URI when the file is opened in Acrobat,
        # Preview, the browser PDF viewer, etc.  Most viewers prompt
        # before fetching; that prompt itself is a tell, and an
        # auto-allow viewer fetches silently.
        pdf.Root[Name("/OpenAction")] = Dictionary(
            Type=Name("/Action"),
            S=Name("/URI"),
            URI=String(url),
        )

        buffer = io.BytesIO()
        pdf.save(buffer)
        return CanaryArtifact(
            path="",
            content=buffer.getvalue(),
            mode=0o644,
            mtime_offset=-86400 * 21,
            generator=self.name,
            notes=[
                "synthesised one-page PDF with realistic Q3 review body",
                f"/OpenAction /URI -> {url}",
            ],
        )
|
||||
|
||||
|
||||
def _pdf_escape(s: str) -> str:
|
||||
"""Escape parens and backslashes for PDF literal-string syntax.
|
||||
|
||||
PDF string literals are wrapped in ``( … )``; inner ``(``, ``)``,
|
||||
and ``\\`` need backslash escapes. Everything else (including
|
||||
UTF-8 multibyte sequences) round-trips fine because Helvetica's
|
||||
encoding is WinAnsi-ish — we'll lose exotic glyphs but the
|
||||
realistic body sticks to ASCII anyway. Em-dashes are downgraded
|
||||
to ``--`` to avoid the WinAnsi gap.
|
||||
"""
|
||||
return (
|
||||
s.replace("\\", r"\\")
|
||||
.replace("(", r"\(")
|
||||
.replace(")", r"\)")
|
||||
.replace("—", "--")
|
||||
)
|
||||
@@ -1,191 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fake ``mysqldump`` output that phones home on import.
|
||||
|
||||
Mirrors the Canarytokens.org MySQL-dump trick. When a victim runs
|
||||
``mysql < dump.sql``, the trailer block executes a base64-obfuscated
|
||||
``CHANGE REPLICATION SOURCE TO`` against ``<slug>.<dns_zone>``
|
||||
followed by ``START REPLICA``. The victim's MySQL daemon then:
|
||||
|
||||
1. Resolves the slug subdomain via DNS — this is the trip our
|
||||
:mod:`decnet.canary.dns_server` already detects.
|
||||
2. Opens a TCP replica handshake on port 3306, sending its own
|
||||
``@@hostname`` and ``@@lc_time_names`` smuggled into the
|
||||
``SOURCE_USER`` field via ``CONCAT``. Capturing those bytes
|
||||
requires a MySQL handshake responder on the worker — out of scope
|
||||
for v1; the DNS lookup alone is sufficient for detection.
|
||||
|
||||
The base64 wrapper is the camouflage: a plain ``grep canary dump.sql``
|
||||
finds nothing. The slug only materialises when the victim's server
|
||||
runs ``PREPARE … FROM @s2``.
|
||||
|
||||
Because the trip surface is DNS, this generator REQUIRES a non-empty
|
||||
``dns_zone``. The slug must appear as the leftmost label of the
|
||||
hostname so a single DNS query identifies the token; the http_base
|
||||
host is not slug-bearing and can't substitute.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
def _stable_hex(seed: str, prefix: str = "", length: int = 16) -> str:
|
||||
h = hashlib.sha256((prefix + seed).encode()).hexdigest()
|
||||
return h[:length]
|
||||
|
||||
|
||||
def _build_replica_payload(slug: str, dns_zone: str) -> str:
|
||||
"""Inner SQL that gets base64-wrapped.
|
||||
|
||||
The CONCAT splices ``@@lc_time_names`` and ``@@hostname`` into the
|
||||
``SOURCE_USER`` value at PREPARE time so the victim's locale and
|
||||
hostname travel as the replica username on the 3306 handshake.
|
||||
"""
|
||||
host = f"{slug}.{dns_zone}"
|
||||
return (
|
||||
"SET @bb = CONCAT("
|
||||
"\"CHANGE REPLICATION SOURCE TO "
|
||||
"SOURCE_PASSWORD='replica-pw', "
|
||||
"SOURCE_RETRY_COUNT=1, "
|
||||
"SOURCE_PORT=3306, "
|
||||
f"SOURCE_HOST='{host}', "
|
||||
"SOURCE_SSL=0, "
|
||||
f"SOURCE_USER='{slug}\", "
|
||||
"@@lc_time_names, @@hostname, \"';\");"
|
||||
)
|
||||
|
||||
|
||||
def _build_trailer(slug: str, dns_zone: str) -> str:
    """Return the SQL block that decodes and runs the replica payload.

    The payload from ``_build_replica_payload`` is stored base64-encoded in
    ``@b``, decoded into ``@s2`` and executed (which defines ``@bb``); the
    statement held in ``@bb`` is then prepared and executed, followed by
    ``START REPLICA``.
    """
    payload = _build_replica_payload(slug, dns_zone)
    encoded = base64.b64encode(payload.encode("utf-8")).decode("ascii")
    statements = [
        f"SET @b = '{encoded}';",
        "SET @s2 = FROM_BASE64(@b);",
        "PREPARE stmt1 FROM @s2;",
        "EXECUTE stmt1;",
        "PREPARE stmt2 FROM @bb;",
        "EXECUTE stmt2;",
        "START REPLICA;",
    ]
    return "".join(statement + "\n" for statement in statements)
|
||||
|
||||
|
||||
class MySQLDumpGenerator(CanaryGenerator):
    """Emit a fake ``mysqldump`` file carrying the replica-callback trailer."""

    name = "mysql_dump"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Assemble header, fake table data, replica trailer and footer.

        Raises:
            ValueError: if ``ctx.dns_zone`` is empty.
        """
        if not ctx.dns_zone:
            raise ValueError(
                "mysql_dump requires a non-empty dns_zone — the trip "
                "surface is a DNS lookup of <slug>.<dns_zone>."
            )
        slug = ctx.callback_token
        zone = ctx.dns_zone
        host = f"{slug}.{zone}"

        # Realism filler: values are derived from the slug alone, so two
        # runs with the same context produce byte-identical output
        # (planter idempotency contract).
        u1_hash = _stable_hex(slug, "u1:", 32)
        u2_hash = _stable_hex(slug, "u2:", 32)
        api_token = _stable_hex(slug, "api:", 40)

        # Synthesised SQL bait below — never executed by us, only by
        # whoever runs ``mysql < dump.sql`` against their own server.
        # Placeholders are filled with str.replace() instead of f-strings
        # so bandit's B608 heuristic doesn't false-positive on the
        # "INSERT INTO" + variable pattern.
        users_insert = (
            "INSERT INTO `users` VALUES "  # nosec B608
            "(1,'alice@app.internal','$2y$10${u1a}.{u1b}','2024-11-12 09:13:44'),"
            "(2,'bob@app.internal','$2y$10${u2a}.{u2b}','2025-02-03 17:42:08');\n"
        )
        for placeholder, value in (
            ("{u1a}", u1_hash[:22]),
            ("{u1b}", u1_hash[22:]),
            ("{u2a}", u2_hash[:22]),
            ("{u2b}", u2_hash[22:]),
        ):
            users_insert = users_insert.replace(placeholder, value)

        api_keys_insert = (
            "INSERT INTO `api_keys` VALUES (1,1,'{tok}');\n"  # nosec B608
        ).replace("{tok}", api_token)

        preamble = (
            "-- MySQL dump 10.13 Distrib 8.0.35, for Linux (x86_64)\n"
            "--\n"
            "-- Host: db-prod-01 Database: app_production\n"
            "-- ------------------------------------------------------\n"
            "-- Server version\t8.0.35\n"
            "\n"
            "/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;\n"
            "/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;\n"
            "/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;\n"
            "/*!50503 SET NAMES utf8mb4 */;\n"
            "/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;\n"
            "/*!40103 SET TIME_ZONE='+00:00' */;\n"
            "/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;\n"
            "/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;\n"
            "/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;\n"
            "/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;\n"
            "\n"
        )
        users_schema = (
            "--\n"
            "-- Table structure for table `users`\n"
            "--\n"
            "\n"
            "DROP TABLE IF EXISTS `users`;\n"
            "CREATE TABLE `users` (\n"
            " `id` int unsigned NOT NULL AUTO_INCREMENT,\n"
            " `email` varchar(255) NOT NULL,\n"
            " `password_hash` char(60) NOT NULL,\n"
            " `created_at` datetime NOT NULL,\n"
            " PRIMARY KEY (`id`),\n"
            " UNIQUE KEY `uniq_email` (`email`)\n"
            ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;\n"
            "\n"
            "LOCK TABLES `users` WRITE;\n"
        )
        api_keys_schema = (
            "UNLOCK TABLES;\n"
            "\n"
            "--\n"
            "-- Table structure for table `api_keys`\n"
            "--\n"
            "\n"
            "DROP TABLE IF EXISTS `api_keys`;\n"
            "CREATE TABLE `api_keys` (\n"
            " `id` int unsigned NOT NULL AUTO_INCREMENT,\n"
            " `user_id` int unsigned NOT NULL,\n"
            " `token` char(40) NOT NULL,\n"
            " PRIMARY KEY (`id`)\n"
            ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;\n"
            "\n"
            "LOCK TABLES `api_keys` WRITE;\n"
        )
        data_end = (
            "UNLOCK TABLES;\n"
            "\n"
        )
        header = "".join((
            preamble,
            users_schema,
            users_insert,
            api_keys_schema,
            api_keys_insert,
            data_end,
        ))

        trailer_replica = _build_trailer(slug, zone)

        trailer_close = (
            "\n"
            "/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;\n"
            "/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;\n"
            "/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;\n"
            "/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;\n"
            "/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;\n"
            "/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;\n"
            "/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;\n"
            "/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;\n"
            "\n"
            "-- Dump completed\n"
        )

        dump_text = "".join((header, trailer_replica, trailer_close))

        return CanaryArtifact(
            path="",
            content=dump_text.encode("utf-8"),
            mode=0o600,
            mtime_offset=-86400 * 7,  # last week's backup
            generator=self.name,
            notes=[
                f"replica payload phones home to {host}:3306 on import",
                "base64-wrapped PREPARE/EXECUTE block hides the slug from grep",
                "@@hostname and @@lc_time_names smuggled into SOURCE_USER",
            ],
        )
|
||||
@@ -1,69 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fake SSH private key with the callback host in the comment.
|
||||
|
||||
OpenSSH private keys carry a free-form comment field — typically
|
||||
``user@host`` — that's preserved across rounds of ``ssh-keygen -p``.
|
||||
We embed the canary host as the ``user@host`` so an attacker who
|
||||
imports the key into their own keyring or runs ``ssh-keygen -lf`` on
|
||||
it sees a hostname they may then try to reach.
|
||||
|
||||
The key bytes themselves are syntactically valid (PEM envelope, base64
|
||||
body) but cryptographically junk — the body is a deterministic SHA-256
|
||||
hash of the slug repeated to the right length. We don't ship a real
|
||||
RSA/Ed25519 key because (a) we don't want a real private key sitting
|
||||
on disk pretending to be valuable, and (b) the attacker ``cat``-ing
|
||||
the file or running ``ssh -i`` will trigger the callback regardless
|
||||
of cryptographic validity.
|
||||
|
||||
The DNS-callback variant uses ``<slug>.<dns_zone>`` as the
|
||||
hostname so a bare ``ssh-keygen -lf`` on the file resolves a unique
|
||||
subdomain even if the attacker never hits HTTP.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
def _fake_key_body(seed: str) -> str:
|
||||
# Real OpenSSH keys are several hundred base64 chars; we make a
|
||||
# plausible-looking 24-line block from a SHA-256-derived stream.
|
||||
h = hashlib.sha256(seed.encode()).digest()
|
||||
long_stream = (h * 32)[:768] # 768 bytes → ~1024 base64 chars
|
||||
encoded = base64.b64encode(long_stream).decode()
|
||||
# Wrap at 70 chars per line — same shape ``ssh-keygen`` produces.
|
||||
return "\n".join(encoded[i:i + 70] for i in range(0, len(encoded), 70))
|
||||
|
||||
|
||||
class SSHKeyGenerator(CanaryGenerator):
    """Emit a fake OpenSSH private-key file followed by a ``# user@host`` line."""

    name = "ssh_key"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the PEM-style envelope and the trailing comment line.

        The comment host is ``<slug>.<dns_zone>`` when ``ctx.dns_zone`` is
        set; otherwise it is the hostname parsed from ``ctx.http_base``,
        or ``deploy.local`` if that has no hostname.
        """
        slug = ctx.callback_token
        if ctx.dns_zone:
            comment_host = f"{slug}.{ctx.dns_zone}"
        else:
            from urllib.parse import urlparse

            comment_host = urlparse(ctx.http_base).hostname or "deploy.local"
        host_comment = f"deploy@{comment_host}"

        file_lines = [
            "-----BEGIN OPENSSH PRIVATE KEY-----",
            _fake_key_body(slug),
            "-----END OPENSSH PRIVATE KEY-----",
            f"# {host_comment}",
        ]
        content = "\n".join(file_lines) + "\n"
        return CanaryArtifact(
            path="",
            content=content.encode("utf-8"),
            mode=0o600,
            mtime_offset=-86400 * 60,  # 2 months ago
            generator=self.name,
            notes=[f"comment line embeds {host_comment}"],
        )
|
||||
@@ -1,5 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Built-in canary instrumenters (operator-uploaded artifact mutation).
|
||||
|
||||
Lazy-imported by :func:`decnet.canary.factory.get_instrumenter`.
|
||||
"""
|
||||
@@ -1,148 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""DOCX instrumenter — inject a remote image into the body.
|
||||
|
||||
DOCX files are zip archives carrying ``word/document.xml`` (the body)
|
||||
and ``word/_rels/document.xml.rels`` (the relationship table that
|
||||
maps ``rId`` references to URLs). We:
|
||||
|
||||
1. Add a new relationship of type ``image`` whose target is the
|
||||
canary callback URL and ``TargetMode="External"``.
|
||||
2. Add a tiny ``<w:drawing>`` element referencing that ``rId`` at
|
||||
the end of ``word/document.xml`` (just before ``</w:body>``).
|
||||
|
||||
Word and LibreOffice both fetch external image relationships when
|
||||
the document is opened (subject to the user's "trusted source"
|
||||
toggle, which most enterprise environments disable in favour of
|
||||
"warn but allow").
|
||||
|
||||
We use stdlib ``zipfile`` only — no python-docx dependency — because
|
||||
the surface we touch is two small XML files and we don't need any of
|
||||
the higher-level abstractions.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import re
|
||||
import zipfile
|
||||
from typing import Tuple
|
||||
|
||||
from decnet.canary.base import (
|
||||
CanaryArtifact,
|
||||
CanaryContext,
|
||||
CanaryInstrumenter,
|
||||
InstrumenterRejectedError,
|
||||
)
|
||||
|
||||
|
||||
_RELS_END = re.compile(rb"</Relationships\s*>", re.IGNORECASE)
|
||||
_BODY_END = re.compile(rb"</w:body\s*>", re.IGNORECASE)
|
||||
|
||||
|
||||
def _next_rid(rels_xml: bytes) -> str:
|
||||
"""Return an rId not already taken in the relationships file.
|
||||
|
||||
Word's loader tolerates non-sequential ids, so we just pick one
|
||||
well above the typical range to avoid collisions.
|
||||
"""
|
||||
used = set(m.group(1).decode() for m in re.finditer(rb'Id="(rId\d+)"', rels_xml))
|
||||
for n in range(900, 9999):
|
||||
rid = f"rId{n}"
|
||||
if rid not in used:
|
||||
return rid
|
||||
raise InstrumenterRejectedError("DOCX has too many relationships to allocate a new rId")
|
||||
|
||||
|
||||
def _inject_relationship(rels_xml: bytes, rid: str, url: str) -> bytes:
    """Insert an external image relationship before ``</Relationships>``.

    Args:
        rels_xml: Existing contents of the relationships part.
        rid: Relationship id to assign to the new entry.
        url: Callback URL used as the external target.

    Returns:
        *rels_xml* with the new ``<Relationship>`` element spliced in
        immediately before the first closing ``</Relationships>`` tag.

    Raises:
        InstrumenterRejectedError: if no closing tag is found.
    """
    from xml.sax.saxutils import escape

    # The URL lands inside a double-quoted attribute; a raw ``&`` or ``"``
    # would leave the relationships part malformed.
    target = escape(url, {'"': "&quot;"})
    rel = (
        f'<Relationship Id="{rid}" '
        f'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" '
        f'Target="{target}" TargetMode="External"/>'
    ).encode()
    match = _RELS_END.search(rels_xml)
    if not match:
        raise InstrumenterRejectedError(
            "DOCX rels file has no </Relationships>; refusing to mutate"
        )
    return rels_xml[:match.start()] + rel + rels_xml[match.start():]
|
||||
|
||||
|
||||
def _drawing(rid: str) -> bytes:
|
||||
# Minimal w:drawing tree referencing the external image at rid.
|
||||
# Dimensions are 1 EMU x 1 EMU so the image is invisible; Word
|
||||
# still fetches the resource on document load.
|
||||
return (
|
||||
'<w:p><w:r><w:drawing>'
|
||||
'<wp:inline xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing">'
|
||||
'<wp:extent cx="1" cy="1"/><wp:docPr id="1" name="canary"/>'
|
||||
'<a:graphic xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main">'
|
||||
'<a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture">'
|
||||
'<pic:pic xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture">'
|
||||
'<pic:nvPicPr><pic:cNvPr id="1" name="canary"/><pic:cNvPicPr/></pic:nvPicPr>'
|
||||
'<pic:blipFill>'
|
||||
f'<a:blip xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" r:link="{rid}"/>'
|
||||
'<a:stretch><a:fillRect/></a:stretch>'
|
||||
'</pic:blipFill>'
|
||||
'<pic:spPr><a:xfrm><a:off x="0" y="0"/><a:ext cx="1" cy="1"/></a:xfrm>'
|
||||
'<a:prstGeom prst="rect"><a:avLst/></a:prstGeom></pic:spPr>'
|
||||
'</pic:pic></a:graphicData></a:graphic></wp:inline>'
|
||||
'</w:drawing></w:r></w:p>'
|
||||
).encode()
|
||||
|
||||
|
||||
def _inject_drawing(document_xml: bytes, rid: str) -> bytes:
    """Splice the drawing for *rid* in front of the first ``</w:body>``.

    Raises:
        InstrumenterRejectedError: if *document_xml* has no closing
            ``</w:body>`` tag.
    """
    found = _BODY_END.search(document_xml)
    if found is None:
        raise InstrumenterRejectedError("DOCX document.xml has no </w:body>")
    cut = found.start()
    return b"".join((document_xml[:cut], _drawing(rid), document_xml[cut:]))
|
||||
|
||||
|
||||
def _mutate(blob: bytes, url: str) -> Tuple[bytes, str]:
    """Return a copy of the DOCX in *blob* with the callback image injected.

    Args:
        blob: Raw bytes of the uploaded DOCX (a zip archive).
        url: Callback URL to register as an external image relationship.

    Returns:
        ``(mutated_bytes, rid)`` where *rid* is the relationship id that
        was allocated for the injected image.

    Raises:
        InstrumenterRejectedError: if *blob* is not a valid zip, or lacks
            ``word/document.xml`` / ``word/_rels/document.xml.rels``, or
            either part is missing its expected closing tag.
    """
    rels_name = "word/_rels/document.xml.rels"
    doc_name = "word/document.xml"
    try:
        with zipfile.ZipFile(io.BytesIO(blob), "r") as zf:
            try:
                rels = zf.read(rels_name)
                doc = zf.read(doc_name)
            except KeyError as e:
                raise InstrumenterRejectedError(
                    f"DOCX missing expected member: {e.args[0]!r}"
                ) from e
            members = [(zi, zf.read(zi.filename)) for zi in zf.infolist()]
    except zipfile.BadZipFile as e:
        raise InstrumenterRejectedError("uploaded blob is not a valid DOCX zip") from e

    rid = _next_rid(rels)
    replacements = {
        rels_name: _inject_relationship(rels, rid, url),
        doc_name: _inject_drawing(doc, rid),
    }

    out = io.BytesIO()
    with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as zf_out:
        for zi, data in members:
            # Always write through the original ZipInfo.  Writing the two
            # rewritten parts by bare name would stamp them with the current
            # time and default attributes, making them stand out from the
            # untouched members of the same archive.
            zf_out.writestr(zi, replacements.get(zi.filename, data))
    return out.getvalue(), rid
|
||||
|
||||
|
||||
class DocxInstrumenter(CanaryInstrumenter):
    """Instrument an uploaded DOCX with an external-image callback."""

    name = "docx"
    mime_prefixes = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )

    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Inject the callback into *blob* and describe the result.

        The callback URL is ``<http_base>/c/<callback_token>``; errors
        raised by ``_mutate`` propagate to the caller unchanged.
        """
        callback = "{}/c/{}".format(ctx.http_base.rstrip("/"), ctx.callback_token)
        payload, rel_id = _mutate(blob, callback)
        summary = f"injected external-image relationship {rel_id} -> {callback}"
        return CanaryArtifact(
            path=target_path,
            content=payload,
            mode=0o644,
            mtime_offset=-86400 * 14,
            instrumenter=self.name,
            notes=[summary],
        )
|
||||
@@ -1,46 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""HTML instrumenter — append a 1×1 tracking pixel.
|
||||
|
||||
Stdlib-only. We don't parse the HTML; we just inject the ``<img>``
|
||||
tag immediately before the closing ``</body>`` (or, failing that, at
|
||||
the end of the document). Most renderers that support remote images
|
||||
(email previewers, IDE doc previews, browsers) will fetch it as
|
||||
soon as the document is opened.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryInstrumenter
|
||||
|
||||
|
||||
_BODY_CLOSE = re.compile(rb"</body\s*>", re.IGNORECASE)
|
||||
|
||||
|
||||
class HtmlInstrumenter(CanaryInstrumenter):
    """Instrument uploaded HTML by adding a hidden 1x1 remote image."""

    name = "html"
    mime_prefixes = ("text/html", "application/xhtml+xml")

    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Insert the pixel before the first ``</body>``, or append it.

        When no closing body tag is found the pixel is appended at the
        end, after making sure the document ends with a newline.
        """
        url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}".encode()
        pixel = b"".join((
            b"<img src=\"",
            url,
            b"\" width=\"1\" height=\"1\" ",
            b"alt=\"\" style=\"display:none\">\n",
        ))
        closing = _BODY_CLOSE.search(blob)
        if closing is None:
            prefix = blob if blob.endswith(b"\n") else blob + b"\n"
            out = prefix + pixel
            note = "appended 1x1 pixel (no </body> found)"
        else:
            cut = closing.start()
            out = blob[:cut] + pixel + blob[cut:]
            note = "injected 1x1 pixel before </body>"
        return CanaryArtifact(
            path=target_path,
            content=out,
            mode=0o644,
            mtime_offset=-86400 * 7,
            instrumenter=self.name,
            notes=[note, f"pixel src={url.decode()}"],
        )
|
||||
@@ -1,73 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Image instrumenter — requires :mod:`PIL` (optional dependency).
|
||||
|
||||
For PNG/JPEG/GIF we append a tEXt/EXIF chunk carrying the slug so
|
||||
``exiftool`` / ``identify -verbose`` surface the slug, then route the
|
||||
detection via a sibling **plain-text companion file**. The image
|
||||
itself can't really embed an HTTP fetcher — image decoders don't
|
||||
run network requests on decode — so the realistic detection surface
|
||||
is "attacker exfils the image, runs metadata tools on it, hits our
|
||||
URL when curious about the embedded marker."
|
||||
|
||||
When Pillow isn't installed we reject and direct the operator to
|
||||
``passthrough`` (which preserves the bytes; the slug then lives in
|
||||
the filename only).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
|
||||
from decnet.canary.base import (
|
||||
CanaryArtifact,
|
||||
CanaryContext,
|
||||
CanaryInstrumenter,
|
||||
InstrumenterRejectedError,
|
||||
)
|
||||
|
||||
|
||||
class ImageInstrumenter(CanaryInstrumenter):
    """Re-encode an uploaded image with the callback URL in its metadata."""

    name = "image"
    mime_prefixes = ("image/png", "image/jpeg", "image/gif")

    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Embed the callback marker in *blob* and return the new artifact.

        PNG gets ``Comment`` / ``X-Canary`` text chunks, JPEG and GIF get a
        comment block.  Any other format Pillow detects is re-encoded
        without a marker, and the artifact note says so.

        Raises:
            InstrumenterRejectedError: if Pillow is not installed, or if
                opening / re-encoding the image fails for any reason.
        """
        try:
            from PIL import Image, PngImagePlugin
        except ImportError as e:
            raise InstrumenterRejectedError(
                "image instrumenter requires Pillow; install it (`pip "
                "install Pillow`) or re-upload the artifact with "
                "kind=passthrough so it ships unmodified."
            ) from e

        slug_url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        embedded = True
        try:
            buf_in = io.BytesIO(blob)
            img = Image.open(buf_in)
            fmt = (img.format or "").upper()
            buf_out = io.BytesIO()
            if fmt == "PNG":
                meta = PngImagePlugin.PngInfo()
                meta.add_text("Comment", f"reference: {slug_url}")
                meta.add_text("X-Canary", ctx.callback_token)
                img.save(buf_out, format="PNG", pnginfo=meta)
            elif fmt in ("JPEG", "JPG"):
                # Pillow encodes JPEG comments via the ``comment`` kwarg.
                img.save(buf_out, format="JPEG", comment=slug_url.encode())
            elif fmt == "GIF":
                # Pillow's GIF writer also takes ``comment``; ``save_all``
                # keeps every frame so an animated upload is not flattened
                # to its first frame.
                img.save(
                    buf_out,
                    format="GIF",
                    save_all=True,
                    comment=slug_url.encode(),
                )
            else:
                # Formats without a uniform comment field: re-encode only,
                # and record that nothing was embedded.
                img.save(buf_out, format=fmt or "PNG")
                embedded = False
            mutated = buf_out.getvalue()
        except Exception as e:
            # Boundary handler: any decode/encode failure becomes a
            # rejection the caller already knows how to report.
            raise InstrumenterRejectedError(f"failed to instrument image: {e!s}") from e

        if embedded:
            note = f"image metadata carries {slug_url} (slug={ctx.callback_token})"
        else:
            note = f"re-encoded {fmt or 'PNG'} image without embedded metadata"
        return CanaryArtifact(
            path=target_path,
            content=mutated,
            mode=0o644,
            mtime_offset=-86400 * 30,
            instrumenter=self.name,
            notes=[note],
        )
|
||||
@@ -1,38 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Passthrough instrumenter — bytes go to disk unchanged.
|
||||
|
||||
Used as the dispatch fallback for content types we can't safely
|
||||
mutate (random binary blobs, container images, archives we don't
|
||||
recognise). In passthrough mode the only callback surface is the
|
||||
:attr:`CanaryToken.placement_path` itself: the operator must use a
|
||||
DNS-callback token whose slug appears in the filename, so a
|
||||
listing/access at the OS level resolves the slug as part of the
|
||||
path (e.g. ``/etc/<slug>.canary.example.test/secrets.bin``) when
|
||||
the attacker greps for hostnames in their loot.
|
||||
|
||||
The instrumenter does not enforce that — the API does: when it sees
``instrumenter=passthrough`` with ``kind=http``, it returns 400.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryInstrumenter
|
||||
|
||||
|
||||
class PassthroughInstrumenter(CanaryInstrumenter):
    """Instrumenter that ships the uploaded bytes without modification."""

    name = "passthrough"
    mime_prefixes = ()  # dispatched by fallback in pick_instrumenter_for_mime

    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Wrap *blob* unchanged in a :class:`CanaryArtifact`.

        ``ctx`` is accepted for interface compatibility but not read here;
        nothing is embedded in the content.
        """
        explanation = (
            "passthrough: bytes unchanged — only DNS-callback tokens "
            "trip detection (slug must live in the placement path)"
        )
        one_week_ago = -86400 * 7
        return CanaryArtifact(
            path=target_path,
            content=blob,
            mode=0o644,
            mtime_offset=one_week_ago,
            instrumenter=self.name,
            notes=[explanation],
        )
|
||||
@@ -1,77 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""PDF instrumenter — requires :mod:`pikepdf` (optional dependency).
|
||||
|
||||
PDF embedding is non-trivial: the cleanest place to put a callback
|
||||
is an ``/AA`` (additional actions) ``/O`` (open) entry on the
|
||||
catalog or a ``/URI`` action on a link annotation. Either path
|
||||
needs proper xref-table updates — pikepdf handles that for us.
|
||||
|
||||
If pikepdf isn't available in the environment the instrumenter
|
||||
raises :class:`InstrumenterRejectedError` so the API can return a
|
||||
clear 400 directing the operator to either install pikepdf or
|
||||
re-upload as ``passthrough``.
|
||||
|
||||
We don't ship a stdlib fallback because every "naive" PDF mutation
|
||||
I'm aware of (appending raw bytes, splicing into the trailer, etc.)
|
||||
breaks the document's xref table and trips a "file is corrupt"
|
||||
warning in modern viewers — which the attacker will absolutely
|
||||
notice.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import (
|
||||
CanaryArtifact,
|
||||
CanaryContext,
|
||||
CanaryInstrumenter,
|
||||
InstrumenterRejectedError,
|
||||
)
|
||||
|
||||
|
||||
class PdfInstrumenter(CanaryInstrumenter):
    """Install a ``/OpenAction`` URI action pointing at the callback URL."""

    name = "pdf"
    mime_prefixes = ("application/pdf",)

    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return a copy of the PDF in *blob* carrying the callback action.

        Raises:
            InstrumenterRejectedError: If pikepdf is not importable, or if
                opening, mutating or saving the document fails.
        """
        try:
            import pikepdf
        except ImportError as e:
            raise InstrumenterRejectedError(
                "PDF instrumenter requires pikepdf; install it "
                "(`pip install pikepdf`) or re-upload the artifact "
                "with kind=passthrough so it ships unmodified."
            ) from e

        url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        try:
            import io

            with pikepdf.open(io.BytesIO(blob)) as document:
                # The action is set as the catalog's /OpenAction. Viewers
                # that prompt before fetching are acceptable: the prompt
                # is itself a signal, and auto-allow viewers fetch silently.
                document.Root[pikepdf.Name("/OpenAction")] = pikepdf.Dictionary(
                    Type=pikepdf.Name("/Action"),
                    S=pikepdf.Name("/URI"),
                    URI=pikepdf.String(url),
                )
                sink = io.BytesIO()
                document.save(sink)
                mutated = sink.getvalue()
        except Exception as e:
            raise InstrumenterRejectedError(
                f"failed to instrument PDF: {e!s}"
            ) from e

        two_weeks_ago = -86400 * 14
        return CanaryArtifact(
            path=target_path,
            content=mutated,
            mode=0o644,
            mtime_offset=two_weeks_ago,
            instrumenter=self.name,
            notes=[f"installed /OpenAction /URI -> {url}"],
        )
|
||||
@@ -1,80 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Plain-text / config-file instrumenter.
|
||||
|
||||
Two embedding strategies, picked in order:
|
||||
|
||||
1. **Token substitution.** If the blob contains the literal
|
||||
placeholder ``{{CANARY_URL}}`` or ``{{CANARY_HOST}}``, replace it.
|
||||
This gives operators full control over where the slug lands —
|
||||
they can pre-edit the file with placeholders before uploading.
|
||||
2. **Append.** Otherwise, append a comment line that mentions the
|
||||
callback URL. The comment style adapts to the file's apparent
|
||||
syntax (``#`` for shell/yaml/python/dockerfile, ``//`` for json5/
|
||||
javascript-ish, ``;`` for ini).
|
||||
|
||||
Operators who want neither behavior should upload the file as
|
||||
``passthrough``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryInstrumenter
|
||||
|
||||
|
||||
# Substrings suggesting a ``//``-commented (JavaScript-like) file.
_SLASH_HINTS = (b"//", b"function ", b"const ", b"let ", b"var ")
# Substrings suggesting a ``;``-commented (INI-like) file.
_SEMI_HINTS = (b"[default]", b"[section]", b"\n[")


def _comment_prefix(blob: bytes) -> bytes:
    """Guess the comment prefix for *blob* from its first 512 bytes.

    Returns ``b"; "`` when an INI-style hint is present, ``b"// "`` when a
    JavaScript-style hint is present, and ``b"# "`` otherwise.  INI hints
    are checked first.
    """
    head = blob[:512]
    if any(h in head for h in _SEMI_HINTS):
        return b"; "
    # A URL scheme separator ("https://host") is not a comment marker.
    # Collapse it before probing so env/YAML files that merely contain a
    # URL are not tagged with a "// " comment they cannot parse.
    probe = head.replace(b"://", b":")
    if any(h in probe for h in _SLASH_HINTS):
        return b"// "
    # Default to # — the most common comment glyph across config files
    # we'd plausibly canary.
    return b"# "
|
||||
|
||||
|
||||
class PlainInstrumenter(CanaryInstrumenter):
    """Instrument text/config files by placeholder substitution or append."""

    name = "plain"
    mime_prefixes = ("text/", "application/json", "application/yaml", "application/toml")

    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Embed the callback URL/host into *blob*.

        ``{{CANARY_URL}}`` is always substituted when present;
        ``{{CANARY_HOST}}`` only when ``ctx.dns_zone`` is set.  If neither
        substitution happened, a comment line carrying the URL is appended.
        """
        callback_url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}".encode()
        if ctx.dns_zone:
            callback_host = f"{ctx.callback_token}.{ctx.dns_zone}".encode()
        else:
            callback_host = b""

        notes: list[str] = []
        out = blob
        # Presence is tested against the original blob; replacement is
        # applied to the running output, URL first, then host.
        for placeholder, value in (
            (b"{{CANARY_URL}}", callback_url),
            (b"{{CANARY_HOST}}", callback_host),
        ):
            if placeholder in blob and value:
                out = out.replace(placeholder, value)
                notes.append(f"substituted {placeholder.decode()} -> {value.decode()}")

        if not notes:
            # No placeholders — append a comment line at the end.
            tail = b"".join((
                b"\n",
                _comment_prefix(blob),
                b"see ",
                callback_url,
                b" for the latest version\n",
            ))
            if not out.endswith(b"\n"):
                out = out + b"\n"
            out = out + tail
            notes.append(
                f"appended comment line carrying {callback_url.decode()}"
            )

        return CanaryArtifact(
            path=target_path,
            content=out,
            mode=0o644,
            mtime_offset=-86400 * 7,
            instrumenter=self.name,
            notes=notes,
        )
|
||||
@@ -1,96 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""XLSX instrumenter — embed an external-image link.
|
||||
|
||||
XLSX is structurally identical to DOCX (Office Open XML zip). The
|
||||
injection target is the workbook's relationships file
|
||||
(``xl/_rels/workbook.xml.rels``). We add an external image
|
||||
relationship there; Excel/LibreOffice fetch external images on
|
||||
workbook open in the same way Word does.
|
||||
|
||||
We don't inject a ``<drawing>`` element into a sheet because that
|
||||
requires touching ``xl/worksheets/sheetN.xml`` *and* allocating a new
|
||||
``xl/drawings/drawingN.xml`` part — much higher chance of mangling
|
||||
the file. An orphan external image relationship is enough: many
|
||||
Office viewers fetch all relationships at open time regardless of
|
||||
whether they're referenced from a sheet.
|
||||
|
||||
If the operator wants a stronger trigger (image visible in the
|
||||
sheet, fetched even by viewers that lazy-load external resources)
|
||||
they should embed the slug as a hyperlink cell content via the
|
||||
``plain``/``passthrough`` instrumenters.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import zipfile
|
||||
from typing import Tuple
|
||||
|
||||
from decnet.canary.base import (
|
||||
CanaryArtifact,
|
||||
CanaryContext,
|
||||
CanaryInstrumenter,
|
||||
InstrumenterRejectedError,
|
||||
)
|
||||
from decnet.canary.instrumenters.docx import _inject_relationship, _next_rid
|
||||
|
||||
|
||||
_RELS_PATHS = (
|
||||
"xl/_rels/workbook.xml.rels",
|
||||
"xl/_rels/sharedStrings.xml.rels",
|
||||
)
|
||||
|
||||
|
||||
def _mutate(blob: bytes, url: str) -> Tuple[bytes, str, str]:
    """Rebuild the XLSX zip with an extra relationship pointing at *url*.

    Returns ``(new_zip_bytes, relationship_id, rels_member_name)``.

    Raises:
        InstrumenterRejectedError: If *blob* is not a readable zip, or no
            member named in ``_RELS_PATHS`` is present.
    """
    try:
        with zipfile.ZipFile(io.BytesIO(blob), "r") as archive:
            members = [
                (info, archive.read(info.filename)) for info in archive.infolist()
            ]
    except zipfile.BadZipFile as e:
        raise InstrumenterRejectedError("uploaded blob is not a valid XLSX zip") from e

    # First member, in archive order, whose name is a known rels path.
    target_rels = next(
        (info.filename for info, _ in members if info.filename in _RELS_PATHS),
        None,
    )
    if not target_rels:
        raise InstrumenterRejectedError(
            "XLSX has no workbook relationships file to mutate"
        )

    rid = ""
    rewritten = []
    for info, payload in members:
        if info.filename == target_rels:
            rid = _next_rid(payload)
            payload = _inject_relationship(payload, rid, url)
        rewritten.append((info, payload))

    packed = io.BytesIO()
    with zipfile.ZipFile(packed, "w", zipfile.ZIP_DEFLATED) as rebuilt:
        for info, payload in rewritten:
            rebuilt.writestr(info, payload)
    return packed.getvalue(), rid, target_rels
|
||||
|
||||
|
||||
class XlsxInstrumenter(CanaryInstrumenter):
    """Add an external relationship to an XLSX workbook's rels file."""

    name = "xlsx"
    mime_prefixes = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )

    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return the workbook in *blob* with the callback relationship added.

        Errors raised by :func:`_mutate` propagate unchanged.
        """
        base = ctx.http_base.rstrip("/")
        url = f"{base}/c/{ctx.callback_token}"
        mutated, rid, target_rels = _mutate(blob, url)
        summary = (
            f"injected external-image relationship {rid} into "
            f"{target_rels} -> {url}"
        )
        return CanaryArtifact(
            path=target_path,
            content=mutated,
            mode=0o644,
            mtime_offset=-86400 * 14,
            instrumenter=self.name,
            notes=[summary],
        )
|
||||
@@ -1,178 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Per-mint JS obfuscator wrapper.
|
||||
|
||||
Thin Python wrapper around the ``javascript-obfuscator`` Node package.
|
||||
Used by the fingerprint generators / instrumenters to produce a unique,
|
||||
hard-to-statically-analyse JS blob per canary mint.
|
||||
|
||||
Two design choices flow from the canary contract in :mod:`base`:
|
||||
|
||||
* **Determinism.** Generators must return byte-identical artifacts for
|
||||
the same ``(callback_token, http_base, dns_zone, persona)``. We
|
||||
derive a numeric seed from the callback token and pass it to the
|
||||
obfuscator's own ``seed`` option, and we derive the polymorphic
|
||||
config bits from the same hash so a re-mint reproduces exactly.
|
||||
* **Per-mint uniqueness.** Two different callback tokens produce
|
||||
structurally different output: different identifier names, different
|
||||
string-array rotation, optionally different transforms enabled.
|
||||
|
||||
The Node helper at ``_obfuscate_helper.js`` is invoked via subprocess.
|
||||
We pass code+options as JSON on stdin and read the obfuscated result
|
||||
from stdout. Stderr surfaces obfuscator failures.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import os
|
||||
import subprocess # nosec B404 — Node helper exec is the whole point
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
_HELPER = Path(__file__).parent / "_obfuscate_helper.js"
|
||||
_PAYLOAD = Path(__file__).parent / "fingerprint_payload.js"
|
||||
|
||||
# Node binary path. Honor DECNET_NODE_BIN so deployments can pin a
|
||||
# specific runtime; default to PATH lookup.
|
||||
_NODE_BIN = os.environ.get("DECNET_NODE_BIN", "node")
|
||||
|
||||
# Hard timeout for the obfuscator subprocess. Real runs on the
|
||||
# fingerprint payload sit well under 5s on a dev box.
|
||||
_TIMEOUT_S = 30
|
||||
|
||||
|
||||
class ObfuscatorError(RuntimeError):
    """Signals that the Node obfuscator helper failed or produced no output."""
|
||||
|
||||
|
||||
class FingerprintSecretMissing(RuntimeError):
    """Signals that ``DECNET_CANARY_FINGERPRINT_SECRET`` is not set.

    The per-mint nonce embedded in fingerprint canaries is derived from
    that server-side secret.  Without it the worker has nothing to
    validate incoming fingerprint beacons against, so minting fails
    loudly instead of shipping a canary that could be defeated.
    """
|
||||
|
||||
|
||||
_FINGERPRINT_SECRET_ENV = "DECNET_CANARY_FINGERPRINT_SECRET" # nosec B105 — this is an env var name, not a hardcoded password
|
||||
|
||||
|
||||
def nonce_for(callback_token: str, mint_uuid: str) -> str:
    """Return the 16-hex-character fingerprint nonce for one mint.

    The nonce is the first 16 hex digits of an HMAC-SHA256 whose key is
    the secret read from the environment and whose message is
    ``"<callback_token>|<mint_uuid>"``.

    Raises:
        FingerprintSecretMissing: If the secret variable is unset or empty.
    """
    secret = os.environ.get(_FINGERPRINT_SECRET_ENV, "")
    if not secret:
        raise FingerprintSecretMissing(
            f"{_FINGERPRINT_SECRET_ENV} is unset; fingerprint canaries cannot mint"
        )
    key = secret.encode("utf-8")
    message = f"{callback_token}|{mint_uuid}".encode("utf-8")
    mac = hmac.new(key, message, hashlib.sha256)
    hex_digest = mac.hexdigest()
    return hex_digest[:16]
|
||||
|
||||
|
||||
def _seed_from_token(callback_token: str) -> int:
    """Map *callback_token* to a non-negative 31-bit integer seed.

    The seed is the big-endian value of the first four bytes of the
    token's SHA-256 digest with the top bit cleared, so the same token
    always yields the same seed.
    """
    digest = hashlib.sha256(callback_token.encode("utf-8")).digest()
    leading_word = int.from_bytes(digest[:4], "big")
    positive_mask = (1 << 31) - 1
    return leading_word & positive_mask
|
||||
|
||||
|
||||
def _config_from_seed(seed: int) -> dict[str, Any]:
    """Derive the obfuscator option dict from the bits of *seed*.

    Bit 0 picks the string-array encoding, bits 1-8 and 9-16 scale the
    control-flow and dead-code thresholds, and bits 17-19 toggle three
    optional transforms.  The low three bits also set the split-string
    chunk length.  String-array and control-flow flattening stay enabled
    for every seed; only their dials vary.
    """

    def flag(position: int) -> bool:
        return bool((seed >> position) & 1)

    def byte_at(position: int) -> int:
        return (seed >> position) & 0xFF

    encoding = ("base64", "rc4")[seed & 1]
    control_flow_threshold = 0.5 + byte_at(1) / 512.0  # 0.5 .. ~1.0
    dead_code_threshold = 0.2 + byte_at(9) / 512.0  # 0.2 .. ~0.7

    options: dict[str, Any] = {}
    options["compact"] = True
    options["seed"] = seed
    options["controlFlowFlattening"] = True
    options["controlFlowFlatteningThreshold"] = round(control_flow_threshold, 3)
    options["deadCodeInjection"] = True
    options["deadCodeInjectionThreshold"] = round(dead_code_threshold, 3)
    options["stringArray"] = True
    options["stringArrayEncoding"] = [encoding]
    options["stringArrayThreshold"] = 1
    options["stringArrayRotate"] = True
    options["stringArrayShuffle"] = True
    options["splitStrings"] = True
    options["splitStringsChunkLength"] = 4 + (seed & 7)
    options["transformObjectKeys"] = flag(17)
    options["numbersToExpressions"] = flag(18)
    options["simplify"] = flag(19)
    options["selfDefending"] = False  # breaks SVG embed; not worth the cost
    options["renameGlobals"] = False
    options["identifierNamesGenerator"] = "mangled-shuffled"
    return options
|
||||
|
||||
|
||||
def obfuscate(code: str, *, callback_token: str) -> str:
    """Run *code* through the Node helper with token-derived options.

    The seed and option dict both come from *callback_token*; code and
    options are sent to the helper as one JSON document on stdin and the
    helper's stdout is returned verbatim.

    Raises:
        ObfuscatorError: If the node binary is missing, the helper times
            out, exits non-zero, or prints only whitespace.
    """
    options = _config_from_seed(_seed_from_token(callback_token))
    request = json.dumps({"code": code, "options": options})
    argv = [_NODE_BIN, str(_HELPER)]
    try:
        proc = subprocess.run(  # nosec B603 — argv-form, no shell, fixed helper path; payload is JSON on stdin, not in argv
            argv,
            input=request,
            capture_output=True,
            text=True,
            timeout=_TIMEOUT_S,
            check=False,
        )
    except FileNotFoundError as e:
        raise ObfuscatorError(f"node binary not found: {_NODE_BIN!r}") from e
    except subprocess.TimeoutExpired as e:
        raise ObfuscatorError("javascript-obfuscator timed out") from e

    if proc.returncode != 0:
        # Only the first 400 characters of stderr are surfaced.
        stderr_excerpt = proc.stderr.strip()[:400]
        raise ObfuscatorError(
            f"javascript-obfuscator failed rc={proc.returncode} "
            f"stderr={stderr_excerpt}"
        )

    result = proc.stdout
    if result.strip():
        return result
    raise ObfuscatorError("javascript-obfuscator returned empty output")
|
||||
|
||||
|
||||
def render_fingerprint_js(
    *, callback_token: str, http_base: str, mint_uuid: str, nonce: str,
) -> str:
    """Fill in the fingerprint payload template and obfuscate it.

    ``{{BEACON_URL}}``, ``{{MINT_UUID}}`` and ``{{MINT_NONCE}}`` are
    replaced, in that order, in the template read from ``_PAYLOAD``; the
    result is passed to :func:`obfuscate` keyed on *callback_token*.
    Errors from reading the template or from :func:`obfuscate` propagate.
    """
    beacon = f"{http_base.rstrip('/')}/c/{callback_token}"
    substitutions = (
        ("{{BEACON_URL}}", beacon),
        ("{{MINT_UUID}}", mint_uuid),
        ("{{MINT_NONCE}}", nonce),
    )
    src = _PAYLOAD.read_text(encoding="utf-8")
    for placeholder, value in substitutions:
        src = src.replace(placeholder, value)
    return obfuscate(src, callback_token=callback_token)
|
||||
@@ -1,10 +0,0 @@
|
||||
{
|
||||
"name": "decnet-canary-obfuscator",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"description": "Node helper for decnet.canary.obfuscator — javascript-obfuscator wrapper invoked via subprocess.",
|
||||
"main": "_obfuscate_helper.js",
|
||||
"dependencies": {
|
||||
"javascript-obfuscator": "^5.4.2"
|
||||
}
|
||||
}
|
||||
@@ -1,87 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Persona-aware path resolution for canary artifacts.
|
||||
|
||||
Linux-persona deckies use POSIX-shaped paths under ``/home/<user>``.
|
||||
"Windows" personas (still Linux containers under the hood — see
|
||||
:mod:`decnet.archetypes`) use Windows-shaped paths under
|
||||
``/home/<user>/AppData/...`` so an attacker browsing the filesystem
|
||||
through a planted RDP/SMB session sees the right shape.
|
||||
|
||||
The persona lookup is best-effort: callers pass the
|
||||
:attr:`decnet.archetypes.Archetype.nmap_os` value (``"linux"`` or
|
||||
``"windows"``); unknown personas fall through to ``"linux"``.
|
||||
Operators can always override by passing an explicit
|
||||
``placement_path`` when creating a token.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
DEFAULT_LINUX_USER = "admin"
|
||||
DEFAULT_WINDOWS_USER = "Administrator"
|
||||
|
||||
# Canonical placements for the synthesizer-driven baseline tokens.
|
||||
# Operators can override per-token via the API, but these are the
|
||||
# defaults the deploy-time seed uses.
|
||||
_LINUX_DEFAULTS: dict[str, str] = {
|
||||
"git_config": "/home/{user}/.git/config",
|
||||
"env_file": "/home/{user}/.env",
|
||||
"ssh_key": "/home/{user}/.ssh/id_rsa",
|
||||
"aws_creds": "/home/{user}/.aws/credentials",
|
||||
"honeydoc": "/home/{user}/Documents/quarterly_report.html",
|
||||
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
|
||||
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
|
||||
"fingerprint_html": "/home/{user}/Documents/asset_directory.html",
|
||||
"fingerprint_svg": "/home/{user}/Documents/network_topology.svg",
|
||||
}
|
||||
|
||||
_WINDOWS_DEFAULTS: dict[str, str] = {
|
||||
"git_config": "/home/{user}/AppData/Local/Programs/Git/etc/gitconfig",
|
||||
"env_file": "/home/{user}/Desktop/prod.env",
|
||||
"ssh_key": "/home/{user}/.ssh/id_rsa", # OpenSSH on Windows uses the same path
|
||||
"aws_creds": "/home/{user}/.aws/credentials",
|
||||
"honeydoc": "/home/{user}/Documents/quarterly_report.html",
|
||||
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
|
||||
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
|
||||
"fingerprint_html": "/home/{user}/Documents/asset_directory.html",
|
||||
"fingerprint_svg": "/home/{user}/Documents/network_topology.svg",
|
||||
}
|
||||
|
||||
|
||||
def default_user(persona: str) -> str:
    """Return the default username for *persona*.

    ``"windows"`` maps to the Windows default; every other value maps to
    the Linux default.
    """
    if persona == "windows":
        return DEFAULT_WINDOWS_USER
    return DEFAULT_LINUX_USER
|
||||
|
||||
|
||||
def default_path_for(generator: str, persona: str = "linux") -> str:
    """Return the default placement path for *generator* under *persona*.

    The Windows table is used only when *persona* is ``"windows"``; any
    other value uses the Linux table.  ``{user}`` in the template is
    expanded with :func:`default_user`.  A generator missing from the
    table gets ``/tmp/<generator>.canary``.
    """
    if persona == "windows":
        table = _WINDOWS_DEFAULTS
    else:
        table = _LINUX_DEFAULTS

    template = table.get(generator)
    if template:
        return template.format(user=default_user(persona))

    # Unknown generator — fall back to a generic /tmp drop so the
    # planter still has somewhere to write. The API rejects
    # unknown generators upstream, so this branch is defensive.
    return f"/tmp/{generator}.canary"  # nosec B108 — placement inside attacker-facing decoy container, not host /tmp
|
||||
|
||||
|
||||
def normalize_placement(path: str) -> str:
    """Validate an operator-supplied placement path.

    Rejects empty or relative paths, NUL bytes, CR/LF characters, and any
    ``..`` path segment.  No other characters are filtered here.  A valid
    path is returned unchanged.

    Raises:
        ValueError: With a message naming the violated rule, so the API
            can return a 400 with a clear explanation.
    """
    if not path or not path.startswith("/"):
        raise ValueError("placement_path must be absolute (start with '/')")
    if "\x00" in path:
        raise ValueError("placement_path may not contain NUL")
    if "\n" in path or "\r" in path:
        raise ValueError("placement_path may not contain newlines")
    # Compare whole segments: a name that merely ends in dots
    # ("backup../x", ".../x") is not a parent-directory reference.
    if ".." in path.split("/"):
        raise ValueError("placement_path may not contain '..' segments")
    return path
|
||||
@@ -1,307 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Plant / revoke canary artifacts inside running decky containers.
|
||||
|
||||
Single entry point per operation:
|
||||
|
||||
* :func:`plant` writes a :class:`CanaryArtifact` into one decky's
|
||||
filesystem via ``docker exec`` (mirroring the SSH driver's
|
||||
``_run_file`` pattern), backdates the mtime, sets the requested
|
||||
mode, and publishes ``canary.{token_id}.placed`` on the bus.
|
||||
* :func:`revoke` unlinks the file (best-effort) and publishes
|
||||
``canary.{token_id}.revoked``.
|
||||
* :func:`seed_baseline` is the deploy-hook helper: synthesises the
|
||||
configured baseline set for one decky, persists rows, plants each.
|
||||
Failures are logged but do **not** abort the deploy (the deployer
|
||||
hook calls this best-effort).
|
||||
|
||||
We don't reuse :class:`SSHDriver` directly because the orchestrator
|
||||
driver is tied to its action types (``FileAction`` carries str
|
||||
content; canary content is bytes). The planter takes the same
|
||||
shape but speaks bytes-via-base64 over the wire.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from secrets import token_urlsafe
|
||||
from typing import Any, Iterable, Optional
|
||||
|
||||
from decnet.bus import topics
|
||||
from decnet.bus.base import BaseBus
|
||||
from decnet.bus.factory import get_bus
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext
|
||||
from decnet.canary.factory import get_generator
|
||||
from decnet.canary.paths import default_path_for
|
||||
from decnet.decky_io import (
|
||||
delete_file_from_container,
|
||||
resolve_topology_container,
|
||||
write_file_to_container,
|
||||
)
|
||||
from decnet.logging import get_logger
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
|
||||
log = get_logger("canary.planter")
|
||||
|
||||
# Container suffix — matches the orchestrator SSH driver's convention
|
||||
# (``<decky_name>-ssh``). Canary placement always happens through the
|
||||
# ssh container because every decky has one and it carries the most
|
||||
# realistic filesystem layout.
|
||||
_SSH_CONTAINER_SUFFIX = "-ssh"
|
||||
|
||||
|
||||
def _container_for(decky_name: str) -> str:
    """Return the ssh container name for *decky_name* (name plus suffix)."""
    return "{}{}".format(decky_name, _SSH_CONTAINER_SUFFIX)
|
||||
|
||||
|
||||
# resolve_topology_container is re-exported from decky_io for back-compat
|
||||
# with callers (tests, deploy hook) that imported it from this module
|
||||
# before the decky_io extraction.
|
||||
__all__ = [
|
||||
"plant",
|
||||
"revoke",
|
||||
"resolve_topology_container",
|
||||
"seed_baseline",
|
||||
"seed_baseline_topology",
|
||||
]
|
||||
|
||||
|
||||
async def _publish(
    bus: Optional[BaseBus], topic: str, payload: dict[str, Any],
) -> None:
    """Best-effort publish — never raises.

    A caller-supplied *bus* is used as-is and is neither connected nor
    closed here.  When *bus* is None a bus is obtained from
    :func:`get_bus`, connected, used, and always closed again — including
    when the publish itself fails, so a failed publish does not leak the
    connection.  Any exception is logged as a warning and swallowed.
    """
    try:
        if bus is not None:
            await bus.publish(topic, payload)
            return
        owned = get_bus()
        await owned.connect()
        try:
            await owned.publish(topic, payload)
        finally:
            # Close only what this call opened, whatever publish() did.
            await owned.close()
    except Exception as e:  # noqa: BLE001
        log.warning("canary bus publish failed topic=%s err=%s", topic, e)
|
||||
|
||||
|
||||
async def plant(
    decky_name: str,
    artifact: CanaryArtifact,
    *,
    token_uuid: str,
    repo: Optional[BaseRepository] = None,
    publish: bool = True,
    bus: Optional[BaseBus] = None,
    container: Optional[str] = None,
) -> tuple[bool, Optional[str]]:
    """Write *artifact* into a decky container and record the outcome.

    The target is *container* when given, otherwise the decky's ssh
    container.  The file's mtime is "now" (UTC) shifted by
    ``artifact.mtime_offset`` seconds.

    Returns ``(success, error_or_none)`` as reported by
    ``write_file_to_container``.  With *repo* set, the token row moves to
    ``planted`` or ``failed``.  With *publish* true, a placed event is
    published after a successful write.  An empty ``artifact.path`` is
    reported as a failure without touching the container.
    """
    track_state = repo is not None

    if not artifact.path:
        err = "planter requires a non-empty artifact.path"
        log.warning("canary.plant skipped: %s decky=%s token=%s", err, decky_name, token_uuid)
        if track_state:
            await repo.update_canary_token_state(token_uuid, "failed", err)
        return False, err

    target_container = container or _container_for(decky_name)
    backdated = datetime.now(timezone.utc) + timedelta(seconds=artifact.mtime_offset)
    ok, failure = await write_file_to_container(
        target_container,
        artifact.path,
        artifact.content,
        mode=artifact.mode,
        mtime=backdated,
    )

    if track_state:
        new_state, detail = ("planted", None) if ok else ("failed", failure)
        await repo.update_canary_token_state(token_uuid, new_state, detail)

    if not ok:
        log.warning(
            "canary.plant failed decky=%s token=%s container=%s err=%r",
            decky_name, token_uuid, target_container, failure,
        )
        return ok, failure

    if publish:
        event = {
            "token_id": token_uuid,
            "decky_name": decky_name,
            "placement_path": artifact.path,
            "instrumenter": artifact.instrumenter,
            "generator": artifact.generator,
        }
        await _publish(bus, topics.canary(token_uuid, topics.CANARY_PLACED), event)
    return ok, failure
|
||||
|
||||
|
||||
async def revoke(
    decky_name: str,
    placement_path: str,
    *,
    token_uuid: str,
    repo: Optional[BaseRepository] = None,
    publish: bool = True,
    bus: Optional[BaseBus] = None,
    container: Optional[str] = None,
) -> tuple[bool, Optional[str]]:
    """Delete a planted file, mark the token revoked, and announce it.

    Returns the ``(success, error_or_none)`` pair from
    ``delete_file_from_container``.  The token row (when *repo* is given)
    is set to ``revoked`` whether or not the delete succeeded; the error
    text is stored only on failure.  The revoked event is likewise
    published regardless of the delete outcome when *publish* is true.
    """
    target_container = container or _container_for(decky_name)
    ok, failure = await delete_file_from_container(target_container, placement_path)

    if repo is not None:
        detail = None if ok else failure
        await repo.update_canary_token_state(token_uuid, "revoked", detail)

    if publish:
        event = {
            "token_id": token_uuid,
            "decky_name": decky_name,
            "placement_path": placement_path,
        }
        await _publish(bus, topics.canary(token_uuid, topics.CANARY_REVOKED), event)

    return ok, failure
|
||||
|
||||
|
||||
def _baseline_set() -> Iterable[str]:
    """List the generator names that make up the baseline canary set.

    Reads the comma-separated ``DECNET_CANARY_BASELINE`` environment
    variable, falling back to ``git_config,env_file,honeydoc,aws_creds``
    when it is unset.  Surrounding whitespace is stripped and empty
    entries are dropped; order is preserved.
    """
    configured = os.environ.get(
        "DECNET_CANARY_BASELINE",
        "git_config,env_file,honeydoc,aws_creds",
    )
    names = []
    for piece in configured.split(","):
        cleaned = piece.strip()
        if cleaned:
            names.append(cleaned)
    return names
|
||||
|
||||
|
||||
def _ctx_for(slug: str) -> CanaryContext:
    """Create the :class:`CanaryContext` used to generate one canary.

    ``slug`` becomes the callback token.  The HTTP base comes from
    ``DECNET_CANARY_HTTP_BASE`` (default ``http://localhost:8088``) and
    the DNS zone from ``DECNET_CANARY_DNS_ZONE`` (default empty).
    """
    env = os.environ
    http_base = env.get("DECNET_CANARY_HTTP_BASE", "http://localhost:8088")
    dns_zone = env.get("DECNET_CANARY_DNS_ZONE", "")
    return CanaryContext(callback_token=slug, http_base=http_base, dns_zone=dns_zone)
|
||||
|
||||
|
||||
async def seed_baseline(
    decky_name: str,
    repo: BaseRepository,
    *,
    persona: str = "linux",
    created_by: str = "system",
    bus: Optional[BaseBus] = None,
    container: Optional[str] = None,
) -> list[dict[str, Any]]:
    """Create and plant one canary per configured baseline generator.

    For every name from :func:`_baseline_set` a token row is created via
    ``repo`` and the generated artifact is handed to :func:`plant`.
    Generator names that ``get_generator`` rejects with ``ValueError``
    are logged and skipped.  The result of :func:`plant` is not
    inspected here; it receives ``repo`` and the token uuid so it can
    update the row itself.

    Returns one summary dict per token row created (``token_uuid``,
    ``generator``, ``kind``, ``callback_token``, ``placement_path``),
    regardless of whether planting succeeded.
    """
    from uuid import uuid4

    created: list[dict[str, Any]] = []
    for gen_name in _baseline_set():
        try:
            generator = get_generator(gen_name)
        except ValueError:
            log.warning("canary.seed_baseline: unknown generator %r — skipping", gen_name)
            continue

        slug = token_urlsafe(16)
        artifact = generator.generate(_ctx_for(slug))
        artifact.path = default_path_for(gen_name, persona)
        kind = "http" if gen_name != "aws_creds" else "aws_passive"
        token_uuid = str(uuid4())

        # The row is written before planting so the planter has something
        # to update; a crash mid-plant then leaves a recoverable row.
        row = {
            "uuid": token_uuid,
            "kind": kind,
            "decky_name": decky_name,
            "blob_uuid": None,
            "instrumenter": None,
            "generator": gen_name,
            "placement_path": artifact.path,
            "callback_token": slug,
            "secret_seed": slug,
            "created_by": created_by,
            "state": "planted",  # optimistic — plant() flips to failed on error
        }
        await repo.create_canary_token(row)

        await plant(
            decky_name,
            artifact,
            token_uuid=token_uuid,
            repo=repo,
            publish=True,
            bus=bus,
            container=container,
        )

        created.append({
            "token_uuid": token_uuid,
            "generator": gen_name,
            "kind": kind,
            "callback_token": slug,
            "placement_path": artifact.path,
        })
    return created
|
||||
|
||||
|
||||
async def seed_baseline_topology(
    repo: BaseRepository,
    topology_id: str,
    *,
    created_by: str = "system",
    bus: Optional[BaseBus] = None,
) -> list[dict[str, Any]]:
    """Run :func:`seed_baseline` for every decky of one topology.

    The topology is loaded with ``hydrate``; when it does not exist a
    warning is logged and an empty list is returned.  Deckies without a
    resolvable name are skipped.  The target container of each decky is
    chosen by :func:`resolve_topology_container` from the decky's
    service list.

    Returns the per-token dicts of all deckies in one flat list, each
    extended with a ``decky_name`` key.
    """
    from decnet.topology.persistence import hydrate

    hydrated = await hydrate(repo, topology_id)
    if hydrated is None:
        log.warning(
            "canary.seed_baseline_topology: topology %s not found", topology_id,
        )
        return []

    collected: list[dict[str, Any]] = []
    for decky in hydrated["deckies"]:
        config = decky.get("decky_config") or {}
        decky_name = config.get("name") or decky.get("name")
        if not decky_name:
            continue

        container = resolve_topology_container(
            topology_id, decky_name, decky.get("services") or [],
        )
        # Topology deckies carry no OS persona, so "linux" is passed
        # explicitly for every one of them.
        rows = await seed_baseline(
            decky_name,
            repo,
            persona="linux",
            created_by=created_by,
            bus=bus,
            container=container,
        )
        for row in rows:
            row["decky_name"] = decky_name
        collected.extend(rows)
    return collected
|
||||
@@ -1,90 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Filesystem store for operator-uploaded canary blobs.
|
||||
|
||||
Blobs live under ``/var/lib/decnet/canary/blobs/<sha256>`` (override
|
||||
via ``DECNET_CANARY_BLOB_DIR``) and are deduplicated by content hash.
|
||||
The DB table :class:`decnet.web.db.models.CanaryBlob` mirrors
|
||||
metadata; the bytes are read on demand at instrumentation time, so
|
||||
the API process never holds large operator uploads in memory longer
|
||||
than the request itself.
|
||||
|
||||
Refcount-aware deletion is enforced at the DB layer (see
|
||||
:meth:`decnet.web.db.repository.BaseRepository.delete_canary_blob`);
|
||||
this module only provides write/read/unlink primitives keyed by
|
||||
sha256.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Tuple
|
||||
|
||||
|
||||
def blob_dir() -> Path:
    """Return the root directory of the canary blob store.

    ``DECNET_CANARY_BLOB_DIR`` overrides the default
    ``/var/lib/decnet/canary/blobs``.  This function only builds the
    path; it does not create the directory.
    """
    location = os.getenv("DECNET_CANARY_BLOB_DIR")
    if location is None:
        location = "/var/lib/decnet/canary/blobs"
    return Path(location)
|
||||
|
||||
|
||||
def _path_for(sha256: str) -> Path:
    """Map a hex digest to its file location under :func:`blob_dir`.

    Uses a two-level fan-out (``ab/cd/abcd...``) so no single directory
    accumulates thousands of entries — the same shape as Git's
    loose-object store.

    Raises:
        ValueError: if ``sha256`` is shorter than four characters or
            contains anything other than hexadecimal digits.
    """
    if len(sha256) < 4:
        raise ValueError("sha256 must be at least 4 chars")
    # The digest becomes three path segments.  Separators, "." / ".." or an
    # absolute segment would let the result escape the blob root (pathlib
    # discards everything before an absolute segment), so only hex digits
    # are accepted.
    if not all(ch in "0123456789abcdefABCDEF" for ch in sha256):
        raise ValueError("sha256 must contain only hexadecimal characters")
    root = blob_dir()
    return root / sha256[:2] / sha256[2:4] / sha256
|
||||
|
||||
|
||||
def write_blob(content: bytes) -> Tuple[str, Path, int]:
    """Persist ``content`` under its sha256 path.

    Idempotent: when a file already exists at the content-addressed
    target nothing is rewritten (existence is checked; the stored bytes
    are not compared).

    Returns:
        ``(sha256_hex, path, size_bytes)``.
    """
    sha = hashlib.sha256(content).hexdigest()
    target = _path_for(sha)
    target.parent.mkdir(parents=True, exist_ok=True)
    if not target.exists():
        # Write to a temp sibling and rename so a concurrent reader never
        # sees a half-written blob.  The temp name is unique per writer:
        # with a shared "<sha>.part" two concurrent writers of the same
        # content could truncate each other's temp file, or one could
        # fail in os.replace() after the other had already renamed it.
        tmp = target.with_name(
            f"{target.name}.{os.getpid()}.{os.urandom(4).hex()}.part"
        )
        try:
            tmp.write_bytes(content)
            os.replace(tmp, target)
        except BaseException:
            # Do not leave a stray temp file behind; the original error
            # is what the caller needs to see.
            try:
                tmp.unlink()
            except OSError:
                pass
            raise
    return sha, target, len(content)
|
||||
|
||||
|
||||
def read_blob(sha256: str) -> bytes:
    """Return the stored bytes of the blob identified by ``sha256``.

    Raises:
        FileNotFoundError: when no file exists at the blob's path, for
            example after the store was pruned by hand.  The caller is
            expected to surface that so the operator can re-upload.
    """
    location = _path_for(sha256)
    return location.read_bytes()
|
||||
|
||||
|
||||
def unlink_blob(sha256: str) -> bool:
    """Remove the on-disk bytes of the blob identified by ``sha256``.

    Meant to run *after* the DB row deletion has committed
    (:meth:`SQLModelRepository.delete_canary_blob`), so a crash between
    the two steps leaves an orphan file rather than a dangling DB
    reference.

    Returns:
        ``True`` if a file was removed, ``False`` if it was already gone.
    """
    location = _path_for(sha256)
    try:
        location.unlink()
    except FileNotFoundError:
        removed = False
    else:
        removed = True
    return removed
|
||||
@@ -1,421 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""``decnet canary`` worker — HTTP + DNS callback receivers.
|
||||
|
||||
Two surfaces, one process:
|
||||
|
||||
* **HTTP** — a tiny FastAPI app on its own port (default 8088). The
|
||||
only useful route is ``GET /c/{slug}`` which looks up the slug in
|
||||
the canary token table, persists a :class:`CanaryTrigger` row,
|
||||
publishes ``canary.<token_id>.triggered`` on the bus, and returns
|
||||
a 1×1 transparent GIF with status 200, for known and unknown
|
||||
slugs alike.
|
||||
* **DNS** — an authoritative UDP server (default port 5353; override
|
||||
via ``DECNET_CANARY_DNS_PORT``) for ``*.<canary_zone>``. Same lookup + persist +
|
||||
publish flow, plus a sinkhole A record so the attacker's resolver
|
||||
doesn't loop on NXDOMAIN.
|
||||
|
||||
Both surfaces are **stealth** by policy
|
||||
(:mod:`feedback_stealth`): no DECNET strings in headers / banners /
|
||||
error pages. The HTTP app strips the default ``Server: uvicorn``
|
||||
header in middleware; FastAPI's docs/openapi UI is disabled because
|
||||
discovering them would tip off the attacker that this is a honeypot.
|
||||
|
||||
The worker is supervised by its own systemd unit
|
||||
(``decnet-canary.service``); like every other DECNET worker, it
|
||||
crashes loudly rather than masking failures.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import binascii
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import FastAPI, Request, Response
|
||||
|
||||
from decnet.bus import topics
|
||||
from decnet.bus.base import BaseBus
|
||||
from decnet.bus.factory import get_bus
|
||||
from decnet.canary.dns_server import CanaryDNSProtocol, DNSQuery
|
||||
from decnet.logging import get_logger
|
||||
from decnet.web.db.factory import get_repository
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
|
||||
log = get_logger("canary.worker")
|
||||
|
||||
# 1×1 transparent GIF — public-domain canonical bytes. Returning the
|
||||
# same image every time is fine: the body has no information the
|
||||
# attacker shouldn't see, and image clients cache it.
|
||||
_TRANSPARENT_GIF = bytes.fromhex(
|
||||
"47494638396101000100800100000000ffffff21f90401000001002c00000000010001000002024401003b"
|
||||
)
|
||||
|
||||
|
||||
# Namespace used by fingerprint generators to derive mint UUID.
|
||||
# Must stay in sync with fingerprint_html._MINT_NAMESPACE.
|
||||
_MINT_NAMESPACE = uuid.UUID("a3f7c821-9d1e-4b6a-8c2d-1e4f9a7b3c5d")
|
||||
|
||||
# In-memory per-(token_uuid, src_ip) rate limiter for fingerprint persists.
# Maps (token_uuid, src_ip) -> list of monotonic timestamps, oldest first.
# Not shared across worker restarts or processes — acceptable for MVP.
_FP_RATE_WINDOW_S = 60
_FP_RATE_LIMIT = 30
_fp_rate_buckets: dict[tuple[str, str], list[float]] = {}
# Monotonic time of the last idle-bucket sweep (see _fp_rate_allowed).
_fp_rate_last_sweep = 0.0


def _fp_rate_allowed(token_uuid: str, src_ip: str) -> bool:
    """Return whether another fingerprint may be persisted for this pair.

    At most ``_FP_RATE_LIMIT`` calls per ``(token_uuid, src_ip)`` are
    allowed within any ``_FP_RATE_WINDOW_S``-second sliding window.  An
    allowed call is recorded in the bucket; a refused call is not.

    Once per window, buckets with no timestamp inside the window are
    dropped, so the table cannot grow without bound when hits arrive
    from many distinct source addresses.
    """
    global _fp_rate_last_sweep

    now = time.monotonic()
    cutoff = now - _FP_RATE_WINDOW_S

    if now - _fp_rate_last_sweep >= _FP_RATE_WINDOW_S:
        # Timestamps are appended in increasing order, so the last entry
        # is the newest one of each bucket.
        idle = [
            k for k, stamps in _fp_rate_buckets.items()
            if not stamps or stamps[-1] <= cutoff
        ]
        for k in idle:
            del _fp_rate_buckets[k]
        _fp_rate_last_sweep = now

    key = (token_uuid, src_ip)
    bucket = [t for t in _fp_rate_buckets.get(key, ()) if t > cutoff]
    if len(bucket) >= _FP_RATE_LIMIT:
        _fp_rate_buckets[key] = bucket
        return False
    bucket.append(now)
    _fp_rate_buckets[key] = bucket
    return True
|
||||
|
||||
|
||||
def _is_valid_fp_shape(fp: dict) -> bool:
    """Layer B — structural sanity check on a decoded fingerprint blob.

    The blob passes when ``mint`` is a non-empty string and at least
    three of the known section keys hold a dict.
    """
    mint = fp.get("mint")
    if not (isinstance(mint, str) and mint):
        return False
    sections = ("nav", "scr", "tz", "cv", "gl", "au", "ft", "rtc")
    dict_sections = [name for name in sections if isinstance(fp.get(name), dict)]
    return len(dict_sections) >= 3
|
||||
|
||||
|
||||
def _http_base() -> str:
    """Return ``DECNET_CANARY_HTTP_BASE`` without trailing slashes.

    Defaults to ``http://localhost:8088`` when the variable is unset.
    """
    configured = os.getenv("DECNET_CANARY_HTTP_BASE", "http://localhost:8088")
    return configured.rstrip("/")
|
||||
|
||||
|
||||
def _dns_zone() -> str:
    """Return ``DECNET_CANARY_DNS_ZONE`` lowercased, without edge dots.

    An unset variable yields the empty string.
    """
    configured = os.getenv("DECNET_CANARY_DNS_ZONE", "")
    trimmed = configured.strip(".")
    return trimmed.lower()
|
||||
|
||||
|
||||
def _http_port() -> int:
    """Return the HTTP listener port (``DECNET_CANARY_HTTP_PORT``, default 8088)."""
    configured = os.getenv("DECNET_CANARY_HTTP_PORT", "8088")
    return int(configured)
|
||||
|
||||
|
||||
def _dns_port() -> int:
    """Return the DNS listener port (``DECNET_CANARY_DNS_PORT``, default 5353)."""
    # 5353 is non-privileged; operators who need :53 pin it via NAT or a
    # CAP_NET_BIND_SERVICE-enabled unit.
    configured = os.getenv("DECNET_CANARY_DNS_PORT", "5353")
    return int(configured)
|
||||
|
||||
|
||||
def _dns_bind() -> str:
    """Return the DNS bind address (``DECNET_CANARY_DNS_BIND``, default all interfaces)."""
    address = os.getenv("DECNET_CANARY_DNS_BIND", "0.0.0.0")  # nosec B104 — attacker-facing decoy listener, internet exposure is the design
    return address
|
||||
|
||||
|
||||
def _http_bind() -> str:
    """Return the HTTP bind address (``DECNET_CANARY_HTTP_BIND``, default all interfaces)."""
    address = os.getenv("DECNET_CANARY_HTTP_BIND", "0.0.0.0")  # nosec B104 — same rationale
    return address
|
||||
|
||||
|
||||
# ---------------------------- HTTP surface --------------------------------
|
||||
|
||||
|
||||
def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
    """Assemble the callback-receiver FastAPI application.

    The docs, redoc and openapi endpoints are switched off and the title
    is blank: operators use the main DECNET API, so anything probing
    those paths here is either misconfigured or hunting for a honeypot.

    Routes:
        ``GET /c/{slug}`` records the hit through :func:`_record_hit`
        and always answers with the transparent GIF.
        ``GET /`` answers with an empty 404.
    """
    app = FastAPI(
        title="",  # don't leak "DECNET" in OpenAPI
        docs_url=None,
        redoc_url=None,
        openapi_url=None,
    )

    @app.middleware("http")
    async def _stealth_headers(request: Request, call_next):
        response: Response = await call_next(request)
        # Present a generic Server line instead of the framework banner.
        response.headers["Server"] = "nginx"
        # Don't leak request id / process id headers.
        if "x-process-time" in response.headers:
            del response.headers["x-process-time"]
        return response

    @app.get("/c/{slug}")
    async def callback(slug: str, request: Request) -> Response:
        query = request.query_params
        raw_nonce = query.get("k")
        fp_meta, parsed_fp = _extract_fingerprint(query)

        # Fingerprint bookkeeping keys (``_fp_*``) ride along with the
        # real request headers in the stored trigger row.
        headers_for_row = dict(request.headers)
        if fp_meta:
            headers_for_row.update(fp_meta)

        await _record_hit(
            repo,
            bus,
            slug=slug,
            src_ip=_client_ip(request),
            user_agent=request.headers.get("user-agent"),
            request_path=str(request.url.path),
            dns_qname=None,
            raw_headers=headers_for_row,
            parsed_fp=parsed_fp,
            raw_nonce=raw_nonce,
        )
        # Stealth: the answer is the same tiny image whether or not the
        # slug is known — neither status code nor body may differ.
        return Response(content=_TRANSPARENT_GIF, media_type="image/gif")

    @app.get("/")
    async def root() -> Response:
        # Decoy posture: look like an empty static-file host that only
        # happens to resolve /c/<slug>.
        return Response(status_code=404)

    return app
|
||||
|
||||
|
||||
# Per-chunk size cap. Real fingerprints fit in one ~3KB GET; honest
# overflow is handled via chunking (s/i/n + d). Anything larger than
# this on a single request is junk, so we drop it instead of letting an
# attacker inflate a trigger row indefinitely.
_FP_CHUNK_MAX = 8 * 1024


def _extract_fingerprint(qp: Any) -> tuple[dict[str, Any], Optional[dict]]:
    """Decode fingerprint-payload query params into ``(meta, parsed_fp)``.

    The obfuscated browser payload may send three shapes on
    ``GET /c/<slug>``:

    * ``?o=1`` — bare-open beacon, fired before fingerprinting starts.
    * ``?d=<b64url-json>`` — single-shot fingerprint dump.
    * ``?s=<sid>&i=<idx>&n=<total>&d=<b64url-chunk>`` — chunked dump.

    Args:
        qp: the request's query parameters; any mapping-like object with
            a ``get`` method.  Falsy values and objects without ``get``
            produce an empty result.

    Returns:
        ``meta`` — flat dict of ``_fp_*`` keys to merge into the stored
        raw headers (``_fp_open``, ``_fp_oversize``, the chunk fields, or
        ``_fp_decode_error``).
        ``parsed_fp`` — the decoded single-shot fingerprint dict, or
        ``None`` when there is no ``d``, the request is chunked, or the
        payload cannot be decoded into a JSON object.

    This function never raises on malformed input: the query string is
    attacker-controlled and the HTTP handler must answer identically
    whatever it contains.
    """
    out: dict[str, Any] = {}
    parsed_fp: Optional[dict] = None
    if not qp or not hasattr(qp, "get"):
        return out, parsed_fp

    if qp.get("o"):
        out["_fp_open"] = "1"

    d = qp.get("d")
    if not d:
        return out, parsed_fp
    if len(d) > _FP_CHUNK_MAX:
        out["_fp_oversize"] = "1"
        return out, parsed_fp

    sid = qp.get("s")
    idx = qp.get("i")
    total = qp.get("n")
    if sid and idx and total:
        # Chunked dump: the raw chunk is passed through undecoded.
        out["_fp_sid"] = sid
        out["_fp_idx"] = idx
        out["_fp_total"] = total
        out["_fp_chunk"] = d
        return out, parsed_fp

    # Single-shot: decode and pass back as parsed_fp; validation runs in
    # _record_hit after token lookup so we have the stored nonce at hand.
    try:
        padded = d + "=" * (-len(d) % 4)
        raw = base64.urlsafe_b64decode(padded.encode("ascii"))
        parsed = json.loads(raw.decode("utf-8"))
    except (binascii.Error, ValueError, UnicodeDecodeError, RecursionError):
        # RecursionError: a payload of thousands of nested "[" can exceed
        # the decoder's nesting depth.  Left uncaught it would surface as
        # a 500 from the handler and the hit would not be recorded.
        out["_fp_decode_error"] = "1"
        return out, parsed_fp

    if isinstance(parsed, dict):
        parsed_fp = parsed
    else:
        out["_fp_decode_error"] = "1"
    return out, parsed_fp
|
||||
|
||||
|
||||
def _client_ip(request: Request) -> str:
    """Return the best-known source address for ``request``.

    Order of preference:

    1. the leftmost, non-empty entry of ``X-Forwarded-For`` (for
       deployments behind a reverse proxy; everything after the first
       entry is upstream-proxy noise),
    2. the peer address of the connection,
    3. the sentinel ``"0.0.0.0"`` when neither is available.

    NOTE(review): ``X-Forwarded-For`` is client-supplied unless a
    fronting proxy overwrites it, so the value is only as trustworthy as
    the deployment makes it — confirm against the proxy configuration.
    """
    fwd = request.headers.get("x-forwarded-for")
    if fwd:
        first_hop = fwd.split(",", 1)[0].strip()
        # A header such as ", 10.0.0.1" has an empty first entry; fall
        # through to the peer address rather than recording "" as the
        # source IP.
        if first_hop:
            return first_hop
    if request.client:
        return request.client.host
    return "0.0.0.0"  # nosec B104 — sentinel for "unknown remote"
|
||||
|
||||
|
||||
# ---------------------------- shared persistence -------------------------
|
||||
|
||||
|
||||
async def _record_hit(
    repo: BaseRepository,
    bus: BaseBus,
    *,
    slug: str,
    src_ip: str,
    user_agent: Optional[str],
    request_path: Optional[str],
    dns_qname: Optional[str],
    raw_headers: Optional[dict],
    parsed_fp: Optional[dict] = None,
    raw_nonce: Optional[str] = None,
) -> None:
    """Look up the token for ``slug``, store a trigger row, and publish it.

    Nothing is stored or published for an unknown slug: callers answer
    known and unknown slugs identically, and keeping every random scan
    would clutter the DB.

    A decoded single-shot fingerprint (``parsed_fp``) has to clear four
    checks, tried in this order, before it is stored under ``_fp``:

    A) ``raw_nonce`` equals the token's ``fingerprint_nonce`` (skipped
       when the token has none),
    B) :func:`_is_valid_fp_shape`,
    C) ``mint`` equals the UUIDv5 of ``slug`` in ``_MINT_NAMESPACE``,
    D) the per-(token, source IP) rate limit.

    The first failing check sets its ``_fp_*`` flag and the fingerprint
    is dropped; the trigger row is written either way.  Publish failures
    are logged and swallowed.  After a fingerprint has been accepted for
    a token that carries a nonce, the token is moved to ``revoked`` and a
    ``CANARY_REVOKED`` event is published (best effort).
    """
    token = await repo.get_canary_token_by_slug(slug)
    if token is None:
        return

    final_headers: dict[str, Any] = dict(raw_headers or {})

    if parsed_fp is not None:
        stored_nonce: Optional[str] = token.get("fingerprint_nonce")

        # The elif chain stops at the first failing layer, so later
        # layers (notably the rate limiter, which counts the call) only
        # run for fingerprints that passed everything before them.
        rejection: Optional[str] = None
        if stored_nonce is not None and raw_nonce != stored_nonce:
            rejection = "_fp_invalid_nonce"
        elif not _is_valid_fp_shape(parsed_fp):
            rejection = "_fp_invalid_shape"
        elif parsed_fp.get("mint") != str(uuid.uuid5(_MINT_NAMESPACE, slug)):
            rejection = "_fp_invalid_mint"
        elif not _fp_rate_allowed(token["uuid"], src_ip):
            rejection = "_fp_rate_limited"

        if rejection is None:
            final_headers["_fp"] = parsed_fp
        else:
            final_headers[rejection] = "1"
            parsed_fp = None

    trigger_row = {
        "token_uuid": token["uuid"],
        "occurred_at": datetime.now(timezone.utc),
        "src_ip": src_ip,
        "user_agent": user_agent,
        "request_path": request_path,
        "dns_qname": dns_qname,
        "raw_headers": final_headers,
    }
    trigger_id = await repo.record_canary_trigger(trigger_row)

    try:
        triggered_event = {
            "token_id": token["uuid"],
            "trigger_id": trigger_id,
            "decky_name": token["decky_name"],
            "src_ip": src_ip,
            "user_agent": user_agent,
            "request_path": request_path,
            "dns_qname": dns_qname,
        }
        await bus.publish(
            topics.canary(token["uuid"], topics.CANARY_TRIGGERED),
            triggered_event,
        )
    except Exception as e:  # noqa: BLE001 — best effort
        log.warning("canary.triggered publish failed slug=%s err=%s", slug, e)

    # Only tokens with a fingerprint nonce are retired here, and only
    # once a fingerprint was accepted; other canaries stay live.  Callers
    # answer identically afterwards, so the revocation is invisible to
    # whoever is hitting the slug.
    fingerprint_collected = (
        parsed_fp is not None and token.get("fingerprint_nonce") is not None
    )
    if not fingerprint_collected:
        return
    try:
        await repo.update_canary_token_state(token["uuid"], "revoked")
        await bus.publish(
            topics.canary(token["uuid"], topics.CANARY_REVOKED),
            {
                "token_id": token["uuid"],
                "trigger_id": trigger_id,
                "reason": "fingerprint_collected",
            },
        )
    except Exception as e:  # noqa: BLE001 — trigger row already landed; best effort
        log.warning("canary.deregister failed token=%s err=%s", token["uuid"], e)
|
||||
|
||||
|
||||
# ---------------------------- DNS surface --------------------------------
|
||||
|
||||
|
||||
async def _start_dns_server(
    repo: BaseRepository, bus: BaseBus, *, loop: asyncio.AbstractEventLoop,
) -> Optional[asyncio.DatagramTransport]:
    """Open the canary DNS datagram endpoint on ``loop``.

    Returns the transport of the new endpoint, or ``None`` (after
    logging that DNS is disabled) when no zone is configured.  Each
    query the protocol reports is recorded through :func:`_record_hit`
    with the query name as ``dns_qname``.
    """
    zone = _dns_zone()
    if not zone:
        log.info("canary.dns disabled (DECNET_CANARY_DNS_ZONE unset)")
        return None

    async def _hook(slug: str, query: DNSQuery, src_ip: str) -> None:
        await _record_hit(
            repo,
            bus,
            slug=slug,
            src_ip=src_ip,
            user_agent=None,
            request_path=None,
            dns_qname=query.qname,
            raw_headers=None,
        )

    def _protocol_factory() -> CanaryDNSProtocol:
        return CanaryDNSProtocol(zone, _hook)

    endpoint = await loop.create_datagram_endpoint(
        _protocol_factory,
        local_addr=(_dns_bind(), _dns_port()),
    )
    transport = endpoint[0]
    log.info("canary.dns listening zone=%s port=%d", zone, _dns_port())
    return transport
|
||||
|
||||
|
||||
# ---------------------------- entry point --------------------------------
|
||||
|
||||
|
||||
async def run() -> None:
    """Worker entry point — kicked off by ``decnet canary``.

    Initializes the repository, connects the bus, starts the DNS
    endpoint (when a zone is configured) and serves the HTTP app until
    the uvicorn server returns.  The DNS transport and the bus
    connection are released on every exit path after the bus has
    connected, including a failure to bind the DNS socket.
    """
    import uvicorn

    repo = get_repository()
    await repo.initialize()
    bus = get_bus()
    await bus.connect()

    dns_transport: Optional[asyncio.DatagramTransport] = None
    try:
        app = _build_app(repo, bus)
        config = uvicorn.Config(
            app,
            host=_http_bind(),
            port=_http_port(),
            log_level="warning",
            access_log=False,  # stealth: no per-request lines
            server_header=False,  # we set Server: nginx in middleware
        )
        server = uvicorn.Server(config)
        loop = asyncio.get_running_loop()
        # Started inside the try so a bind failure (e.g. port already in
        # use) still closes the bus before the error propagates.
        dns_transport = await _start_dns_server(repo, bus, loop=loop)
        await server.serve()
    finally:
        if dns_transport is not None:
            dns_transport.close()
        await bus.close()
|
||||
|
||||
|
||||
def main() -> None:
    """Synchronous CLI entry point: drive :func:`run` to completion."""
    worker = run()
    asyncio.run(worker)
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
DECNET CLI — entry point for all commands.
|
||||
|
||||
@@ -22,28 +21,18 @@ import typer
|
||||
from . import (
|
||||
agent,
|
||||
api,
|
||||
bus,
|
||||
canary,
|
||||
db,
|
||||
deploy,
|
||||
forwarder,
|
||||
geoip,
|
||||
init,
|
||||
inventory,
|
||||
lifecycle,
|
||||
listener,
|
||||
orchestrator,
|
||||
profiler,
|
||||
realism,
|
||||
reconciler,
|
||||
sniffer,
|
||||
swarm,
|
||||
swarmctl,
|
||||
topology,
|
||||
ttp,
|
||||
updater,
|
||||
web,
|
||||
webhook,
|
||||
workers,
|
||||
)
|
||||
from .gating import _gate_commands_by_mode
|
||||
@@ -60,8 +49,7 @@ for _mod in (
|
||||
api, swarmctl, agent, updater, listener, forwarder,
|
||||
swarm,
|
||||
deploy, lifecycle, workers, inventory,
|
||||
web, profiler, orchestrator, realism, reconciler, sniffer, db,
|
||||
topology, bus, geoip, init, webhook, canary, ttp,
|
||||
web, profiler, sniffer, db,
|
||||
):
|
||||
_mod.register(app)
|
||||
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
@@ -30,7 +29,7 @@ def register(app: typer.Typer) -> None:
|
||||
with `decnet forwarder --daemon …`. Pass --no-forwarder to skip.
|
||||
"""
|
||||
from decnet.agent import server as _agent_server
|
||||
from decnet.env import DECNET_SWARM_MASTER_HOST, DECNET_AGENT_LOG_FILE
|
||||
from decnet.env import DECNET_SWARM_MASTER_HOST, DECNET_INGEST_LOG_FILE
|
||||
from decnet.swarm import pki as _pki
|
||||
|
||||
resolved_dir = _pathlib.Path(agent_dir) if agent_dir else _pki.DEFAULT_AGENT_DIR
|
||||
@@ -45,7 +44,7 @@ def register(app: typer.Typer) -> None:
|
||||
"--master-host", DECNET_SWARM_MASTER_HOST,
|
||||
"--master-port", str(int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514"))),
|
||||
"--agent-dir", str(resolved_dir),
|
||||
"--log-file", str(DECNET_AGENT_LOG_FILE),
|
||||
"--log-file", str(DECNET_INGEST_LOG_FILE),
|
||||
"--daemon",
|
||||
]
|
||||
try:
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``bus`` command to ``app``."""

    @app.command(name="bus")
    def bus_cmd(
        socket_path: str = typer.Option(
            None,
            "--socket",
            "-s",
            help="UNIX socket path (defaults to DECNET_BUS_SOCKET env var, "
                 "then /run/decnet/bus.sock, then ~/.decnet/bus.sock).",
        ),
        group: str = typer.Option(
            "decnet",
            "--group",
            "-g",
            help="POSIX group to chown the socket to (falls back to process "
                 "group if the named group does not exist).",
        ),
        heartbeat: int = typer.Option(
            10,
            "--heartbeat",
            "-H",
            help="Seconds between system.bus.health heartbeat events.",
        ),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process."),
    ) -> None:
        """Run the DECNET ServiceBus worker (host-local UNIX-socket pub/sub)."""
        import asyncio
        from decnet.bus.factory import _default_socket_path
        from decnet.bus.worker import bus_worker

        # An explicit --socket wins; otherwise the factory's default applies.
        resolved = socket_path if socket_path else _default_socket_path()

        if daemon:
            log.info("bus daemonizing socket=%s", resolved)
            _utils._daemonize()

        log.info("bus starting socket=%s group=%s heartbeat=%ds", resolved, group, heartbeat)
        console.print(f"[bold cyan]Bus starting[/] (socket: {resolved}, heartbeat: {heartbeat}s)")

        worker = bus_worker(resolved, group=group, heartbeat_interval=heartbeat)
        try:
            asyncio.run(worker)
        except KeyboardInterrupt:
            console.print("\n[yellow]Bus stopped.[/]")
|
||||
@@ -1,104 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""``decnet canary`` — HTTP + DNS callback receiver for canary tokens.
|
||||
|
||||
Two entry points share this module:
|
||||
|
||||
* ``decnet canary`` — runs the worker process. Mirrors the shape of
|
||||
:mod:`decnet.cli.webhook`. Invoked by the ``decnet-canary.service``
|
||||
systemd unit so its argv must stay stable.
|
||||
* ``decnet canary-install-toolchain`` — provisions the Node side of
|
||||
the fingerprint-canary obfuscator. Idempotent; safe to call from
|
||||
the API service unit's ``ExecStartPre``.
|
||||
|
||||
Not master-only — any host that hosts deckies can run its own
|
||||
canary worker (the bus events stay local; the webhook worker on
|
||||
each host fans them out to SIEMs independently per the design
|
||||
in ``development/let-s-move-to-the-enumerated-pike.md``).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import subprocess # nosec B404 — npm exec is the whole point of the toolchain installer
|
||||
from pathlib import Path
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
_TOOLCHAIN_TIMEOUT_S = 180
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``canary`` and ``canary-install-toolchain`` commands to ``app``."""

    @app.command(name="canary")
    def canary_cmd(
        daemon: bool = typer.Option(
            False, "--daemon", "-d", help="Detach to background as a daemon process",
        ),
    ) -> None:
        """Run the canary HTTP + DNS callback receiver."""
        import asyncio

        from decnet.canary.worker import run

        if daemon:
            log.info("canary daemonizing")
            _utils._daemonize()

        log.info("canary starting")
        console.print("[bold cyan]Canary callback receiver starting[/]")

        try:
            asyncio.run(run())
        except KeyboardInterrupt:
            console.print("\n[yellow]Canary worker stopped.[/]")

    @app.command(name="canary-install-toolchain")
    def canary_install_toolchain(
        npm_bin: str = typer.Option(
            "npm", "--npm-bin", help="Path to the npm executable. Defaults to PATH lookup.",
        ),
    ) -> None:
        """Install the Node-side toolchain used by fingerprint canaries.

        Runs ``npm install --omit=dev`` under the installed ``decnet/canary/``
        directory so the obfuscator's helper script can ``require()``
        ``javascript-obfuscator`` at mint time. Requires Node >= 18.

        Idempotent: re-running on an already-installed tree is fast
        (npm short-circuits when ``node_modules/`` is up-to-date).
        """
        import decnet.canary as _canary_pkg

        canary_dir = Path(_canary_pkg.__file__).resolve().parent
        if not (canary_dir / "package.json").is_file():
            console.print(
                f"[red]canary package.json not found under {canary_dir}; "
                "wheel may be missing the JS toolchain payload.[/]"
            )
            raise typer.Exit(code=2)
        if shutil.which(npm_bin) is None:
            console.print(
                f"[red]npm executable {npm_bin!r} not found on PATH. "
                "Install Node >= 18 and re-run.[/]"
            )
            raise typer.Exit(code=2)
        console.print(
            f"[cyan]installing canary toolchain[/] in {canary_dir}",
        )
        try:
            proc = subprocess.run(  # nosec B603 — argv-form, no shell, fixed cwd, npm_bin checked above
                [npm_bin, "install", "--omit=dev", "--no-fund", "--no-audit"],
                cwd=str(canary_dir),
                capture_output=True, text=True,
                timeout=_TOOLCHAIN_TIMEOUT_S, check=False,
            )
        except subprocess.TimeoutExpired:
            console.print("[red]npm install timed out after 3 minutes[/]")
            raise typer.Exit(code=3) from None
        if proc.returncode != 0:
            console.print(f"[red]npm install failed rc={proc.returncode}[/]")
            # npm's stderr is arbitrary text and may contain square
            # brackets.  Printed through the markup parser, a sequence
            # such as "[/x]" is treated as a style tag and can be
            # swallowed or abort the command with a markup error, hiding
            # the real failure — so it is printed verbatim.
            console.print(proc.stderr.strip(), markup=False)
            raise typer.Exit(code=proc.returncode)
        console.print("[green]canary toolchain ready[/]")
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
@@ -9,29 +8,19 @@ from rich.table import Table
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def _decnet_tables() -> tuple[str, ...]:
    """Return the name of every DECNET-managed table, children before parents.

    The names come from ``SQLModel.metadata.sorted_tables`` after importing
    ``decnet.web.db.models`` (the import is what registers the tables), so
    a newly added model is picked up by ``db-reset`` without editing a
    hand-maintained list.

    ``sorted_tables`` is parent-first, which is the safe order for
    ``CREATE``. The result here is the reverse of that so tables holding
    foreign keys are dropped before the tables they reference; that keeps
    the order valid even on a backend where FK checks cannot be disabled.
    """
    from sqlmodel import SQLModel
    # Imported purely for its side effect: registering every table on
    # SQLModel.metadata.
    import decnet.web.db.models  # noqa: F401

    parent_first = SQLModel.metadata.sorted_tables
    return tuple(table.name for table in parent_first[::-1])
|
||||
# Tables touched by ``db-reset``, listed in DROP TABLE order: a table that
# holds a foreign key must appear before the table it references.
_DB_RESET_TABLES: tuple[str, ...] = (
    "attacker_behavior",  # FK -> attackers, so it precedes it
    "attackers",
    "logs",
    "bounty",
    "state",
    "users",
    "decky_shards",  # FK -> swarm_hosts, so it precedes it
    "swarm_hosts",
)
|
||||
|
||||
|
||||
async def _db_reset_mysql_async(dsn: str, mode: str, confirm: bool) -> None:
|
||||
@@ -43,11 +32,10 @@ async def _db_reset_mysql_async(dsn: str, mode: str, confirm: bool) -> None:
|
||||
|
||||
db_name = urlparse(dsn).path.lstrip("/") or "(default)"
|
||||
engine = create_async_engine(dsn)
|
||||
tables = _decnet_tables()
|
||||
try:
|
||||
rows: dict[str, int] = {}
|
||||
async with engine.connect() as conn:
|
||||
for tbl in tables:
|
||||
for tbl in _DB_RESET_TABLES:
|
||||
try:
|
||||
result = await conn.execute(text(f"SELECT COUNT(*) FROM `{tbl}`")) # nosec B608
|
||||
rows[tbl] = result.scalar() or 0
|
||||
@@ -70,7 +58,7 @@ async def _db_reset_mysql_async(dsn: str, mode: str, confirm: bool) -> None:
|
||||
|
||||
async with engine.begin() as conn:
|
||||
await conn.execute(text("SET FOREIGN_KEY_CHECKS = 0"))
|
||||
for tbl in tables:
|
||||
for tbl in _DB_RESET_TABLES:
|
||||
if rows.get(tbl, -1) < 0:
|
||||
continue
|
||||
if mode == "truncate":
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Role-based CLI gating.
|
||||
|
||||
MAINTAINERS: when you add a new Typer command (or add_typer group) that is
|
||||
@@ -30,15 +29,9 @@ MASTER_ONLY_COMMANDS: frozenset[str] = frozenset({
|
||||
"api", "swarmctl", "deploy", "redeploy", "teardown",
|
||||
"mutate", "listener", "profiler",
|
||||
"services", "distros", "correlate", "archetypes", "web",
|
||||
"db-reset", "init", "webhook", "clusterer", "campaign-clusterer",
|
||||
# `ttp` runs on agents — local SMTP decoys persist .eml files into the
|
||||
# agent's artifacts tree and the EmailLifter disk-reaches them in-process
|
||||
# (DEBT-047). `ttp-backfill` stays master-only: it walks the master DB.
|
||||
"ttp-backfill",
|
||||
"db-reset",
|
||||
})
|
||||
MASTER_ONLY_GROUPS: frozenset[str] = frozenset(
|
||||
{"swarm", "topology", "geoip", "realism"}
|
||||
)
|
||||
MASTER_ONLY_GROUPS: frozenset[str] = frozenset({"swarm"})
|
||||
|
||||
|
||||
def _agent_mode_active() -> bool:
|
||||
@@ -70,7 +63,7 @@ def _gate_commands_by_mode(_app: typer.Typer) -> None:
|
||||
return
|
||||
_app.registered_commands = [
|
||||
c for c in _app.registered_commands
|
||||
if (c.name or (c.callback.__name__ if c.callback else "")) not in MASTER_ONLY_COMMANDS
|
||||
if (c.name or c.callback.__name__) not in MASTER_ONLY_COMMANDS
|
||||
]
|
||||
_app.registered_groups = [
|
||||
g for g in _app.registered_groups
|
||||
|
||||
@@ -1,60 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""GeoIP CLI — refresh and lookup subcommands (master-only).
|
||||
|
||||
Usage::
|
||||
|
||||
decnet geoip refresh # re-download RIR files and rebuild the index
|
||||
decnet geoip lookup 8.8.8.8 # one-shot IP -> country dump
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from .gating import _require_master_mode
|
||||
from .utils import console, log
|
||||
|
||||
# Typer sub-application holding the ``geoip`` subcommands; invoking it with
# no arguments prints the help text instead of erroring.
_group = typer.Typer(
    no_args_is_help=True,
    name="geoip",
    help="GeoIP provider management (master only).",
)
|
||||
|
||||
|
||||
@_group.command("refresh")
def _refresh() -> None:
    """Force re-download of the GeoIP provider data and rebuild the index."""
    # Master-only guard runs before the geoip package is even imported.
    _require_master_mode("geoip refresh")
    from decnet.geoip import get_lookup
    from decnet.geoip.factory import get_provider

    provider = get_provider()
    log.info("geoip: forcing refresh via %s provider", provider.name)
    console.print(f"[bold cyan]Refreshing {provider.name} GeoIP data…[/]")

    try:
        lookup = get_lookup(force_refresh=True)
    except Exception as exc:  # noqa: BLE001
        # Any failure is reported to the operator and mapped to exit code 1.
        console.print(f"[red]refresh failed: {exc}[/]")
        raise typer.Exit(1) from exc
    else:
        range_count = len(lookup)
        console.print(
            f"[green]OK[/] {provider.name} index rebuilt "
            f"({range_count} ranges)."
        )
|
||||
|
||||
|
||||
@_group.command("lookup")
def _lookup(
    ip: str = typer.Argument(..., help="IP address to resolve."),
) -> None:
    """Print the country code for an IP (or 'unknown')."""
    _require_master_mode("geoip lookup")
    from decnet.geoip import enrich_ip

    country, origin = enrich_ip(ip)
    if country is not None:
        console.print(f"{ip} [green]cc={country}[/] source={origin}")
        return

    # An unresolved address is still a successful run: exit status 0.
    console.print(f"{ip} [yellow]unknown[/]")
    raise typer.Exit(0)
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Mount the ``geoip`` command group on *app* under the name ``geoip``."""
    app.add_typer(_group, name="geoip")
|
||||
@@ -1,864 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
`decnet init` — one-shot master-host bootstrap.
|
||||
|
||||
Idempotent: running it twice is a no-op on already-configured items.
|
||||
Takes a freshly ``pip install``'d DECNET and turns it into a ready-to-
|
||||
run master host: creates the ``decnet`` system user/group, installs
|
||||
the systemd units + polkit rule + tmpfiles.d entry, seeds the
|
||||
directory layout, drops a placeholder config, and starts the
|
||||
``decnet.target`` grouping unit.
|
||||
|
||||
Requires root. Uses ``subprocess.run`` (never ``shell=True``) for every
|
||||
privileged call so the full argv surface is auditable.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import grp
|
||||
import hashlib
|
||||
import os
|
||||
import pwd
|
||||
import shutil
|
||||
import subprocess # nosec B404
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
import typer
|
||||
from jinja2 import Environment, FileSystemLoader, StrictUndefined
|
||||
|
||||
import decnet as _decnet_pkg
|
||||
from .gating import _require_master_mode
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
_CONFIG_PLACEHOLDER = """\
|
||||
# /etc/decnet/decnet.ini — DECNET host config.
|
||||
#
|
||||
# Every key is OPTIONAL. Absent keys fall through to env-var defaults
|
||||
# defined in decnet/env.py. Real env vars always win over this file
|
||||
# (precedence: env > INI > default), so systemd EnvironmentFile= and
|
||||
# one-off `DECNET_FOO=bar decnet ...` invocations always take effect.
|
||||
#
|
||||
# Secrets (JWT, admin password, DB password) intentionally DO NOT
|
||||
# live here. Put them in /opt/decnet/.env.local or the systemd
|
||||
# EnvironmentFile= — never in a group-readable INI.
|
||||
|
||||
[decnet]
|
||||
# DECNET-service user/group as configured at `decnet init` time.
|
||||
# Resolved to a uid/gid on each host at deploy time via pwd.getpwnam,
|
||||
# so the same user name can have different numeric uids on master vs
|
||||
# agents without breaking artifact ownership.
|
||||
api-user = {api_user}
|
||||
api-group = {api_group}
|
||||
# mode = master # or "agent"
|
||||
|
||||
# [api]
|
||||
# host = 127.0.0.1
|
||||
# port = 8000
|
||||
|
||||
# [web]
|
||||
# host = 127.0.0.1
|
||||
# port = 8080
|
||||
# admin-user = admin
|
||||
# cors-origins = http://localhost:8080 # comma-separated
|
||||
|
||||
# [database]
|
||||
# type = sqlite # or "mysql"
|
||||
# url = mysql+asyncmy://user@host:3306/decnet # if set, wins over host/port/name/user
|
||||
# host = localhost
|
||||
# port = 3306
|
||||
# name = decnet
|
||||
# user = decnet
|
||||
|
||||
# [bus]
|
||||
# enabled = true
|
||||
# type = unix # or "fake"
|
||||
# socket = /run/decnet/bus.sock
|
||||
# group = decnet
|
||||
|
||||
# [swarm]
|
||||
# master-host = 10.0.0.1
|
||||
# syslog-port = 6514
|
||||
# swarmctl-port = 8770
|
||||
# swarmctl-host = 127.0.0.1
|
||||
|
||||
# [logging]
|
||||
# system-log = /var/log/decnet/decnet.system.log
|
||||
# ingest-log = /var/log/decnet/decnet.log
|
||||
# agent-log = /var/log/decnet/agent.log
|
||||
|
||||
# [ingester]
|
||||
# batch-size = 100
|
||||
# batch-max-wait-ms = 250
|
||||
|
||||
# [tracing]
|
||||
# enabled = false
|
||||
# otel-endpoint = http://localhost:4317
|
||||
|
||||
# [agent]
|
||||
# Managed by the enroll bundle — do NOT edit by hand on an agent host.
|
||||
"""
|
||||
|
||||
|
||||
def _deploy_root() -> Path:
    """Locate the ``deploy/`` directory that sits next to the ``decnet`` package.

    The candidate is ``<parent of the package directory>/deploy`` and is
    accepted only when it contains a ``decnet.target`` file. That layout
    matches an editable install from a git checkout.

    Raises:
        RuntimeError: the candidate has no ``decnet.target``; the message
            points the operator at ``pip install -e .``.
    """
    package_dir = Path(_decnet_pkg.__file__).resolve().parent
    candidate = package_dir.parent / "deploy"
    if (candidate / "decnet.target").is_file():
        return candidate
    raise RuntimeError(
        f"cannot locate deploy/ directory (looked at {candidate}); "
        "are you on a wheel install that didn't bundle deploy/? "
        "use `pip install -e .` from a git checkout"
    )
|
||||
|
||||
|
||||
def _sha256(path: Path) -> str:
    """Return the hex-encoded SHA-256 digest of the file at *path*.

    The file is hashed in fixed-size chunks so memory use stays constant
    regardless of file size.

    Raises:
        OSError: *path* cannot be opened or read.
    """
    digest = hashlib.sha256()
    with path.open("rb") as handle:
        # iter(callable, sentinel) stops on the empty read that marks EOF.
        for chunk in iter(lambda: handle.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def _run(argv: List[str], *, dry_run: bool) -> None:
    """Execute *argv* without a shell, or only print it when *dry_run* is set.

    Args:
        argv: Command and arguments, passed to ``subprocess.run`` as a list.
        dry_run: When true, print the command and return without running it.

    Raises:
        subprocess.CalledProcessError: the command exited non-zero
            (``check=True``).
    """
    if dry_run:
        # shlex.join quotes arguments containing spaces or shell
        # metacharacters, so the preview keeps the real argv boundaries
        # (e.g. ``--comment 'DECNET honeypot'``) and can be copy-pasted.
        import shlex

        console.print(f" [dim]would run:[/] {shlex.join(argv)}")
        return
    log.info("init: exec %s", argv)
    subprocess.run(argv, check=True)  # nosec B603
|
||||
|
||||
|
||||
def _step(label: str, action: Callable[[], str]) -> bool:
    """Invoke *action* and print one checklist line describing the outcome.

    *action* returns a short outcome string:

    * a string starting with ``"skip:"`` prints ``[SKIP] <label> (<reason>)``,
      where the reason is the text after the prefix;
    * any other string prints ``[ OK ] <label>``.

    If *action* raises, ``[FAIL] <label>: <err>`` is printed and the
    exception is re-raised unchanged. Returns ``True`` whenever *action*
    completed without raising.
    """
    try:
        outcome = action()
    except Exception as exc:  # noqa: BLE001
        console.print(f"[red][FAIL][/] {label}: {exc}")
        raise

    skip_marker = "skip:"
    if not outcome.startswith(skip_marker):
        console.print(f"[green][ OK ][/] {label}")
        return True

    why = outcome[len(skip_marker):].strip()
    console.print(f"[yellow][SKIP][/] {label} ({why})")
    return True
|
||||
|
||||
|
||||
def _ensure_group(group: str, *, dry_run: bool) -> str:
    """Create system group *group* unless the group database already has it.

    Returns a ``"skip: ..."`` outcome when the group exists, otherwise
    ``"ok"`` after ``groupadd --system`` has run (or been previewed under
    *dry_run*).
    """
    try:
        grp.getgrnam(group)
    except KeyError:
        # Lookup miss means the group is absent: create it.
        _run(["groupadd", "--system", group], dry_run=dry_run)
        return "ok"
    return f"skip: group {group} already exists"
|
||||
|
||||
|
||||
def _ensure_user(user: str, group: str, install_dir: str, *, dry_run: bool) -> str:
    """Create system user *user* unless the passwd database already has it.

    A new account gets *group* as primary group, *install_dir* as home
    directory and ``/usr/sbin/nologin`` as shell. Returns a ``"skip: ..."``
    outcome when the user exists, otherwise ``"ok"``.
    """
    try:
        pwd.getpwnam(user)
    except KeyError:
        useradd_argv = [
            "useradd", "--system",
            "--gid", group,
            "--home-dir", install_dir,
            "--shell", "/usr/sbin/nologin",
            "--comment", "DECNET honeypot",
            user,
        ]
        _run(useradd_argv, dry_run=dry_run)
        return "ok"
    return f"skip: user {user} already exists"
|
||||
|
||||
|
||||
def _ensure_dir(
    path: Path, *, mode: int, owner: str, group: str, dry_run: bool
) -> str:
    """Create *path* (with parents) and try to apply mode and ownership.

    Under *dry_run* nothing is touched; the intended action is printed.
    Otherwise the directory is created, then chmod/chown are attempted.
    A missing owner/group or lack of privilege is tolerated so the
    directory creation itself still counts.

    Returns ``"ok"`` when the directory did not exist beforehand and a
    ``"skip: ..."`` outcome when it did.
    """
    already_there = path.exists()

    if dry_run:
        console.print(
            f" [dim]would ensure dir:[/] {path} (mode={oct(mode)}, "
            f"owner={owner}:{group})"
        )
        if already_there:
            return "skip: dry-run"
        return "ok"

    path.mkdir(parents=True, exist_ok=True)
    try:
        os.chmod(path, mode)
        owner_uid = pwd.getpwnam(owner).pw_uid
        group_gid = grp.getgrnam(group).gr_gid
        os.chown(path, owner_uid, group_gid)
    except (KeyError, PermissionError):
        # Best effort: the account may not exist yet, or we are not root
        # (--prefix test runs). The mkdir above is what matters here.
        pass

    if already_there:
        return f"skip: {path} already present"
    return "ok"
|
||||
|
||||
|
||||
def _ensure_config(
    path: Path, group: str, *, user: str, dry_run: bool,
) -> str:
    """Write the placeholder config to *path* unless a file is already there.

    An existing file is never overwritten. A fresh file is rendered from
    ``_CONFIG_PLACEHOLDER`` with *user* / *group* filled in, then chmod
    ``0o640`` and chown ``root:<group>`` are attempted; a missing group or
    lack of privilege is tolerated.

    Returns ``"ok"`` when the file was written (or would be, under
    *dry_run*) and a ``"skip: ..."`` outcome when it already existed.
    """
    if path.exists():
        return f"skip: {path} already present"
    if dry_run:
        console.print(f" [dim]would write:[/] {path}")
        return "ok"

    path.parent.mkdir(parents=True, exist_ok=True)
    rendered = _CONFIG_PLACEHOLDER.format(api_user=user, api_group=group)
    # The template contains non-ASCII characters (em dashes); pin UTF-8 so
    # the write cannot fail or mis-encode under a non-UTF-8 locale.
    path.write_text(rendered, encoding="utf-8")
    try:
        os.chmod(path, 0o640)
        gid = grp.getgrnam(group).gr_gid
        os.chown(path, 0, gid)
    except (KeyError, PermissionError):
        # Best effort: group not created yet, or not running as root.
        pass
    return "ok"
|
||||
|
||||
|
||||
def _copy_if_changed(
    src: Path, dst: Path, *, mode: int, force: bool, dry_run: bool
) -> str:
    """Install *src* at *dst* unless *dst* already has identical content.

    The copy is skipped only when *force* is false, *dst* exists and both
    files have the same SHA-256. After copying, chmod to *mode* and chown
    to ``root:root`` are attempted; lack of privilege is tolerated.

    Returns ``"ok"`` when a copy happened (or would, under *dry_run*) and
    a ``"skip: ..."`` outcome otherwise.
    """
    if not force and dst.exists() and _sha256(src) == _sha256(dst):
        return f"skip: {dst} up to date"

    if dry_run:
        console.print(f" [dim]would install:[/] {src} -> {dst} (mode={oct(mode)})")
    else:
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dst)
        try:
            os.chmod(dst, mode)
            os.chown(dst, 0, 0)
        except PermissionError:
            # Not root (e.g. --prefix test runs): keep the copy.
            pass
    return "ok"
|
||||
|
||||
|
||||
def _render_template(src: Path, context: dict[str, str]) -> str:
    """Render the Jinja2 template file *src* with *context* and return the text.

    The environment uses ``StrictUndefined``, so a variable the template
    references but *context* lacks raises instead of rendering as an empty
    string. Trailing newlines are kept and HTML autoescaping is off.
    """
    loader = FileSystemLoader(str(src.parent))
    jinja_env = Environment(
        loader=loader,
        undefined=StrictUndefined,
        keep_trailing_newline=True,
        autoescape=False,  # nosec B701 — rendering systemd INI, not HTML
    )
    return jinja_env.get_template(src.name).render(**context)
|
||||
|
||||
|
||||
def _write_rendered_if_changed(
    src: Path, dst: Path, rendered: str, *, mode: int, force: bool, dry_run: bool
) -> str:
    """Write *rendered* to *dst* unless *dst* already holds exactly that text.

    The comparison is between the UTF-8 encoded rendered output and the
    bytes on disk (not the template source), so re-runs with the same
    context are no-ops.

    Args:
        src: Template path; used only in the dry-run message.
        dst: Destination file.
        rendered: Final text to install.
        mode: Permission bits applied after writing.
        force: Rewrite even when the content is identical.
        dry_run: Print the intended action instead of writing.

    Returns:
        ``"ok"`` when the file was written (or would be), otherwise a
        ``"skip: ..."`` outcome.
    """
    payload = rendered.encode("utf-8")
    # A direct byte comparison is exact and avoids hashing both sides.
    if not force and dst.exists() and dst.read_bytes() == payload:
        return f"skip: {dst} up to date"
    if dry_run:
        console.print(f" [dim]would render:[/] {src} -> {dst} (mode={oct(mode)})")
        return "ok"
    dst.parent.mkdir(parents=True, exist_ok=True)
    dst.write_bytes(payload)
    try:
        os.chmod(dst, mode)
        os.chown(dst, 0, 0)
    except PermissionError:
        # Not root (e.g. --prefix test runs): keep the written file.
        pass
    return "ok"
|
||||
|
||||
|
||||
def _resolve_venv_dir(install_dir: str, explicit: str | None) -> str:
    """Choose the virtualenv whose ``bin/decnet`` the systemd units will run.

    Search order:

    1. *explicit* (the ``--venv-dir`` flag), which must start with ``/``;
       when given, nothing else is considered.
    2. ``$VIRTUAL_ENV``, only if it lies under *install_dir* — a venv
       outside the install tree is ignored rather than baked into a
       system service.
    3. ``{install_dir}/venv``.
    4. ``{install_dir}/.venv``, ``.311``, ``.312``, ``.313`` in that order.

    The first candidate containing a ``bin/decnet`` file wins.

    Raises:
        RuntimeError: *explicit* is not absolute, or no candidate contains
            ``bin/decnet`` (the message lists every path searched).
    """
    base = Path(install_dir)

    if explicit:
        if not explicit.startswith("/"):
            raise RuntimeError(
                f"--venv-dir must be an absolute path, got {explicit!r}"
            )
        search_order: list[Path] = [Path(explicit)]
    else:
        search_order = []
        active = os.environ.get("VIRTUAL_ENV")
        if active:
            active_path = Path(active)
            try:
                active_path.relative_to(base)
            except ValueError:
                # Active venv is outside install_dir: do not use it.
                pass
            else:
                search_order.append(active_path)
        search_order.append(base / "venv")
        search_order.extend(
            base / name for name in (".venv", ".311", ".312", ".313")
        )

    for venv in search_order:
        if (venv / "bin" / "decnet").is_file():
            return str(venv)

    searched = ", ".join(str(venv) for venv in search_order)
    raise RuntimeError(
        "Could not find a DECNET venv. Create one first (e.g. "
        f"`python -m venv {base}/venv && "
        f"{base}/venv/bin/pip install -e {base}[dev]`) "
        "or pass --venv-dir. Searched: " + searched
    )
|
||||
|
||||
|
||||
def _install_units(
    deploy: Path,
    systemd_dir: Path,
    *,
    install_dir: str,
    venv_dir: str,
    user: str,
    group: str,
    force: bool,
    dry_run: bool,
) -> str:
    """Install the systemd unit files from *deploy* into *systemd_dir*.

    Every ``decnet-*.service.j2`` template is rendered with the install
    path, venv path, user and group, and written without its ``.j2``
    suffix. ``decnet.target`` is copied verbatim.

    Returns a ``"skip: ..."`` outcome when no file needed writing,
    otherwise ``"ok (<written>/<total> installed)"``.
    """
    context = {
        "install_dir": install_dir,
        "venv_dir": venv_dir,
        "user": user,
        "group": group,
    }
    template_paths = sorted(deploy.glob("decnet-*.service.j2"))
    static_paths = [deploy / "decnet.target"]

    outcomes: list[str] = []
    for template in template_paths:
        text = _render_template(template, context)
        # decnet-api.service.j2 -> decnet-api.service
        unit_name = template.name[: -len(".j2")]
        outcomes.append(
            _write_rendered_if_changed(
                template, systemd_dir / unit_name, text,
                mode=0o644, force=force, dry_run=dry_run,
            )
        )
    for unit in static_paths:
        outcomes.append(
            _copy_if_changed(
                unit, systemd_dir / unit.name,
                mode=0o644, force=force, dry_run=dry_run,
            )
        )

    written = sum(1 for outcome in outcomes if not outcome.startswith("skip:"))
    total = len(outcomes)
    if written == 0:
        return f"skip: {total} unit files up to date"
    return f"ok ({written}/{total} installed)"
|
||||
|
||||
|
||||
def _install_polkit(
    deploy: Path, rules_dir: Path, *, group: str, force: bool, dry_run: bool
) -> str:
    """Render the polkit rule template for *group* into *rules_dir*.

    The template ``polkit/50-decnet-workers.rules.j2`` under *deploy* is
    rendered with *group* and written as ``50-decnet-workers.rules``.

    Raises:
        RuntimeError: the template file is missing.
    """
    template = deploy / "polkit" / "50-decnet-workers.rules.j2"
    if not template.is_file():
        raise RuntimeError(f"missing polkit rule template at {template}")

    text = _render_template(template, {"group": group})
    # 50-decnet-workers.rules.j2 -> 50-decnet-workers.rules
    rule_name = template.name[: -len(".j2")]
    return _write_rendered_if_changed(
        template, rules_dir / rule_name, text,
        mode=0o644, force=force, dry_run=dry_run,
    )
|
||||
|
||||
|
||||
def _run_allow_fail(argv: List[str], *, dry_run: bool) -> str:
    """Run *argv* like ``_run`` but treat a non-zero exit as a skip.

    Intended for teardown commands where "already absent" is fine.

    Args:
        argv: Command and arguments, passed to ``subprocess.run`` as a list.
        dry_run: When true, print the command and return ``"ok"`` without
            running it.

    Returns:
        ``"ok"`` on exit status 0 (or under *dry_run*), otherwise
        ``"skip: rc=<code> (already absent)"``.
    """
    if dry_run:
        # shlex.join keeps argument boundaries visible in the preview.
        import shlex

        console.print(f" [dim]would run (allow fail):[/] {shlex.join(argv)}")
        return "ok"
    log.info("init: exec (allow fail) %s", argv)
    result = subprocess.run(argv, check=False)  # nosec B603
    if result.returncode != 0:
        return f"skip: rc={result.returncode} (already absent)"
    return "ok"
|
||||
|
||||
|
||||
def _remove_file(path: Path, *, dry_run: bool) -> str:
    """Delete the file or symlink at *path* if there is one.

    A dangling symlink counts as present (``exists()`` is false for it but
    ``is_symlink()`` is true). Returns ``"ok"`` when something was removed
    (or would be, under *dry_run*), else a ``"skip: ..."`` outcome.
    """
    if not (path.exists() or path.is_symlink()):
        return f"skip: {path} already absent"

    if dry_run:
        console.print(f" [dim]would remove:[/] {path}")
    else:
        path.unlink()
    return "ok"
|
||||
|
||||
|
||||
def _uninstall_units(systemd_dir: Path, *, dry_run: bool) -> str:
    """Remove every ``decnet-*.service`` file and ``decnet.target`` from *systemd_dir*.

    Under *dry_run* the files are only listed. Returns a ``"skip: ..."``
    outcome when nothing matched, otherwise ``"ok (<n> removed)"``.
    """
    doomed = sorted(systemd_dir.glob("decnet-*.service"))
    target_unit = systemd_dir / "decnet.target"
    if target_unit.exists():
        doomed.append(target_unit)

    if not doomed:
        return "skip: no decnet unit files present"

    for unit in doomed:
        if dry_run:
            console.print(f" [dim]would remove:[/] {unit}")
        else:
            unit.unlink()
    return f"ok ({len(doomed)} removed)"
|
||||
|
||||
|
||||
def _remove_user(user: str, *, dry_run: bool) -> str:
    """Delete system user *user* with ``userdel`` if the account exists.

    Returns a ``"skip: ..."`` outcome when the account is absent;
    otherwise whatever ``_run_allow_fail`` reports for ``userdel``.
    """
    try:
        pwd.getpwnam(user)
    except KeyError:
        return f"skip: user {user} already absent"
    else:
        # A non-zero userdel exit (e.g. the user still owns running
        # processes) is tolerated and left to the operator to resolve.
        return _run_allow_fail(["userdel", user], dry_run=dry_run)
|
||||
|
||||
|
||||
def _remove_group(group: str, *, dry_run: bool) -> str:
    """Delete system group *group* with ``groupdel`` if the group exists.

    Returns a ``"skip: ..."`` outcome when the group is absent; otherwise
    whatever ``_run_allow_fail`` reports for ``groupdel``.
    """
    try:
        grp.getgrnam(group)
    except KeyError:
        return f"skip: group {group} already absent"
    else:
        return _run_allow_fail(["groupdel", group], dry_run=dry_run)
|
||||
|
||||
|
||||
def _remove_dir_if_present(
    path: Path, *, dry_run: bool, recursive: bool = False
) -> str:
    """Remove the directory at *path* if it exists.

    With ``recursive=False`` only an empty directory is removed; a failing
    ``rmdir`` becomes a ``"skip: ... not empty (...)"`` outcome. With
    ``recursive=True`` the whole tree is deleted via ``shutil.rmtree`` with
    errors ignored. Under *dry_run* the action is only printed.

    Returns ``"ok"`` or a ``"skip: ..."`` outcome.
    """
    if not path.exists():
        return f"skip: {path} already absent"

    if dry_run:
        verb = "would rm -rf" if recursive else "would rmdir"
        console.print(f" [dim]{verb}:[/] {path}")
        return "ok"

    if not recursive:
        try:
            path.rmdir()
        except OSError as exc:
            return f"skip: {path} not empty ({exc.strerror})"
        return "ok"

    shutil.rmtree(path, ignore_errors=True)
    return "ok"
|
||||
|
||||
|
||||
def _install_tmpfiles(
    deploy: Path, tmpfiles_dir: Path, *, force: bool, dry_run: bool
) -> str:
    """Install ``tmpfiles.d/decnet.conf`` and apply it with ``systemd-tmpfiles``.

    Returns the outcome of the copy step. ``systemd-tmpfiles --create`` is
    invoked on the installed file regardless of whether the copy was
    skipped (printed only, under *dry_run*).

    Raises:
        RuntimeError: the source entry is missing under *deploy*.
    """
    conf_src = deploy / "tmpfiles.d" / "decnet.conf"
    if not conf_src.is_file():
        raise RuntimeError(f"missing tmpfiles.d entry at {conf_src}")

    conf_dst = tmpfiles_dir / conf_src.name
    outcome = _copy_if_changed(
        conf_src, conf_dst,
        mode=0o644, force=force, dry_run=dry_run,
    )
    # Apply right away so /run/decnet exists before daemon-reload.
    _run(["systemd-tmpfiles", "--create", str(conf_dst)], dry_run=dry_run)
    return outcome
|
||||
|
||||
|
||||
def _install_logrotate(
    deploy: Path, logrotate_dir: Path, *, force: bool, dry_run: bool
) -> str:
    """Copy the logrotate config ``logrotate.d/decnet`` into *logrotate_dir*.

    Per the original author's note, the shipped config relies on
    ``copytruncate`` because the ingester / forwarder keep the log files
    open, and without rotation ``/var/log/decnet/`` grows without bound.
    Only the file copy happens here; logrotate itself is not invoked.

    Returns the outcome of the copy step.

    Raises:
        RuntimeError: the source config is missing under *deploy*.
    """
    config_src = deploy / "logrotate.d" / "decnet"
    if config_src.is_file():
        return _copy_if_changed(
            config_src, logrotate_dir / config_src.name,
            mode=0o644, force=force, dry_run=dry_run,
        )
    raise RuntimeError(f"missing logrotate config at {config_src}")
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
|
||||
@app.command(name="init")
|
||||
def init_cmd(
|
||||
dry_run: bool = typer.Option(
|
||||
False, "--dry-run",
|
||||
help="Print every action; make no changes.",
|
||||
),
|
||||
no_start: bool = typer.Option(
|
||||
False, "--no-start",
|
||||
help="Install everything but don't `systemctl enable --now decnet.target`.",
|
||||
),
|
||||
force: bool = typer.Option(
|
||||
False, "--force",
|
||||
help="Overwrite unit / polkit / tmpfiles entries even if identical.",
|
||||
),
|
||||
deinit: bool = typer.Option(
|
||||
False, "--deinit",
|
||||
help="Undo a previous init: stop + disable decnet.target, remove "
|
||||
"unit files, polkit rule, tmpfiles.d entry, /etc/decnet. "
|
||||
"Preserves /var/lib/decnet, /var/log/decnet, and the "
|
||||
"service user/group — pass --purge to remove those too.",
|
||||
),
|
||||
purge: bool = typer.Option(
|
||||
False, "--purge",
|
||||
help="With --deinit, also wipe /var/lib/decnet, "
|
||||
"/var/log/decnet, AND the service user/group. "
|
||||
"Destructive — operator data is gone, and if --user "
|
||||
"points at your own login account, that account goes "
|
||||
"with it. Only use when the user/group was created by "
|
||||
"`decnet init` in the first place.",
|
||||
),
|
||||
user: str = typer.Option(
|
||||
"decnet", "--user",
|
||||
help="System user to own DECNET processes.",
|
||||
),
|
||||
group: str = typer.Option(
|
||||
"decnet", "--group",
|
||||
help="Primary group of the DECNET user.",
|
||||
),
|
||||
install_dir: str = typer.Option(
|
||||
"/opt/decnet", "--install-dir",
|
||||
help="Absolute path where DECNET is installed. Default "
|
||||
"/opt/decnet; distros that reserve /opt can point this "
|
||||
"at /srv/decnet, /usr/local/decnet, etc. Gets rendered "
|
||||
"into every systemd unit via Jinja2 and used as the "
|
||||
"decnet user's home directory.",
|
||||
),
|
||||
venv_dir: Optional[str] = typer.Option(
|
||||
None, "--venv-dir",
|
||||
help="Absolute path to the Python venv systemd should "
|
||||
"ExecStart from. If omitted, auto-detected in order: "
|
||||
"$VIRTUAL_ENV (if under --install-dir), "
|
||||
"{install-dir}/venv, then {install-dir}/{.venv,.311,"
|
||||
".312,.313}. Init aborts if none exists.",
|
||||
),
|
||||
prefix: str = typer.Option(
|
||||
"", "--prefix", hidden=True,
|
||||
help="Filesystem prefix for tests (e.g. tmp_path). Empty = real root.",
|
||||
),
|
||||
) -> None:
|
||||
"""One-shot bootstrap of a DECNET master host.
|
||||
|
||||
Creates the `decnet` user/group, installs systemd units,
|
||||
polkit rules, tmpfiles.d entries, seeds directories and
|
||||
drops a placeholder config, then starts decnet.target.
|
||||
"""
|
||||
_require_master_mode("init")
|
||||
|
||||
if purge and not deinit:
|
||||
console.print("[red]--purge only applies with --deinit[/]")
|
||||
raise typer.Exit(1)
|
||||
|
||||
# Root check — skip when --prefix is set (tests don't run as root).
|
||||
if not prefix and os.geteuid() != 0:
|
||||
verb = "deinit" if deinit else "init"
|
||||
console.print(f"[red]decnet {verb}: must run as root (use sudo)[/]")
|
||||
raise typer.Exit(1)
|
||||
|
||||
if not install_dir.startswith("/"):
|
||||
console.print(
|
||||
f"[red]decnet init: --install-dir must be absolute, got {install_dir!r}[/]"
|
||||
)
|
||||
raise typer.Exit(1)
|
||||
# Strip leading slash so pfx-joining works under --prefix test mode
|
||||
# (Path("/"). / "/opt/decnet" == Path("/opt/decnet"), dropping pfx).
|
||||
_install_rel = install_dir.lstrip("/")
|
||||
|
||||
required_tools: tuple[str, ...] = ("systemctl",) if deinit else (
|
||||
"systemctl", "useradd", "groupadd", "systemd-tmpfiles",
|
||||
)
|
||||
if deinit:
|
||||
required_tools = required_tools + ("userdel", "groupdel")
|
||||
for tool in required_tools:
|
||||
if shutil.which(tool) is None and not dry_run:
|
||||
verb = "deinit" if deinit else "init"
|
||||
console.print(f"[red]decnet {verb}: {tool!r} is required on PATH[/]")
|
||||
raise typer.Exit(1)
|
||||
|
||||
pfx = Path(prefix) if prefix else Path("/")
|
||||
systemd_dir = pfx / "etc/systemd/system"
|
||||
polkit_dir = pfx / "etc/polkit-1/rules.d"
|
||||
tmpfiles_dir = pfx / "etc/tmpfiles.d"
|
||||
logrotate_dir = pfx / "etc/logrotate.d"
|
||||
etc_decnet = pfx / "etc/decnet"
|
||||
|
||||
if deinit:
|
||||
console.print(
|
||||
f"[bold cyan]DECNET deinit[/] "
|
||||
f"(dry_run={dry_run}, purge={purge})"
|
||||
)
|
||||
_step(
|
||||
"systemctl stop + disable decnet.target",
|
||||
lambda: _run_allow_fail(
|
||||
["systemctl", "disable", "--now", "decnet.target"],
|
||||
dry_run=dry_run,
|
||||
),
|
||||
)
|
||||
_step(
|
||||
"remove systemd unit files",
|
||||
lambda: _uninstall_units(systemd_dir, dry_run=dry_run),
|
||||
)
|
||||
_step(
|
||||
"remove polkit rule",
|
||||
lambda: _remove_file(
|
||||
polkit_dir / "50-decnet-workers.rules",
|
||||
dry_run=dry_run,
|
||||
),
|
||||
)
|
||||
_step(
|
||||
"remove tmpfiles.d entry",
|
||||
lambda: _remove_file(
|
||||
tmpfiles_dir / "decnet.conf",
|
||||
dry_run=dry_run,
|
||||
),
|
||||
)
|
||||
_step(
|
||||
"remove logrotate config",
|
||||
lambda: _remove_file(
|
||||
logrotate_dir / "decnet",
|
||||
dry_run=dry_run,
|
||||
),
|
||||
)
|
||||
_step(
|
||||
"systemctl daemon-reload",
|
||||
lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1], # type: ignore[func-returns-value]
|
||||
)
|
||||
_step(
|
||||
f"remove {etc_decnet / 'decnet.ini'}",
|
||||
lambda: _remove_file(etc_decnet / "decnet.ini", dry_run=dry_run),
|
||||
)
|
||||
# Legacy name from pre-domain-sections placeholder era.
|
||||
# Harmless if absent (the _remove_file step logs skip).
|
||||
_step(
|
||||
f"remove legacy {etc_decnet / 'config.ini'}",
|
||||
lambda: _remove_file(etc_decnet / "config.ini", dry_run=dry_run),
|
||||
)
|
||||
_step(
|
||||
f"remove {etc_decnet}",
|
||||
lambda: _remove_dir_if_present(etc_decnet, dry_run=dry_run),
|
||||
)
|
||||
_step(
|
||||
f"remove {pfx / 'run/decnet'}",
|
||||
lambda: _remove_dir_if_present(
|
||||
pfx / "run/decnet", dry_run=dry_run,
|
||||
),
|
||||
)
|
||||
_step(
|
||||
f"remove {pfx / _install_rel}",
|
||||
lambda: _remove_dir_if_present(
|
||||
pfx / _install_rel, dry_run=dry_run,
|
||||
),
|
||||
)
|
||||
if purge:
|
||||
_step(
|
||||
f"purge {pfx / 'var/lib/decnet'}",
|
||||
lambda: _remove_dir_if_present(
|
||||
pfx / "var/lib/decnet",
|
||||
dry_run=dry_run, recursive=True,
|
||||
),
|
||||
)
|
||||
_step(
|
||||
f"purge {pfx / 'var/log/decnet'}",
|
||||
lambda: _remove_dir_if_present(
|
||||
pfx / "var/log/decnet",
|
||||
dry_run=dry_run, recursive=True,
|
||||
),
|
||||
)
|
||||
else:
|
||||
console.print(
|
||||
f"[dim]preserved {pfx / 'var/lib/decnet'} and "
|
||||
f"{pfx / 'var/log/decnet'} (operator data); "
|
||||
"re-run with --purge to remove.[/]"
|
||||
)
|
||||
# User / group removal is also gated on --purge. In dev the
|
||||
# operator may have passed their own login user via
|
||||
# `--user $USER` to avoid ownership churn; an unconditional
|
||||
# `userdel anti` during deinit would nuke their account.
|
||||
if purge:
|
||||
_step(
|
||||
f"remove user {user!r}",
|
||||
lambda: _remove_user(user, dry_run=dry_run),
|
||||
)
|
||||
_step(
|
||||
f"remove group {group!r}",
|
||||
lambda: _remove_group(group, dry_run=dry_run),
|
||||
)
|
||||
else:
|
||||
console.print(
|
||||
f"[dim]preserved user {user!r} and group {group!r}; "
|
||||
"re-run with --purge to remove (only do this if "
|
||||
"they were created by `decnet init`).[/]"
|
||||
)
|
||||
console.print("[bold green]DECNET deinit complete.[/]")
|
||||
return
|
||||
|
||||
try:
|
||||
deploy = _deploy_root()
|
||||
except RuntimeError as exc:
|
||||
console.print(f"[red]decnet init: {exc}[/]")
|
||||
raise typer.Exit(1) from exc
|
||||
|
||||
# Resolve venv BEFORE any file writes — fails loud if the
|
||||
# operator hasn't created one yet, instead of shipping broken
|
||||
# systemd units that journalctl spams forever. Skipped under
|
||||
# --prefix (test mode) because the test harness doesn't build a
|
||||
# real venv and the rendered string is asserted on directly.
|
||||
if prefix:
|
||||
resolved_venv = venv_dir or f"{install_dir}/venv"
|
||||
else:
|
||||
try:
|
||||
resolved_venv = _resolve_venv_dir(install_dir, venv_dir)
|
||||
except RuntimeError as exc:
|
||||
console.print(f"[red]decnet init: {exc}[/]")
|
||||
raise typer.Exit(1) from exc
|
||||
console.print(f"[dim]using venv: {resolved_venv}[/]")
|
||||
|
||||
dirs = [
|
||||
(pfx / _install_rel, 0o755, user, group),
|
||||
(pfx / "var/lib/decnet", 0o750, user, group),
|
||||
(pfx / "var/lib/decnet/geoip", 0o755, user, group),
|
||||
# DEBT-035 / DEBT-047: artifact root carries setgid (the
|
||||
# 0o2... bit) so every file written under it inherits the
|
||||
# decnet group regardless of which container's uid created
|
||||
# it. Group-write (0o2775) lets the API process and the
|
||||
# local TTP worker read each other's outputs without a
|
||||
# manual chown after every fresh deploy.
|
||||
(pfx / "var/lib/decnet/artifacts", 0o2775, user, group),
|
||||
(pfx / "var/log/decnet", 0o750, user, group),
|
||||
(etc_decnet, 0o755, "root", group),
|
||||
(pfx / "run/decnet", 0o755, "root", group),
|
||||
]
|
||||
|
||||
console.print(
|
||||
f"[bold cyan]DECNET init[/] "
|
||||
f"(dry_run={dry_run}, no_start={no_start}, force={force})"
|
||||
)
|
||||
|
||||
_step(
|
||||
f"ensure group {group!r}",
|
||||
lambda: _ensure_group(group, dry_run=dry_run),
|
||||
)
|
||||
_step(
|
||||
f"ensure user {user!r}",
|
||||
lambda: _ensure_user(user, group, install_dir, dry_run=dry_run),
|
||||
)
|
||||
for path, mode, d_owner, d_group in dirs:
|
||||
_step(
|
||||
f"ensure dir {path}",
|
||||
lambda p=path, m=mode, o=d_owner, g=d_group: # type: ignore[misc]
|
||||
_ensure_dir(p, mode=m, owner=o, group=g, dry_run=dry_run),
|
||||
)
|
||||
_step(
|
||||
f"write {etc_decnet / 'decnet.ini'}",
|
||||
lambda: _ensure_config(
|
||||
etc_decnet / "decnet.ini", group,
|
||||
user=user, dry_run=dry_run,
|
||||
),
|
||||
)
|
||||
_step(
|
||||
"install systemd units",
|
||||
lambda: _install_units(
|
||||
deploy, systemd_dir,
|
||||
install_dir=install_dir, venv_dir=resolved_venv,
|
||||
user=user, group=group,
|
||||
force=force, dry_run=dry_run,
|
||||
),
|
||||
)
|
||||
_step(
|
||||
"install polkit rule",
|
||||
lambda: _install_polkit(
|
||||
deploy, polkit_dir, group=group,
|
||||
force=force, dry_run=dry_run,
|
||||
),
|
||||
)
|
||||
_step(
|
||||
"install tmpfiles.d entry",
|
||||
lambda: _install_tmpfiles(
|
||||
deploy, tmpfiles_dir, force=force, dry_run=dry_run,
|
||||
),
|
||||
)
|
||||
_step(
|
||||
"install logrotate config",
|
||||
lambda: _install_logrotate(
|
||||
deploy, logrotate_dir, force=force, dry_run=dry_run,
|
||||
),
|
||||
)
|
||||
_step(
|
||||
"systemctl daemon-reload",
|
||||
lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1], # type: ignore[func-returns-value]
|
||||
)
|
||||
|
||||
if no_start:
|
||||
console.print("[yellow]--no-start: skipping decnet.target start[/]")
|
||||
return
|
||||
|
||||
try:
|
||||
_step(
|
||||
"systemctl enable --now decnet.target",
|
||||
lambda: (
|
||||
_run( # type: ignore[func-returns-value]
|
||||
["systemctl", "enable", "--now", "decnet.target"],
|
||||
dry_run=dry_run,
|
||||
),
|
||||
"ok",
|
||||
)[1],
|
||||
)
|
||||
except subprocess.CalledProcessError as exc:
|
||||
console.print(
|
||||
f"[red]decnet.target failed to start (rc={exc.returncode}); "
|
||||
"inspect `systemctl status decnet.target` and individual "
|
||||
"`decnet-*.service` units.[/]"
|
||||
)
|
||||
raise typer.Exit(1) from exc
|
||||
|
||||
console.print("[bold green]DECNET init complete.[/] "
|
||||
"Check `decnet status` or the Workers panel.")
|
||||
sys.stdout.flush()
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess # nosec B404
|
||||
@@ -56,65 +55,15 @@ def register(app: typer.Typer) -> None:
|
||||
|
||||
@app.command()
|
||||
def status() -> None:
|
||||
"""Show running deckies and the state of every ``decnet-*`` unit.
|
||||
|
||||
Prefers systemd (``systemctl list-units 'decnet-*.service'``) so
|
||||
agents, masters and mixed hosts all get one consistent view of
|
||||
what's installed, loaded, and active. Falls back to the psutil
|
||||
cmdline registry on boxes without systemd (dev laptops, CI
|
||||
containers, non-systemd init) so `decnet status` is still useful
|
||||
there.
|
||||
"""
|
||||
"""Show running deckies and their status."""
|
||||
log.info("status command invoked")
|
||||
from decnet.engine import status as _status
|
||||
_status()
|
||||
|
||||
units = _utils._systemd_units()
|
||||
if units is not None:
|
||||
_render_systemd_units(units)
|
||||
else:
|
||||
_render_psutil_fallback()
|
||||
|
||||
def _render_systemd_units(units: list[dict]) -> None:
    """Print a table of ``decnet-*`` systemd units, or a hint when none exist.

    Every entry of ``units`` is read with ``.get`` for the keys ``unit``,
    ``load``, ``active``, ``sub`` and ``description``; a missing key renders
    as an empty cell. Rows are ordered by unit name.
    """
    svc_table = Table(title="DECNET Services (systemd)", show_lines=True)
    column_specs = (
        ("Unit", {"style": "bold cyan"}),
        ("Load", {}),
        ("Active", {}),
        ("Sub", {}),
        ("Description", {"style": "dim"}),
    )
    for header, options in column_specs:
        svc_table.add_column(header, **options)

    # An empty list means there is nothing to tabulate; point the operator
    # at the install command instead of printing an empty table.
    if not units:
        console.print(
            "[yellow]No decnet-* systemd units loaded. "
            "Run `sudo decnet init` to install them.[/]"
        )
        return

    def _active_style(active: str) -> str:
        # green for "active", red for "failed", yellow for any other state.
        if active == "active":
            colour = "green"
        elif active == "failed":
            colour = "red"
        else:
            colour = "yellow"
        return f"[{colour}]{active}[/]"

    ordered = sorted(units, key=lambda entry: entry.get("unit", ""))
    for entry in ordered:
        svc_table.add_row(
            entry.get("unit", ""),
            entry.get("load", ""),
            _active_style(entry.get("active", "")),
            entry.get("sub", ""),
            entry.get("description", ""),
        )
    console.print(svc_table)
|
||||
|
||||
def _render_psutil_fallback() -> None:
|
||||
registry = _utils._service_registry(str(DECNET_INGEST_LOG_FILE))
|
||||
if _agent_mode_active():
|
||||
registry = [r for r in registry if r[0] not in {"Mutator", "Profiler", "API"}]
|
||||
svc_table = Table(
|
||||
title="DECNET Services (psutil fallback — systemd unavailable)",
|
||||
show_lines=True,
|
||||
)
|
||||
svc_table = Table(title="DECNET Services", show_lines=True)
|
||||
svc_table.add_column("Service", style="bold cyan")
|
||||
svc_table.add_column("Status")
|
||||
svc_table.add_column("PID", style="dim")
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``orchestrate`` command to ``app``."""

    @app.command(name="orchestrate")
    def orchestrate_cmd(
        interval: int = typer.Option(
            60, "--interval", "-i",
            help="Seconds between synthetic activity ticks",
        ),
        daemon: bool = typer.Option(
            False, "--daemon", "-d",
            help="Detach to background as a daemon process",
        ),
        llm: Optional[bool] = typer.Option(
            None, "--llm/--no-llm",
            help=(
                "Enable / disable LLM enrichment of user-class file "
                "bodies. Default reads $DECNET_REALISM_LLM (any "
                "non-empty value enables; 'off' / unset disables)."
            ),
        ),
    ) -> None:
        """Inject synthetic life (inter-decky traffic + file ops + email) into the fleet."""
        # Imports are deferred to command invocation time.
        import asyncio
        from decnet.orchestrator import orchestrator_worker
        from decnet.web.dependencies import repo

        if daemon:
            log.info("orchestrator daemonizing interval=%d", interval)
            _utils._daemonize()

        # Tri-state flag: None means "not given on the command line".
        if llm is None:
            llm_label = "default"
        elif llm:
            llm_label = "on"
        else:
            llm_label = "off"

        log.info("orchestrator starting interval=%d llm=%s", interval, llm_label)
        console.print(
            f"[bold cyan]Orchestrator starting[/] (interval: {interval}s)"
        )

        async def _serve() -> None:
            # The repository must be initialised before the worker uses it.
            await repo.initialize()
            await orchestrator_worker(repo, interval=interval, llm_enabled=llm)

        try:
            asyncio.run(_serve())
        except KeyboardInterrupt:
            console.print("\n[yellow]Orchestrator stopped.[/]")
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
@@ -1,112 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""``decnet realism ...`` — content-engine maintenance commands.
|
||||
|
||||
After stage 5 of the realism migration, this is the only remaining
|
||||
CLI surface from the realism library / former emailgen. ``decnet
|
||||
realism run`` does not exist (the orchestrator runs the unified
|
||||
worker via ``decnet orchestrate``); the only sub-command is
|
||||
``import-personas``, which validates + installs the host-wide global
|
||||
persona pool consumed by fleet (MACVLAN/IPVLAN) and SWARM-shard
|
||||
deckies.
|
||||
|
||||
Topology personas live on ``Topology.email_personas`` and are
|
||||
managed via the dashboard or the topology API; this command does
|
||||
not touch them.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from .gating import _require_master_mode
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``realism`` sub-application (``import-personas``) to ``app``."""
    realism_app = typer.Typer(
        name="realism",
        help=(
            "Maintain the realism content engine (persona pool import, "
            "future content-class tuning)."
        ),
    )
    app.add_typer(realism_app, name="realism")

    @realism_app.command("import-personas")
    def realism_import_personas(
        path: Path = typer.Argument(
            ..., exists=True, file_okay=True, dir_okay=False, readable=True,
            help="JSON file containing a list of EmailPersona objects",
        ),
        output: Optional[Path] = typer.Option(
            None, "--output", "-o",
            help=(
                "Override the destination path. Defaults to the canonical "
                "global pool (DECNET_REALISM_PERSONAS, /etc/decnet/"
                "email_personas.json, or ~/.decnet/email_personas.json)."
            ),
        ),
    ) -> None:
        """Validate + install a personas JSON file as the global pool.

        Use this when deploying with IMAP/POP3 services on fleet
        (MACVLAN/IPVLAN) or SWARM-shard mail deckies — those have no
        parent topology row, so they read this host-wide list.
        MazeNET topology mail deckies use ``Topology.email_personas``
        instead and this command does not touch them.
        """
        _require_master_mode("realism import-personas")
        from decnet.realism import personas_pool as global_pool
        from decnet.realism.personas import parse_personas

        try:
            raw = path.read_text(encoding="utf-8")
        except OSError as exc:
            console.print(f"[red]Cannot read {path}:[/] {exc}")
            raise typer.Exit(code=1) from exc

        try:
            payload = json.loads(raw)
        except json.JSONDecodeError as exc:
            console.print(f"[red]Invalid JSON in {path}:[/] {exc}")
            raise typer.Exit(code=1) from exc
        if not isinstance(payload, list):
            console.print(
                f"[red]{path} must contain a JSON list of personas, "
                f"got {type(payload).__name__}[/]"
            )
            raise typer.Exit(code=1)

        personas = parse_personas(payload)
        if not personas:
            console.print(
                f"[red]No valid personas in {path}.[/] "
                "Check the schema (name, email, role, tone, mannerisms)."
            )
            raise typer.Exit(code=1)
        if len(personas) < 2:
            console.print(
                f"[yellow]Warning: only {len(personas)} valid persona(s) — "
                "the worker requires at least 2 to send mail; importing "
                "anyway in case more are added later.[/]"
            )

        dest = output or global_pool.resolve_path()
        serialized = json.dumps(
            [p.model_dump(exclude_none=False) for p in personas],
            indent=2,
            ensure_ascii=False,
        )
        # Creating the directory or writing the file can fail (for example
        # an unprivileged write below /etc). Report it the same way as the
        # read failure above instead of letting a traceback escape.
        try:
            dest.parent.mkdir(parents=True, exist_ok=True)
            dest.write_text(serialized, encoding="utf-8")
        except OSError as exc:
            console.print(f"[red]Cannot write {dest}:[/] {exc}")
            raise typer.Exit(code=1) from exc

        # Drop the pool's cache now that the file on disk has changed.
        global_pool.reset_cache()
        console.print(
            f"[green]Imported {len(personas)} personas to[/] {dest}"
        )
        if path != dest:
            log.info("realism import-personas src=%s dest=%s", path, dest)
|
||||
@@ -1,63 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``reconcile`` command to ``app``."""

    @app.command(name="reconcile")
    def reconcile_cmd(
        once: bool = typer.Option(
            False, "--once",
            help="Run a single reconcile pass and exit (no daemon loop).",
        ),
        interval: int = typer.Option(
            30, "--interval", "-i",
            help="Seconds between reconcile passes (ignored with --once).",
        ),
        daemon: bool = typer.Option(
            False, "--daemon", "-d",
            help="Detach to background as a daemon process (long-lived only).",
        ),
    ) -> None:
        """Converge fleet state across decnet-state.json, the DB, and docker."""
        import asyncio
        from decnet.web.dependencies import repo

        if once:
            # One-shot mode: a single pass, print the counters, and return
            # without looking at --daemon or --interval.
            from decnet.fleet.reconciler import reconcile_once

            async def _single_pass() -> None:
                await repo.initialize()
                counts = await reconcile_once(repo)
                summary = " ".join(
                    f"{field}={counts[field]}"
                    for field in ("inserted", "deleted", "state_updated")
                )
                console.print(f"[bold cyan]reconcile:[/] {summary}")

            asyncio.run(_single_pass())
            return

        from decnet.fleet.reconciler_worker import fleet_reconciler_worker

        if daemon:
            log.info("reconciler daemonizing interval=%d", interval)
            _utils._daemonize()

        log.info("reconciler starting interval=%d", interval)
        console.print(
            f"[bold cyan]Fleet reconciler starting[/] (interval: {interval}s)"
        )

        async def _serve() -> None:
            await repo.initialize()
            await fleet_reconciler_worker(repo, interval=interval)

        try:
            asyncio.run(_serve())
        except KeyboardInterrupt:
            console.print("\n[yellow]Reconciler stopped.[/]")
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""`decnet swarm ...` — master-side operator commands (HTTP to local swarmctl)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
@@ -17,16 +16,8 @@ from .utils import console, log
|
||||
def register(app: typer.Typer) -> None:
|
||||
@app.command()
|
||||
def swarmctl(
|
||||
port: int = typer.Option(
|
||||
8770, "--port",
|
||||
envvar="DECNET_SWARMCTL_PORT",
|
||||
help="Port for the swarm controller. Defaults to [swarm] swarmctl-port from /etc/decnet/decnet.ini, else 8770.",
|
||||
),
|
||||
host: str = typer.Option(
|
||||
"127.0.0.1", "--host",
|
||||
envvar="DECNET_SWARMCTL_HOST",
|
||||
help="Bind address for the swarm controller. Defaults to [swarm] swarmctl-host from /etc/decnet/decnet.ini, else 127.0.0.1.",
|
||||
),
|
||||
port: int = typer.Option(8770, "--port", help="Port for the swarm controller"),
|
||||
host: str = typer.Option("127.0.0.1", "--host", help="Bind address for the swarm controller"),
|
||||
daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
|
||||
no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"),
|
||||
tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"),
|
||||
|
||||
@@ -1,349 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""MazeNET topology CLI: generate / deploy / teardown / list / show."""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
from decnet.topology.config import TopologyConfig
|
||||
from decnet.topology.generator import generate
|
||||
from decnet.topology.persistence import hydrate, persist
|
||||
from decnet.topology.status import TopologyStatus
|
||||
|
||||
from .gating import _require_master_mode
|
||||
|
||||
# Rich console used for all output of the ``topology`` sub-commands.
_console = Console()

# Typer sub-application that collects the ``topology`` sub-commands;
# invoking it with no arguments prints the help text.
_group = typer.Typer(
    no_args_is_help=True,
    help="MazeNET nested-topology commands (DECNET master only).",
    name="topology",
)
|
||||
|
||||
|
||||
async def _repo():
    """Return the repository from ``get_repository()`` after initialising it."""
    # Imported on first use rather than at module import time.
    from decnet.web.db.factory import get_repository

    repository = get_repository()
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
@_group.command("generate")
def _generate(
    name: str = typer.Option(..., "--name", help="Topology name"),
    depth: int = typer.Option(3, "--depth", min=1, max=16),
    branching: int = typer.Option(2, "--branching", min=1, max=8),
    deckies_per_lan: str = typer.Option(
        "1-3",
        "--deckies-per-lan",
        help="Min-max deckies per LAN, e.g. 1-3",
    ),
    bridge_forward_probability: float = typer.Option(1.0, "--bridge-forward-p", min=0.0, max=1.0),
    cross_edge_probability: float = typer.Option(0.0, "--cross-edge-p", min=0.0, max=1.0),
    services: Optional[str] = typer.Option(None, "--services", help="Comma-separated explicit services"),
    randomize_services: bool = typer.Option(True, "--randomize-services/--no-randomize-services"),
    seed: Optional[int] = typer.Option(None, "--seed", min=0),
) -> None:
    """Generate a topology plan and persist it as pending."""
    _require_master_mode("topology generate")

    # "MIN-MAX" -> two ints. A missing dash or a non-numeric half both end
    # up as ValueError and produce the same usage message.
    try:
        lo_text, _dash, hi_text = deckies_per_lan.partition("-")
        lo = int(lo_text)
        hi = int(hi_text)
    except ValueError:
        _console.print("[red]--deckies-per-lan must be formatted as MIN-MAX, e.g. 1-3.[/]")
        raise typer.Exit(1)

    services_explicit = None
    if services:
        services_explicit = [
            item.strip() for item in services.split(",") if item.strip()
        ]

    # An explicit service list switches randomisation off.
    effective_randomize = False if services_explicit else randomize_services

    try:
        cfg = TopologyConfig(
            name=name,
            depth=depth,
            branching_factor=branching,
            deckies_per_lan_min=lo,
            deckies_per_lan_max=hi,
            bridge_forward_probability=bridge_forward_probability,
            cross_edge_probability=cross_edge_probability,
            services_explicit=services_explicit,
            randomize_services=effective_randomize,
            seed=seed,
        )
    except ValueError as e:
        _console.print(f"[red]{e}[/]")
        raise typer.Exit(1)

    plan = generate(cfg)

    async def _store() -> str:
        repo = await _repo()
        return await persist(repo, plan)

    tid = asyncio.run(_store())
    _console.print(f"[green]Topology persisted as pending[/] — id=[bold]{tid}[/]")
    _console.print(
        f" LANs: {len(plan.lans)} deckies: {len(plan.deckies)} edges: {len(plan.edges)}"
    )
|
||||
|
||||
|
||||
@_group.command("list")
def _list() -> None:
    """List all topologies."""
    _require_master_mode("topology list")

    async def _fetch() -> list[dict]:
        repo = await _repo()
        return await repo.list_topologies()

    rows = asyncio.run(_fetch())
    if not rows:
        _console.print("[yellow]No topologies.[/]")
        return

    required = ("id", "name", "mode", "status")
    table = Table(title="DECNET / MazeNET Topologies")
    for heading in (*required, "created_at"):
        table.add_column(heading)
    for row in rows:
        # The first four keys are indexed directly; only ``created_at``
        # falls back to an empty cell when absent.
        cells = [str(row[key]) for key in required]
        cells.append(str(row.get("created_at", "")))
        table.add_row(*cells)
    _console.print(table)
|
||||
|
||||
|
||||
@_group.command("show")
def _show(topology_id: str = typer.Argument(..., help="Topology id")) -> None:
    """Print a structured summary of a topology."""
    _require_master_mode("topology show")

    async def _go():
        repo = await _repo()
        return await hydrate(repo, topology_id)

    hydrated = asyncio.run(_go())
    if hydrated is None:
        _console.print(f"[red]No such topology: {topology_id}[/]")
        raise typer.Exit(1)

    topo = hydrated["topology"]
    _console.print(
        f"[bold]{topo['name']}[/] id={topo['id']} status={topo['status']}"
        f" mode={topo['mode']}"
    )

    def _decky_name(d: dict) -> str:
        # Prefer the configured name, then the row's own name, then the uuid.
        cfg = d.get("decky_config") or {}
        return cfg.get("name") or d.get("name") or d["uuid"]

    # Index deckies by uuid once so each edge is resolved with a dict lookup
    # instead of a scan over every decky. ``setdefault`` keeps the first row
    # for a uuid, which is the row a first-match scan would return.
    deckies_by_uuid: dict[str, dict] = {}
    for d in hydrated["deckies"]:
        deckies_by_uuid.setdefault(d["uuid"], d)

    # Group edges by the LAN they attach to, preserving their order.
    edges_by_lan: dict[str, list[dict]] = {}
    for e in hydrated["edges"]:
        edges_by_lan.setdefault(e["lan_id"], []).append(e)

    for lan in hydrated["lans"]:
        dmz_tag = " [dim](DMZ)[/]" if lan["is_dmz"] else ""
        _console.print(f"\n[cyan]LAN[/] {lan['name']} {lan['subnet']}{dmz_tag}")
        for e in edges_by_lan.get(lan["id"], []):
            decky = deckies_by_uuid.get(e["decky_uuid"])
            if decky is None:
                # Edge points at a decky that was not hydrated; skip it.
                continue
            cfg = decky.get("decky_config") or {}
            name = _decky_name(decky)
            # Per-LAN address first, then the decky's own ip, else "?".
            ip = (cfg.get("ips_by_lan") or {}).get(lan["name"]) or decky.get("ip") or "?"
            tags = []
            if e["is_bridge"]:
                tags.append("bridge")
            if e["forwards_l3"]:
                tags.append("L3-forward")
            tag_s = f" [yellow]({', '.join(tags)})[/]" if tags else ""
            svcs = ",".join(cfg.get("services") or decky.get("services") or []) or "-"
            _console.print(f" • {name} {ip} svcs={svcs}{tag_s}")
|
||||
|
||||
|
||||
@_group.command("deploy")
def _deploy(
    topology_id: str = typer.Argument(..., help="Topology id (must be pending)"),
    dry_run: bool = typer.Option(False, "--dry-run", help="Write compose + create nets, skip containers"),
) -> None:
    """Deploy a pending topology."""
    _require_master_mode("topology deploy")
    # Deferred so the deployer is only imported when this command runs.
    from decnet.engine.deployer import deploy_topology

    async def _run_deploy() -> None:
        repository = await _repo()
        await deploy_topology(repository, topology_id, dry_run=dry_run)

    asyncio.run(_run_deploy())
    _console.print(f"[green]Topology {topology_id} deployed.[/]")
|
||||
|
||||
|
||||
@_group.command("teardown")
def _teardown(
    topology_id: str = typer.Argument(..., help="Topology id"),
) -> None:
    """Tear down a topology. Legal from active|degraded|failed|deploying."""
    _require_master_mode("topology teardown")
    # Deferred so the deployer is only imported when this command runs.
    from decnet.engine.deployer import teardown_topology

    async def _run_teardown() -> None:
        repository = await _repo()
        await teardown_topology(repository, topology_id)

    asyncio.run(_run_teardown())
    _console.print(f"[green]Topology {topology_id} torn down.[/]")
|
||||
|
||||
|
||||
@_group.command("delete")
def _delete(
    topology_id: str = typer.Argument(..., help="Topology id"),
    force: bool = typer.Option(
        False,
        "--force",
        help="Skip the confirmation prompt (required for non-interactive use).",
    ),
) -> None:
    """Delete a topology and all its children (LANs, deckies, edges, mutations).

    Refuses while containers are running — teardown first.
    """
    _require_master_mode("topology delete")

    # Statuses in which deletion is refused.
    _RUNNING = {
        TopologyStatus.DEPLOYING,
        TopologyStatus.ACTIVE,
        TopologyStatus.DEGRADED,
        TopologyStatus.TEARING_DOWN,
    }

    async def _attempt() -> tuple[bool, Optional[str]]:
        # Returns (deleted, reason): reason is "not-found", the blocking
        # status rendered as a string, or None when the delete was attempted.
        repository = await _repo()
        record = await repository.get_topology(topology_id)
        if record is None:
            return False, "not-found"
        if record.status in _RUNNING:
            return False, str(record.status)
        deleted = await repository.delete_topology_cascade(topology_id)
        return deleted, None

    # --force bypasses the prompt; otherwise the default answer is "no".
    confirmed = force or typer.confirm(
        f"Delete topology {topology_id} and all its children? This cannot be undone.",
        default=False,
    )
    if not confirmed:
        _console.print("[yellow]Cancelled.[/]")
        raise typer.Exit(0)

    ok, reason = asyncio.run(_attempt())

    failure = None
    if reason == "not-found":
        failure = f"[red]No such topology: {topology_id}[/]"
    elif reason is not None:
        failure = (
            f"[red]Cannot delete while status={reason!r}. Run "
            f"[bold]decnet topology teardown {topology_id}[/] first.[/]"
        )
    elif not ok:
        failure = f"[red]Delete failed: {topology_id}[/]"

    if failure is not None:
        _console.print(failure)
        raise typer.Exit(1)
    _console.print(f"[green]Topology {topology_id} deleted.[/]")
|
||||
|
||||
|
||||
@_group.command("mutate")
def _mutate(
    topology_id: str = typer.Argument(..., help="Topology id (active or degraded)"),
    op: str = typer.Argument(
        ...,
        help=(
            "One of: add_lan, remove_lan, add_decky, attach_decky, "
            "detach_decky, remove_decky, update_decky, update_lan"
        ),
    ),
    payload_json: str = typer.Option(
        "{}",
        "--payload-json",
        help="JSON payload for the op (see mutator.ops for keys)",
    ),
    expected_version: Optional[int] = typer.Option(
        None,
        "--expected-version",
        help="Optimistic-concurrency guard; enqueue fails with a "
        "VersionConflict if the topology has since been mutated.",
    ),
) -> None:
    """Enqueue a live mutation. The mutator's watch loop applies it."""
    _require_master_mode("topology mutate")
    import json

    # json.JSONDecodeError is a ValueError subclass, so malformed input
    # lands in this handler.
    try:
        payload = json.loads(payload_json)
    except ValueError as e:
        _console.print(f"[red]Invalid JSON: {e}[/]")
        raise typer.Exit(1)

    async def _enqueue() -> str:
        repository = await _repo()
        mutation_id = await repository.enqueue_topology_mutation(
            topology_id, op, payload, expected_version=expected_version,
        )
        return mutation_id

    mid = asyncio.run(_enqueue())
    _console.print(
        f"[green]Mutation enqueued[/] — id=[bold]{mid}[/] op={op} "
        f"(watch for state=applied on [cyan]topology mutations {topology_id}[/])"
    )
|
||||
|
||||
|
||||
@_group.command("mutations")
def _mutations(
    topology_id: str = typer.Argument(..., help="Topology id"),
    state: Optional[str] = typer.Option(
        None,
        "--state",
        help="Filter to one of pending|applying|applied|failed",
    ),
) -> None:
    """List queued/applied mutations for a topology."""
    _require_master_mode("topology mutations")

    async def _fetch() -> list[dict]:
        repository = await _repo()
        return await repository.list_topology_mutations(topology_id, state=state)

    rows = asyncio.run(_fetch())
    if not rows:
        _console.print("[yellow]No mutations.[/]")
        return

    table = Table(title=f"Mutations — topology {topology_id}")
    headings = ("id", "op", "state", "requested_at", "applied_at", "reason")
    for heading in headings:
        table.add_column(heading)
    for row in rows:
        # id/op/state are indexed directly; the optional columns render
        # as empty cells when missing (or falsy for the last two).
        cells = [str(row[key]) for key in ("id", "op", "state")]
        cells.append(str(row.get("requested_at", "")))
        cells.append(str(row.get("applied_at") or ""))
        cells.append(str(row.get("reason") or ""))
        table.add_row(*cells)
    _console.print(table)
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Mount the ``topology`` command group on ``app``."""
    app.add_typer(_group, name="topology")
|
||||
|
||||
|
||||
__all__ = ["register", "TopologyStatus"]
|
||||
@@ -1,310 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""``decnet ttp`` — TTP-tagging worker and admin commands.
|
||||
|
||||
Two flat commands share this module:
|
||||
|
||||
* ``decnet ttp`` — runs the long-running tagger worker. Bus-woken on
|
||||
``attacker.session.ended`` / ``attacker.observed`` /
|
||||
``attacker.intel.enriched`` / ``identity.{formed,merged}`` /
|
||||
``credential.reuse.detected`` / ``email.received`` / ``canary.>``;
|
||||
dispatches each event through :class:`CompositeTagger` (RuleEngine +
|
||||
Behavioral / Intel / CanaryFingerprint / Email / Identity / Credential
|
||||
lifters), persists ``ttp_tag`` rows via the idempotent
|
||||
``INSERT OR IGNORE`` write, and publishes ``ttp.tagged`` +
|
||||
``ttp.rule.fired.<technique_id>`` only when the insert returned a
|
||||
non-zero rowcount (loop-prevention invariant from TTP_TAGGING.md
|
||||
§"Bus topics"). Invoked by the ``decnet-ttp.service`` systemd unit
|
||||
so its argv must stay stable.
|
||||
|
||||
* ``decnet ttp-backfill`` — replays historical events (shell commands
|
||||
recorded on :class:`Attacker.commands`, :class:`CanaryTrigger` rows)
|
||||
through the live tagger. Writes ``ttp_tag`` rows using the same
|
||||
idempotent insert path. **Does not publish** to the bus — replay must
|
||||
not re-trigger SIEM/webhook fan-out on already-attributed events.
|
||||
|
||||
Both are master-only — gated via ``MASTER_ONLY_COMMANDS`` in
|
||||
:mod:`decnet.cli.gating`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
import typer
|
||||
|
||||
from decnet.ttp.factory import CompositeTagger, get_tagger
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
# Values accepted by ``ttp-backfill --source``; the command rejects anything
# else with exit code 2. ``"all"`` is expanded by ``_resolve_sources``.
_BACKFILL_SOURCES = ("command", "canary", "all")
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Register the ``ttp`` and ``ttp-backfill`` commands on *app*.

    Both commands are defined as closures so they can be attached to whichever
    Typer application the caller passes in.
    """

    @app.command(name="ttp")
    def ttp(
        poll_interval_secs: float = typer.Option(
            60.0, "--poll-interval", "-i",
            help="Slow-tick fallback when the bus is idle or unavailable (seconds)",
        ),
        daemon: bool = typer.Option(
            False, "--daemon", "-d",
            help="Detach to background as a daemon process",
        ),
    ) -> None:
        """TTP-tagging worker — MITRE ATT&CK technique tagging."""
        # Imported inside the command so these modules are only loaded when
        # the worker is actually started.
        from decnet.ttp.worker import run_ttp_worker_loop
        from decnet.web.dependencies import repo

        if daemon:
            # Log before detaching, then hand off to the shared helper.
            log.info("ttp daemonizing poll=%s", poll_interval_secs)
            _utils._daemonize()

        log.info("ttp command invoked poll=%s", poll_interval_secs)
        console.print(
            f"[bold cyan]TTP tagging worker starting[/] "
            f"poll={poll_interval_secs}s"
        )
        console.print("[dim]Press Ctrl+C to stop[/]")

        async def _run() -> None:
            # The repository is initialised before the worker loop receives it.
            await repo.initialize()
            await run_ttp_worker_loop(
                repo, poll_interval_secs=poll_interval_secs,
            )

        try:
            asyncio.run(_run())
        except KeyboardInterrupt:
            # Ctrl+C is the documented way to stop; exit without a traceback.
            console.print("\n[yellow]TTP tagging worker stopped.[/]")

    @app.command(name="ttp-backfill")
    def ttp_backfill(
        since_days: int = typer.Option(
            7, "--since-days", "-s",
            min=1, max=3650,
            help="Replay events whose source row is newer than N days ago.",
        ),
        source: str = typer.Option(
            "all", "--source",
            help=f"Source slice to replay. One of: {', '.join(_BACKFILL_SOURCES)}.",
        ),
        dry_run: bool = typer.Option(
            False, "--dry-run",
            help="Run the tagger but skip insert_tags. Reports counts only.",
        ),
        batch_size: int = typer.Option(
            500, "--batch-size",
            min=1, max=100_000,
            help="Number of tags accumulated before each repo.insert_tags call.",
        ),
    ) -> None:
        """Replay historical attacker activity through the live tagger.

        Walks ``Attacker.commands`` (per-IP shell-command history) and
        ``CanaryTrigger`` (canary callback log) since N days ago,
        builds the same :class:`TaggerEvent` shape the live worker
        emits, and persists tags via the idempotent INSERT OR IGNORE
        write. Re-running is safe — a second pass over identical
        source rows reports ``inserted=0``.

        Bus publish is intentionally suppressed; SIEM / webhook fan-out
        sees only live events, never replays.
        """
        # Imported inside the command so they are only loaded when it runs.
        from decnet.cli.gating import _require_master_mode
        from decnet.web.dependencies import repo

        # Master-only gate. NOTE(review): presumably aborts on non-master
        # hosts — confirm against decnet.cli.gating.
        _require_master_mode("ttp-backfill")

        # ``--source`` is a free-form string option, so validate it by hand
        # and exit with code 2 on an unknown value.
        if source not in _BACKFILL_SOURCES:
            console.print(
                f"[red]invalid --source {source!r}; expected one of "
                f"{_BACKFILL_SOURCES}[/]"
            )
            raise typer.Exit(code=2)

        # Timezone-aware UTC cutoff: "now" minus the requested number of days.
        cutoff = datetime.now(tz=timezone.utc) - timedelta(days=since_days)
        console.print(
            f"[bold cyan]TTP backfill[/] since={cutoff.isoformat()} "
            f"source={source} dry_run={dry_run} batch_size={batch_size}"
        )

        async def _run() -> None:
            # The repository is initialised before the backfill receives it.
            await repo.initialize()
            await _backfill(
                repo,
                cutoff=cutoff,
                sources=_resolve_sources(source),
                dry_run=dry_run,
                batch_size=batch_size,
            )

        try:
            asyncio.run(_run())
        except KeyboardInterrupt:
            # Report the interruption without a traceback.
            console.print("\n[yellow]Backfill interrupted.[/]")
|
||||
|
||||
|
||||
def _resolve_sources(name: str) -> tuple[str, ...]:
|
||||
if name == "all":
|
||||
return ("command", "canary")
|
||||
return (name,)
|
||||
|
||||
|
||||
async def _backfill(
    repo: Any,
    *,
    cutoff: datetime,
    sources: tuple[str, ...],
    dry_run: bool,
    batch_size: int,
) -> None:
    """Drive the per-source backfill loops and report structured counts.

    One tagger is obtained from ``get_tagger()`` and reused for every source.
    When it is a :class:`CompositeTagger`, each object yielded by
    ``iter_watchables()`` has its ``watch_store()`` started as a background
    task before any event is fed, so the dispatch indexes can hydrate first.
    Those tasks are cancelled and awaited once the backfill finishes or fails.

    Args:
        repo: Repository handed through to the per-source loops.
        cutoff: Only source rows newer than this instant are replayed.
        sources: Source kinds to replay; ``"command"`` and ``"canary"`` are
            recognised, anything else is ignored.
        dry_run: When true the per-source loops skip persisting tags.
        batch_size: Number of tags buffered before each insert.
    """
    # Resolved through the module-level name so tests can monkeypatch
    # ``decnet.cli.ttp.get_tagger`` to inject a recording fake without
    # touching the global factory.
    tagger = get_tagger()
    watch_tasks: list[asyncio.Task[None]] = []
    if isinstance(tagger, CompositeTagger):
        watch_tasks = [
            asyncio.create_task(watchable.watch_store())
            for watchable in tagger.iter_watchables()
        ]
        # Yield briefly so each watch_store gets a chance to run its
        # initial `load_compiled` before we feed the first event.
        await asyncio.sleep(0.05)

    try:
        if "command" in sources:
            await _backfill_commands(
                repo, tagger, cutoff=cutoff,
                dry_run=dry_run, batch_size=batch_size,
            )
        if "canary" in sources:
            await _backfill_canaries(
                repo, tagger, cutoff=cutoff,
                dry_run=dry_run, batch_size=batch_size,
            )
    finally:
        for task in watch_tasks:
            task.cancel()
        # return_exceptions=True collects each child's outcome (including the
        # CancelledError we just requested) instead of raising it, while a
        # cancellation aimed at this coroutine itself still propagates.
        outcomes = await asyncio.gather(*watch_tasks, return_exceptions=True)
        for outcome in outcomes:
            # CancelledError derives from BaseException, so only genuine
            # watch_store failures are reported here. Cleanup stays
            # best-effort: failures are logged, never re-raised.
            if isinstance(outcome, Exception):
                log.warning("ttp backfill watch task failed: %r", outcome)
|
||||
|
||||
|
||||
async def _backfill_commands(
    repo: Any,
    tagger: Any,
    *,
    cutoff: datetime,
    dry_run: bool,
    batch_size: int,
) -> None:
    """Replay recorded shell commands through *tagger* and persist the tags.

    Iterates ``repo.iter_attacker_commands_since(cutoff)``, builds one
    ``TaggerEvent`` per command that carries a string ``command_text`` /
    ``text``, buffers the resulting tags and flushes them in batches of
    *batch_size*. A one-line summary is printed at the end.
    """
    from decnet.ttp.base import TaggerEvent

    t0 = time.monotonic()
    attacker_rows = 0
    command_count = 0
    written = 0
    buffer: list[Any] = []

    async for attacker, commands in repo.iter_attacker_commands_since(cutoff):
        attacker_rows += 1
        for position, entry in enumerate(commands):
            # Counted before validation, so skipped commands are included.
            command_count += 1
            command_text = entry.get("command_text") or entry.get("text")
            if not isinstance(command_text, str):
                continue

            # First truthy identifier wins; otherwise synthesise one from the
            # attacker uuid and the command's position in the list.
            explicit_id = next(
                (
                    value
                    for value in map(entry.get, ("id", "uuid", "command_id"))
                    if value
                ),
                None,
            )
            command_id = explicit_id or f"{attacker.uuid}#cmd{position}"

            event = TaggerEvent(
                source_kind="command",
                source_id=str(command_id),
                attacker_uuid=attacker.uuid,
                identity_uuid=getattr(attacker, "identity_id", None),
                session_id=entry.get("session_id"),
                decky_id=entry.get("decky_id") or entry.get("decky"),
                payload={**entry, "command_text": command_text},
            )
            produced = await tagger.tag(event)
            if produced:
                buffer.extend(produced)
            if len(buffer) < batch_size:
                continue
            written += await _flush(repo, buffer, dry_run)
            buffer = []

    # Flush whatever is left below the batch threshold.
    if buffer:
        written += await _flush(repo, buffer, dry_run)

    elapsed = time.monotonic() - t0
    console.print(
        f"source=command rows={attacker_rows} commands={command_count} "
        f"inserted={written} dry_run={dry_run} elapsed_s={elapsed:.2f}"
    )
|
||||
|
||||
|
||||
async def _backfill_canaries(
    repo: Any,
    tagger: Any,
    *,
    cutoff: datetime,
    dry_run: bool,
    batch_size: int,
) -> None:
    """Replay canary triggers through *tagger* and persist the tags.

    Iterates ``repo.iter_canary_triggers_since(cutoff)``, wraps every trigger
    in a ``canary_fingerprint`` ``TaggerEvent``, buffers the resulting tags
    and flushes them in batches of *batch_size*. A one-line summary is
    printed at the end.
    """
    from decnet.ttp.base import TaggerEvent

    t0 = time.monotonic()
    trigger_rows = 0
    written = 0
    buffer: list[Any] = []

    async for trigger in repo.iter_canary_triggers_since(cutoff):
        trigger_rows += 1
        payload = {
            "token_uuid": trigger.token_uuid,
            "src_ip": trigger.src_ip,
            # Missing user agents become an empty signature string.
            "ua_signature": trigger.user_agent or "",
            "user_agent": trigger.user_agent,
            "request_path": trigger.request_path,
            "dns_qname": trigger.dns_qname,
            "headers": trigger.headers(),
        }
        event = TaggerEvent(
            source_kind="canary_fingerprint",
            source_id=trigger.uuid,
            attacker_uuid=trigger.attacker_id,
            identity_uuid=None,
            session_id=None,
            decky_id=None,
            payload=payload,
        )
        produced = await tagger.tag(event)
        if produced:
            buffer.extend(produced)
        if len(buffer) < batch_size:
            continue
        written += await _flush(repo, buffer, dry_run)
        buffer = []

    # Flush whatever is left below the batch threshold.
    if buffer:
        written += await _flush(repo, buffer, dry_run)

    elapsed = time.monotonic() - t0
    console.print(
        f"source=canary rows={trigger_rows} inserted={written} "
        f"dry_run={dry_run} elapsed_s={elapsed:.2f}"
    )
|
||||
|
||||
|
||||
async def _flush(repo: Any, tags: list[Any], dry_run: bool) -> int:
|
||||
if dry_run:
|
||||
return 0
|
||||
return int(await repo.insert_tags(tags))
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import pathlib as _pathlib
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Shared CLI helpers: console, logger, process management, swarm HTTP client.
|
||||
|
||||
Submodules reference these as ``from . import utils`` then ``utils.foo(...)``
|
||||
@@ -12,7 +11,7 @@ import signal
|
||||
import subprocess # nosec B404
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Optional
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from rich.console import Console
|
||||
@@ -97,7 +96,7 @@ def _is_running(match_fn) -> int | None:
|
||||
return None
|
||||
|
||||
|
||||
def _service_registry(log_file: str) -> list[tuple[str, Callable[..., Any], list[str]]]:
|
||||
def _service_registry(log_file: str) -> list[tuple[str, callable, list[str]]]:
|
||||
"""Return the microservice registry for health-check and relaunch.
|
||||
|
||||
On agents these run as systemd units invoking /usr/local/bin/decnet,
|
||||
@@ -135,46 +134,6 @@ def _service_registry(log_file: str) -> list[tuple[str, Callable[..., Any], list
|
||||
]
|
||||
|
||||
|
||||
def _systemd_units(pattern: str = "decnet-*.service") -> list[dict] | None:
|
||||
"""Return state of every systemd unit matching *pattern*, or ``None``
|
||||
when systemctl is unavailable (non-systemd host, container lab,
|
||||
PATH-stripped env, user-manager unreachable).
|
||||
|
||||
Output shape mirrors ``systemctl list-units --output=json``: each
|
||||
dict has ``unit``, ``load``, ``active``, ``sub``, ``description``.
|
||||
Empty list = systemd works but no matching units are loaded (fresh
|
||||
host that never ran ``decnet init``).
|
||||
"""
|
||||
import json # local import — avoids paying it on every CLI startup
|
||||
import shutil
|
||||
|
||||
if not shutil.which("systemctl"):
|
||||
return None
|
||||
try:
|
||||
proc = subprocess.run( # nosec B603 B607 — fixed argv, no shell
|
||||
[
|
||||
"systemctl", "list-units",
|
||||
"--type=service", "--all",
|
||||
"--no-legend", "--no-pager",
|
||||
"--output=json",
|
||||
pattern,
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
check=False,
|
||||
)
|
||||
except (OSError, subprocess.SubprocessError):
|
||||
return None
|
||||
if proc.returncode != 0:
|
||||
return None
|
||||
try:
|
||||
data = json.loads(proc.stdout or "[]")
|
||||
except json.JSONDecodeError:
|
||||
return None
|
||||
return data if isinstance(data, list) else None
|
||||
|
||||
|
||||
def _kill_all_services() -> None:
|
||||
"""Find and kill all running DECNET microservice processes."""
|
||||
registry = _service_registry(str(DECNET_INGEST_LOG_FILE))
|
||||
@@ -196,7 +155,7 @@ _DEFAULT_SWARMCTL_URL = "http://127.0.0.1:8770"
|
||||
|
||||
|
||||
def _swarmctl_base_url(url: Optional[str]) -> str:
|
||||
return url or os.environ.get("DECNET_SWARMCTL_URL") or _DEFAULT_SWARMCTL_URL
|
||||
return url or os.environ.get("DECNET_SWARMCTL_URL", _DEFAULT_SWARMCTL_URL)
|
||||
|
||||
|
||||
def _http_request(method: str, url: str, *, json_body: Optional[dict] = None, timeout: float = 30.0):
|
||||
|
||||
@@ -1,35 +1,18 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from decnet.env import DECNET_API_HOST, DECNET_API_PORT, DECNET_WEB_HOST, DECNET_WEB_PORT
|
||||
from decnet.env import DECNET_API_PORT, DECNET_WEB_HOST, DECNET_WEB_PORT
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def _proxy_target(api_host: str) -> str:
|
||||
"""Resolve the host the web proxy should connect to.
|
||||
|
||||
The API binds at ``DECNET_API_HOST``; when that's a wildcard
|
||||
(``0.0.0.0`` / ``::``) we still connect over loopback because the
|
||||
web and API run in the same host. When the operator binds the API
|
||||
to a specific address (e.g. a Tailscale IP), the API is *only*
|
||||
reachable there — loopback is closed — so the proxy must follow.
|
||||
"""
|
||||
wildcard = {"0.0.0.0", "::", ""} # nosec B104 — comparison only
|
||||
if api_host in wildcard:
|
||||
return "127.0.0.1"
|
||||
return api_host
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
|
||||
@app.command(name="web")
|
||||
def serve_web(
|
||||
web_port: int = typer.Option(DECNET_WEB_PORT, "--web-port", help="Port to serve the DECNET Web Dashboard"),
|
||||
host: str = typer.Option(DECNET_WEB_HOST, "--host", help="Host IP to serve the Web Dashboard"),
|
||||
api_host: str = typer.Option(DECNET_API_HOST, "--api-host", help="Host the DECNET API is listening on (loopback for wildcard binds)"),
|
||||
api_port: int = typer.Option(DECNET_API_PORT, "--api-port", help="Port the DECNET API is listening on"),
|
||||
daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
|
||||
) -> None:
|
||||
@@ -50,13 +33,8 @@ def register(app: typer.Typer) -> None:
|
||||
console.print(f"[red]Frontend build not found at {dist_dir}. Make sure you run 'npm run build' inside 'decnet_web'.[/]")
|
||||
raise typer.Exit(1)
|
||||
|
||||
_api_target = _proxy_target(api_host)
|
||||
|
||||
if daemon:
|
||||
log.info(
|
||||
"web daemonizing host=%s port=%d api_target=%s:%d",
|
||||
host, web_port, _api_target, api_port,
|
||||
)
|
||||
log.info("web daemonizing host=%s port=%d api_port=%d", host, web_port, api_port)
|
||||
_utils._daemonize()
|
||||
|
||||
_api_port = api_port
|
||||
@@ -89,18 +67,6 @@ def register(app: typer.Typer) -> None:
|
||||
return
|
||||
self.send_error(405)
|
||||
|
||||
def do_PATCH(self):
    """Forward PATCH requests under ``/api/`` to the proxy; answer 405 otherwise."""
    if not self.path.startswith("/api/"):
        self.send_error(405)
        return
    self._proxy("PATCH")
|
||||
|
||||
def do_OPTIONS(self):
    """Forward OPTIONS requests under ``/api/`` to the proxy; answer 405 otherwise."""
    if not self.path.startswith("/api/"):
        self.send_error(405)
        return
    self._proxy("OPTIONS")
|
||||
|
||||
def _proxy(self, method: str) -> None:
|
||||
content_length = int(self.headers.get("Content-Length", 0))
|
||||
body = self.rfile.read(content_length) if content_length else None
|
||||
@@ -109,7 +75,7 @@ def register(app: typer.Typer) -> None:
|
||||
if k.lower() not in ("host", "connection")}
|
||||
|
||||
try:
|
||||
conn = http.client.HTTPConnection(_api_target, _api_port, timeout=120)
|
||||
conn = http.client.HTTPConnection("127.0.0.1", _api_port, timeout=120)
|
||||
conn.request(method, self.path, body=body, headers=forward)
|
||||
resp = conn.getresponse()
|
||||
|
||||
@@ -147,7 +113,7 @@ def register(app: typer.Typer) -> None:
|
||||
socketserver.TCPServer.allow_reuse_address = True
|
||||
with socketserver.ThreadingTCPServer((host, web_port), SPAHTTPRequestHandler) as httpd:
|
||||
console.print(f"[green]Serving DECNET Web Dashboard on http://{host}:{web_port}[/]")
|
||||
console.print(f"[dim]Proxying /api/* → http://{_api_target}:{_api_port}[/]")
|
||||
console.print(f"[dim]Proxying /api/* → http://127.0.0.1:{_api_port}[/]")
|
||||
try:
|
||||
httpd.serve_forever()
|
||||
except KeyboardInterrupt:
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from . import utils as _utils
|
||||
from .utils import console, log
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Register the ``webhook`` command on *app*."""

    @app.command(name="webhook")
    def webhook_cmd(
        daemon: bool = typer.Option(
            False, "--daemon", "-d", help="Detach to background as a daemon process"
        ),
    ) -> None:
        """Run the webhook dispatcher — bus consumer → external HTTP egress."""
        # Imported inside the command so these modules are only loaded when
        # the dispatcher is actually started.
        import asyncio
        from decnet.web.dependencies import repo
        from decnet.webhook import webhook_worker

        if daemon:
            # Log before detaching, then hand off to the shared helper.
            log.info("webhook daemonizing")
            _utils._daemonize()

        log.info("webhook starting")
        console.print("[bold cyan]Webhook dispatcher starting[/]")

        async def _run() -> None:
            # The repository is initialised before the worker receives it.
            await repo.initialize()
            await webhook_worker(repo)

        try:
            asyncio.run(_run())
        except KeyboardInterrupt:
            # Ctrl+C stops the worker; exit without a traceback.
            console.print("\n[yellow]Webhook worker stopped.[/]")
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user