diff --git a/decnet/cli.py b/decnet/cli.py index f513c8a..cfae475 100644 --- a/decnet/cli.py +++ b/decnet/cli.py @@ -1323,6 +1323,11 @@ def status() -> None: _status() registry = _service_registry(str(DECNET_INGEST_LOG_FILE)) + # On agents, the Mutator runs master-side only (it schedules decky + # respawns across the swarm) and the API is never shipped. Hide those + # rows so operators aren't chasing permanent DOWN entries. + if _agent_mode_active(): + registry = [r for r in registry if r[0] not in {"Mutator", "API"}] svc_table = Table(title="DECNET Services", show_lines=True) svc_table.add_column("Service", style="bold cyan") svc_table.add_column("Status") @@ -1762,13 +1767,17 @@ def db_reset( # MASTER_ONLY when touching command registration. # # Worker-legitimate commands (NOT in these sets): agent, updater, forwarder, -# status (agents run deckies locally and should be able to inspect them). +# status, collect, probe, profiler, sniffer. Agents run deckies locally and +# should be able to inspect them + run the per-host microservices (collector +# streams container logs, prober/profiler characterize attackers hitting +# this host, sniffer captures traffic). Mutator stays master-only because +# it orchestrates respawns across the swarm. # ─────────────────────────────────────────────────────────────────────────── MASTER_ONLY_COMMANDS: frozenset[str] = frozenset({ "api", "swarmctl", "deploy", "redeploy", "teardown", - "probe", "collect", "mutate", "listener", + "mutate", "listener", "services", "distros", "correlate", "archetypes", "web", - "profiler", "sniffer", "db-reset", + "db-reset", }) MASTER_ONLY_GROUPS: frozenset[str] = frozenset({"swarm"}) diff --git a/decnet/updater/executor.py b/decnet/updater/executor.py index 45edb72..1ac7ea0 100644 --- a/decnet/updater/executor.py +++ b/decnet/updater/executor.py @@ -237,6 +237,14 @@ def _run_pip( AGENT_SYSTEMD_UNIT = "decnet-agent.service" FORWARDER_SYSTEMD_UNIT = "decnet-forwarder.service" UPDATER_SYSTEMD_UNIT = "decnet-updater.service" +# Per-host microservices that run out of the same /opt/decnet tree. An +# update replaces their code, so we must cycle them alongside the agent or +# they keep serving the pre-update image. Best-effort: legacy enrollments +# without these units installed shouldn't abort the update. +AUXILIARY_SYSTEMD_UNITS = ( + "decnet-collector.service", "decnet-prober.service", + "decnet-profiler.service", "decnet-sniffer.service", +) def _systemd_available() -> bool: @@ -286,6 +294,13 @@ def _spawn_agent_via_systemd(install_dir: pathlib.Path) -> int: ) if fwd.returncode != 0: log.warning("forwarder restart failed (ignored): %s", fwd.stderr.strip()) + for unit in AUXILIARY_SYSTEMD_UNITS: + aux = subprocess.run( # nosec B603 B607 + ["systemctl", "restart", unit], + check=False, capture_output=True, text=True, + ) + if aux.returncode != 0: + log.warning("%s restart failed (ignored): %s", unit, aux.stderr.strip()) pid_out = subprocess.run( # nosec B603 B607 ["systemctl", "show", "--property=MainPID", "--value", AGENT_SYSTEMD_UNIT], check=True, capture_output=True, text=True, diff --git a/decnet/web/router/swarm_mgmt/api_enroll_bundle.py b/decnet/web/router/swarm_mgmt/api_enroll_bundle.py index 15c3f34..8d44868 100644 --- a/decnet/web/router/swarm_mgmt/api_enroll_bundle.py +++ b/decnet/web/router/swarm_mgmt/api_enroll_bundle.py @@ -63,10 +63,19 @@ _EXCLUDES: tuple[str, ...] = ( "wiki-checkout", "wiki-checkout/*", # Frontend is master-only; agents never serve UI. "decnet_web", "decnet_web/*", "decnet_web/**", - # Master FastAPI app (API, routers, master-side DB) is not run on agents. - # The `agent` / `updater` / `forwarder` commands have their own apps under - # decnet/agent, decnet/updater — they don't import decnet.web. - "decnet/web", "decnet/web/*", "decnet/web/**", + # Master API surface. Agents ship with decnet.web.db + auth + dependencies + # (the profiler microservice needs the repo singleton), but the FastAPI + # app itself (api.py, swarm_api.py, the full router tree, the ingester, + # and the .j2 templates that the master renders into the tarball) has no + # business running on a worker. + "decnet/web/api.py", + "decnet/web/swarm_api.py", + "decnet/web/ingester.py", + "decnet/web/router", "decnet/web/router/*", "decnet/web/router/**", + "decnet/web/templates", "decnet/web/templates/*", "decnet/web/templates/**", + # Mutator is master-only (it schedules decky respawns across the swarm); + # agents never invoke it. Keep it off the worker. + "decnet/mutator", "decnet/mutator/*", "decnet/mutator/**", "decnet-state.json", "master.log", "master.json", "decnet.tar", @@ -254,7 +263,11 @@ def _build_tarball( return buf.getvalue() -_SYSTEMD_UNITS = ("decnet-agent", "decnet-forwarder", "decnet-engine", "decnet-updater") +_SYSTEMD_UNITS = ( + "decnet-agent", "decnet-forwarder", "decnet-engine", "decnet-updater", + # Per-host microservices — activated by enroll_bootstrap.sh. + "decnet-collector", "decnet-prober", "decnet-profiler", "decnet-sniffer", +) def _render_systemd_unit(name: str, agent_name: str, master_host: str) -> bytes: diff --git a/decnet/web/templates/decnet-collector.service.j2 b/decnet/web/templates/decnet-collector.service.j2 new file mode 100644 index 0000000..3137bfd --- /dev/null +++ b/decnet/web/templates/decnet-collector.service.j2 @@ -0,0 +1,20 @@ +[Unit] +Description=DECNET container log collector — {{ agent_name }} +Documentation=https://github.com/anti/DECNET +After=network-online.target decnet-agent.service +Wants=network-online.target +PartOf=decnet-agent.service + +[Service] +Type=simple +WorkingDirectory=/opt/decnet +Environment=DECNET_MODE=agent +Environment=DECNET_SYSTEM_LOGS=/var/log/decnet/decnet.collector.log +ExecStart=/usr/local/bin/decnet collect --log-file /var/log/decnet/decnet.log +Restart=on-failure +RestartSec=5 +StandardOutput=append:/var/log/decnet/decnet.collector.log +StandardError=append:/var/log/decnet/decnet.collector.log + +[Install] +WantedBy=multi-user.target diff --git a/decnet/web/templates/decnet-prober.service.j2 b/decnet/web/templates/decnet-prober.service.j2 new file mode 100644 index 0000000..209851e --- /dev/null +++ b/decnet/web/templates/decnet-prober.service.j2 @@ -0,0 +1,20 @@ +[Unit] +Description=DECNET attacker prober (JARM/HASSH/TCP fingerprint) — {{ agent_name }} +Documentation=https://github.com/anti/DECNET +After=network-online.target decnet-agent.service +Wants=network-online.target +PartOf=decnet-agent.service + +[Service] +Type=simple +WorkingDirectory=/opt/decnet +Environment=DECNET_MODE=agent +Environment=DECNET_SYSTEM_LOGS=/var/log/decnet/decnet.prober.log +ExecStart=/usr/local/bin/decnet probe --log-file /var/log/decnet/decnet.log --interval 300 +Restart=on-failure +RestartSec=5 +StandardOutput=append:/var/log/decnet/decnet.prober.log +StandardError=append:/var/log/decnet/decnet.prober.log + +[Install] +WantedBy=multi-user.target diff --git a/decnet/web/templates/decnet-profiler.service.j2 b/decnet/web/templates/decnet-profiler.service.j2 new file mode 100644 index 0000000..b4691fc --- /dev/null +++ b/decnet/web/templates/decnet-profiler.service.j2 @@ -0,0 +1,20 @@ +[Unit] +Description=DECNET attacker profiler — {{ agent_name }} +Documentation=https://github.com/anti/DECNET +After=network-online.target decnet-agent.service +Wants=network-online.target +PartOf=decnet-agent.service + +[Service] +Type=simple +WorkingDirectory=/opt/decnet +Environment=DECNET_MODE=agent +Environment=DECNET_SYSTEM_LOGS=/var/log/decnet/decnet.profiler.log +ExecStart=/usr/local/bin/decnet profiler --interval 30 +Restart=on-failure +RestartSec=5 +StandardOutput=append:/var/log/decnet/decnet.profiler.log +StandardError=append:/var/log/decnet/decnet.profiler.log + +[Install] +WantedBy=multi-user.target diff --git a/decnet/web/templates/decnet-sniffer.service.j2 b/decnet/web/templates/decnet-sniffer.service.j2 new file mode 100644 index 0000000..360a3ac --- /dev/null +++ b/decnet/web/templates/decnet-sniffer.service.j2 @@ -0,0 +1,24 @@ +[Unit] +Description=DECNET network sniffer — {{ agent_name }} +Documentation=https://github.com/anti/DECNET +After=network-online.target decnet-agent.service +Wants=network-online.target +PartOf=decnet-agent.service + +[Service] +Type=simple +WorkingDirectory=/opt/decnet +Environment=DECNET_MODE=agent +Environment=DECNET_SYSTEM_LOGS=/var/log/decnet/decnet.sniffer.log +# scapy needs raw sockets; forwarder already runs with these caps, so we +# mirror the same ambient set here. +AmbientCapabilities=CAP_NET_ADMIN CAP_NET_RAW +CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_RAW +ExecStart=/usr/local/bin/decnet sniffer --log-file /var/log/decnet/decnet.log +Restart=on-failure +RestartSec=5 +StandardOutput=append:/var/log/decnet/decnet.sniffer.log +StandardError=append:/var/log/decnet/decnet.sniffer.log + +[Install] +WantedBy=multi-user.target diff --git a/decnet/web/templates/enroll_bootstrap.sh.j2 b/decnet/web/templates/enroll_bootstrap.sh.j2 index 60ca616..9db709c 100644 --- a/decnet/web/templates/enroll_bootstrap.sh.j2 +++ b/decnet/web/templates/enroll_bootstrap.sh.j2 @@ -60,15 +60,24 @@ chmod 0755 "$VENV_DIR/bin/decnet" ln -sf "$VENV_DIR/bin/decnet" /usr/local/bin/decnet echo "[DECNET] installing systemd units..." -install -Dm0644 etc/systemd/system/decnet-agent.service /etc/systemd/system/decnet-agent.service -install -Dm0644 etc/systemd/system/decnet-forwarder.service /etc/systemd/system/decnet-forwarder.service -install -Dm0644 etc/systemd/system/decnet-engine.service /etc/systemd/system/decnet-engine.service +for unit in \ + decnet-agent decnet-forwarder decnet-engine \ + decnet-collector decnet-prober decnet-profiler decnet-sniffer; do + install -Dm0644 "etc/systemd/system/${unit}.service" "/etc/systemd/system/${unit}.service" +done if [[ "$WITH_UPDATER" == "true" ]]; then install -Dm0644 etc/systemd/system/decnet-updater.service /etc/systemd/system/decnet-updater.service fi systemctl daemon-reload -ACTIVE_UNITS=(decnet-agent.service decnet-forwarder.service) +# Agent + forwarder are the control plane; collector/prober/profiler/sniffer +# are the per-host microservices that used to require `decnet deploy` to +# auto-spawn. With systemd units they come up at boot and auto-restart. +ACTIVE_UNITS=( + decnet-agent.service decnet-forwarder.service + decnet-collector.service decnet-prober.service + decnet-profiler.service decnet-sniffer.service +) if [[ "$WITH_UPDATER" == "true" ]]; then ACTIVE_UNITS+=(decnet-updater.service) fi diff --git a/tests/api/swarm_mgmt/test_enroll_bundle.py b/tests/api/swarm_mgmt/test_enroll_bundle.py index d3a085f..c9971e7 100644 --- a/tests/api/swarm_mgmt/test_enroll_bundle.py +++ b/tests/api/swarm_mgmt/test_enroll_bundle.py @@ -184,6 +184,10 @@ async def test_systemd_units_shipped_and_installed(client, auth_token): assert "etc/systemd/system/decnet-agent.service" in names assert "etc/systemd/system/decnet-forwarder.service" in names assert "etc/systemd/system/decnet-engine.service" in names + # Per-host microservices get their own systemd units now. + for unit in ("decnet-collector", "decnet-prober", + "decnet-profiler", "decnet-sniffer"): + assert f"etc/systemd/system/{unit}.service" in names, unit fwd = tf.extractfile("etc/systemd/system/decnet-forwarder.service").read().decode() assert "--master-host 10.9.8.7" in fwd @@ -197,8 +201,14 @@ async def test_systemd_units_shipped_and_installed(client, auth_token): master_host="10.9.8.7")).json()["token"] sh = (await client.get(f"/api/v1/swarm/enroll-bundle/{sh_token}.sh")).text assert "systemctl daemon-reload" in sh - # Agent + forwarder always enabled; updater conditional on WITH_UPDATER. - assert "decnet-agent.service decnet-forwarder.service" in sh + # Agent + forwarder + per-host microservices always enabled; updater + # conditional on WITH_UPDATER. + for unit in ( + "decnet-agent.service", "decnet-forwarder.service", + "decnet-collector.service", "decnet-prober.service", + "decnet-profiler.service", "decnet-sniffer.service", + ): + assert unit in sh, unit assert "decnet-updater.service" in sh ini = tf.extractfile("etc/decnet/decnet.ini").read().decode() @@ -299,9 +309,16 @@ async def test_get_tgz_contents(client, auth_token, tmp_path): assert ".env.example" not in bad, f"leaked env file: {bad}" # Master-only trees: agents don't run the FastAPI master app or the # React frontend, so shipping them bloats the tarball and widens the - # worker's attack surface for no benefit. + # worker's attack surface for no benefit. decnet/web/db and + # decnet/web/dependencies.py DO ship — the profiler microservice on + # the agent needs the repo singleton. assert not bad.startswith("decnet_web/"), f"leaked frontend: {bad}" - assert not bad.startswith("decnet/web/"), f"leaked master-api: {bad}" + assert bad != "decnet/web/api.py", f"leaked master API: {bad}" + assert bad != "decnet/web/swarm_api.py", f"leaked swarm API: {bad}" + assert bad != "decnet/web/ingester.py", f"leaked ingester: {bad}" + assert not bad.startswith("decnet/web/router/"), f"leaked router: {bad}" + assert not bad.startswith("decnet/web/templates/"), f"leaked tpl: {bad}" + assert not bad.startswith("decnet/mutator/"), f"leaked mutator: {bad}" # INI content is correct ini = tf.extractfile("etc/decnet/decnet.ini").read().decode() diff --git a/tests/updater/test_updater_executor.py b/tests/updater/test_updater_executor.py index eff4835..9e001e3 100644 --- a/tests/updater/test_updater_executor.py +++ b/tests/updater/test_updater_executor.py @@ -419,11 +419,17 @@ def test_spawn_agent_via_systemd_records_main_pid( pid = ex._spawn_agent_via_systemd(install_dir) assert pid == 4711 assert (install_dir / "agent.pid").read_text() == "4711" - # Agent restart, forwarder restart, then MainPID lookup on the agent. + # Agent restart, forwarder restart, each aux microservice, then the + # MainPID lookup on the agent. assert calls[0] == ["systemctl", "restart", ex.AGENT_SYSTEMD_UNIT] assert calls[1] == ["systemctl", "restart", ex.FORWARDER_SYSTEMD_UNIT] - assert calls[2][:2] == ["systemctl", "show"] - assert ex.AGENT_SYSTEMD_UNIT in calls[2] + aux_calls = calls[2 : 2 + len(ex.AUXILIARY_SYSTEMD_UNITS)] + assert aux_calls == [ + ["systemctl", "restart", unit] for unit in ex.AUXILIARY_SYSTEMD_UNITS + ] + show_call = calls[2 + len(ex.AUXILIARY_SYSTEMD_UNITS)] + assert show_call[:2] == ["systemctl", "show"] + assert ex.AGENT_SYSTEMD_UNIT in show_call def test_spawn_agent_via_systemd_tolerates_missing_forwarder_unit(