feat(swarm): unbundle master-only code from agent tarball + sync systemd units on update

Agents now ship with collector/prober/sniffer as systemd services; mutator,
profiler, web, and API stay master-only (profiler rebuilds attacker profiles
against the master DB — no per-host DB exists). Expand _EXCLUDES to drop the
full decnet/web, decnet/mutator, decnet/profiler, and decnet_web trees from
the enrollment bundle.

Updater now calls _heal_path_symlink + _sync_systemd_units after rotation so
fleets pick up new unit files and /usr/local/bin/decnet tracks the shared venv
without a manual reinstall. daemon-reload runs once per update when any unit
changed.

Fix _service_registry matchers to accept systemd-style /usr/local/bin/decnet
cmdlines (psutil returns a list — join to string before substring-checking)
so agent-mode `decnet status` reports collector/prober/sniffer correctly.
This commit is contained in:
2026-04-19 19:19:17 -04:00
parent d2cf1e8b3a
commit 2bef3edb72
8 changed files with 56 additions and 169 deletions

View File

@@ -1163,30 +1163,45 @@ def _service_registry(log_file: str) -> list[tuple[str, callable, list[str]]]:
import sys
_py = sys.executable
# On agents these run as systemd units invoking /usr/local/bin/decnet,
# which doesn't include "decnet.cli" in its cmdline. On master dev boxes
# they're launched via `python -m decnet.cli`. Match either form: some
# argv token must mention "decnet", and the subcommand (plus any required
# flags) must appear as a whole argv token of its own.
def _matches(sub: str, extras: tuple[str, ...] = ()):
    """Build a cmdline predicate for one DECNET microservice.

    Args:
        sub: The CLI subcommand that identifies the service (e.g.
            ``"collect"``); it must be present as its own argv token.
        extras: Additional argv tokens that must all be present (e.g.
            ``("--watch",)``).

    Returns:
        A callable taking a process cmdline (a list of argv tokens, or a
        single whitespace-separated string) and returning ``True`` when the
        cmdline looks like that service.
    """
    def _check(cmd) -> bool:
        # An empty or missing cmdline can never identify a service.
        if not cmd:
            return False
        # Accept a pre-joined string as well as a list of argv tokens.
        tokens = cmd.split() if isinstance(cmd, str) else list(cmd)
        # "decnet" stays a substring test so both ".../bin/decnet" and
        # "decnet.cli" qualify.
        if not any("decnet" in tok for tok in tokens):
            return False
        # Whole-token match for the subcommand: a plain substring test on
        # the joined cmdline would also accept unrelated processes whose
        # arguments merely contain the word (e.g. a path such as
        # ".../decnet.collector.log" contains "collect").
        if sub not in tokens:
            return False
        return all(e in tokens for e in extras)
    return _check
return [
(
"Collector",
lambda cmd: "decnet.cli" in cmd and "collect" in cmd,
_matches("collect"),
[_py, "-m", "decnet.cli", "collect", "--daemon", "--log-file", log_file],
),
(
"Mutator",
lambda cmd: "decnet.cli" in cmd and "mutate" in cmd and "--watch" in cmd,
_matches("mutate", ("--watch",)),
[_py, "-m", "decnet.cli", "mutate", "--daemon", "--watch"],
),
(
"Prober",
lambda cmd: "decnet.cli" in cmd and "probe" in cmd,
_matches("probe"),
[_py, "-m", "decnet.cli", "probe", "--daemon", "--log-file", log_file],
),
(
"Profiler",
lambda cmd: "decnet.cli" in cmd and "profiler" in cmd,
_matches("profiler"),
[_py, "-m", "decnet.cli", "profiler", "--daemon"],
),
(
"Sniffer",
lambda cmd: "decnet.cli" in cmd and "sniffer" in cmd,
_matches("sniffer"),
[_py, "-m", "decnet.cli", "sniffer", "--daemon", "--log-file", log_file],
),
(
@@ -1323,11 +1338,11 @@ def status() -> None:
_status()
registry = _service_registry(str(DECNET_INGEST_LOG_FILE))
# On agents, the Mutator runs master-side only (it schedules decky
# respawns across the swarm) and the API is never shipped. Hide those
# rows so operators aren't chasing permanent DOWN entries.
# On agents, Mutator + Profiler are master-only (they need the master
# DB and orchestrate across the swarm), and the API is never shipped.
# Hide those rows so operators aren't chasing permanent DOWN entries.
if _agent_mode_active():
registry = [r for r in registry if r[0] not in {"Mutator", "API"}]
registry = [r for r in registry if r[0] not in {"Mutator", "Profiler", "API"}]
svc_table = Table(title="DECNET Services", show_lines=True)
svc_table.add_column("Service", style="bold cyan")
svc_table.add_column("Status")
@@ -1767,15 +1782,16 @@ def db_reset(
# MASTER_ONLY when touching command registration.
#
# Worker-legitimate commands (NOT in these sets): agent, updater, forwarder,
# status, collect, probe, profiler, sniffer. Agents run deckies locally and
# should be able to inspect them + run the per-host microservices (collector
# streams container logs, prober/profiler characterize attackers hitting
# this host, sniffer captures traffic). Mutator stays master-only because
# it orchestrates respawns across the swarm.
# status, collect, probe, sniffer. Agents run deckies locally and should be
# able to inspect them + run the per-host microservices (collector streams
# container logs, prober characterizes attackers hitting this host, sniffer
# captures traffic). Mutator and Profiler stay master-only: the mutator
# orchestrates respawns across the swarm; the profiler rebuilds attacker
# profiles against the master DB (no per-host DB exists).
# ───────────────────────────────────────────────────────────────────────────
MASTER_ONLY_COMMANDS: frozenset[str] = frozenset({
"api", "swarmctl", "deploy", "redeploy", "teardown",
"mutate", "listener",
"mutate", "listener", "profiler",
"services", "distros", "correlate", "archetypes", "web",
"db-reset",
})

View File

@@ -243,7 +243,7 @@ UPDATER_SYSTEMD_UNIT = "decnet-updater.service"
# without these units installed shouldn't abort the update.
AUXILIARY_SYSTEMD_UNITS = (
"decnet-collector.service", "decnet-prober.service",
"decnet-profiler.service", "decnet-sniffer.service",
"decnet-sniffer.service",
)

View File

@@ -63,19 +63,15 @@ _EXCLUDES: tuple[str, ...] = (
"wiki-checkout", "wiki-checkout/*",
# Frontend is master-only; agents never serve UI.
"decnet_web", "decnet_web/*", "decnet_web/**",
# Master API surface. Agents ship with decnet.web.db + auth + dependencies
# (the profiler microservice needs the repo singleton), but the FastAPI
# app itself (api.py, swarm_api.py, the full router tree, the ingester,
# and the .j2 templates that the master renders into the tarball) has no
# business running on a worker.
"decnet/web/api.py",
"decnet/web/swarm_api.py",
"decnet/web/ingester.py",
"decnet/web/router", "decnet/web/router/*", "decnet/web/router/**",
"decnet/web/templates", "decnet/web/templates/*", "decnet/web/templates/**",
# Mutator is master-only (it schedules decky respawns across the swarm);
# agents never invoke it. Keep it off the worker.
# Master FastAPI app and everything under decnet/web/ — no agent-side
# code imports it. The agent/updater/forwarder/collector/prober/sniffer
# entrypoints are all under decnet/agent, decnet/updater, decnet/swarm,
# decnet/collector, decnet/prober, decnet/sniffer.
"decnet/web", "decnet/web/*", "decnet/web/**",
# Mutator + Profiler are master-only (mutator schedules respawns across
# the swarm; profiler rebuilds attacker profiles against the master DB).
"decnet/mutator", "decnet/mutator/*", "decnet/mutator/**",
"decnet/profiler", "decnet/profiler/*", "decnet/profiler/**",
"decnet-state.json",
"master.log", "master.json",
"decnet.tar",
@@ -265,8 +261,10 @@ def _build_tarball(
_SYSTEMD_UNITS = (
"decnet-agent", "decnet-forwarder", "decnet-engine", "decnet-updater",
# Per-host microservices — activated by enroll_bootstrap.sh.
"decnet-collector", "decnet-prober", "decnet-profiler", "decnet-sniffer",
# Per-host microservices — activated by enroll_bootstrap.sh. The
# profiler intentionally stays master-side: it rebuilds attacker
# profiles against the master DB, which workers don't share.
"decnet-collector", "decnet-prober", "decnet-sniffer",
)

View File

@@ -1,20 +0,0 @@
[Unit]
Description=DECNET attacker profiler — {{ agent_name }}
Documentation=https://github.com/anti/DECNET
After=network-online.target decnet-agent.service
Wants=network-online.target
PartOf=decnet-agent.service
[Service]
Type=simple
WorkingDirectory=/opt/decnet
Environment=DECNET_MODE=agent
Environment=DECNET_SYSTEM_LOGS=/var/log/decnet/decnet.profiler.log
ExecStart=/usr/local/bin/decnet profiler --interval 30
Restart=on-failure
RestartSec=5
StandardOutput=append:/var/log/decnet/decnet.profiler.log
StandardError=append:/var/log/decnet/decnet.profiler.log
[Install]
WantedBy=multi-user.target

View File

@@ -62,7 +62,7 @@ ln -sf "$VENV_DIR/bin/decnet" /usr/local/bin/decnet
echo "[DECNET] installing systemd units..."
for unit in \
decnet-agent decnet-forwarder decnet-engine \
decnet-collector decnet-prober decnet-profiler decnet-sniffer; do
decnet-collector decnet-prober decnet-sniffer; do
install -Dm0644 "etc/systemd/system/${unit}.service" "/etc/systemd/system/${unit}.service"
done
if [[ "$WITH_UPDATER" == "true" ]]; then
@@ -76,7 +76,7 @@ systemctl daemon-reload
ACTIVE_UNITS=(
decnet-agent.service decnet-forwarder.service
decnet-collector.service decnet-prober.service
decnet-profiler.service decnet-sniffer.service
decnet-sniffer.service
)
if [[ "$WITH_UPDATER" == "true" ]]; then
ACTIVE_UNITS+=(decnet-updater.service)