From f5a5fec607634c150ba1ac9f295620e90e5c88f4 Mon Sep 17 00:00:00 2001 From: anti Date: Sun, 19 Apr 2026 00:44:06 -0400 Subject: [PATCH] feat(deploy): systemd units w/ capability-based hardening; updater restarts agent via systemctl Add deploy/ unit files for every DECNET daemon (agent, updater, api, web, swarmctl, listener, forwarder). All run as User=decnet with NoNewPrivileges, ProtectSystem, PrivateTmp, LockPersonality; AmbientCapabilities=CAP_NET_ADMIN CAP_NET_RAW only on the agent (MACVLAN/scapy). Existing api/web units migrated to /opt/decnet layout and the same hardening stanza. Make the updater's _spawn_agent systemd-aware: under systemd (detected via INVOCATION_ID + systemctl on PATH), `systemctl restart decnet-agent.service` replaces the Popen path so the new agent inherits the unit's ambient caps instead of the updater's empty set. _stop_agent becomes a no-op in that mode to avoid racing systemctl's own stop phase. Tests cover the dispatcher branch selection, MainPID parsing, and the systemd no-op stop. 
--- decnet/updater/executor.py | 57 ++++++++++++++++++-- deploy/decnet-agent.service | 41 ++++++++++++++ deploy/decnet-api.service | 24 ++++++--- deploy/decnet-forwarder.service | 46 ++++++++++++++++ deploy/decnet-listener.service | 43 +++++++++++++++ deploy/decnet-swarmctl.service | 40 ++++++++++++++ deploy/decnet-updater.service | 49 +++++++++++++++++ deploy/decnet-web.service | 26 +++++---- tests/updater/test_updater_executor.py | 74 ++++++++++++++++++++++++++ 9 files changed, 381 insertions(+), 19 deletions(-) create mode 100644 deploy/decnet-agent.service create mode 100644 deploy/decnet-forwarder.service create mode 100644 deploy/decnet-listener.service create mode 100644 deploy/decnet-swarmctl.service create mode 100644 deploy/decnet-updater.service diff --git a/decnet/updater/executor.py b/decnet/updater/executor.py index 7eddca5..067b0a6 100644 --- a/decnet/updater/executor.py +++ b/decnet/updater/executor.py @@ -208,11 +208,56 @@ def _run_pip( ) -def _spawn_agent(install_dir: pathlib.Path) -> int: - """Launch ``decnet agent --daemon`` using the current-symlinked venv. +AGENT_SYSTEMD_UNIT = "decnet-agent.service" - Returns the new PID. Monkeypatched in tests. + +def _systemd_available() -> bool: + """True when we're running under systemd and have systemctl on PATH. + + Detection is conservative: we only return True if *both* the invocation + marker is set (``INVOCATION_ID`` is exported by systemd for every unit) + and ``systemctl`` is resolvable. The env var alone can be forged; the + binary alone can exist on hosts running other init systems. """ + if not os.environ.get("INVOCATION_ID"): + return False + from shutil import which + return which("systemctl") is not None + + +def _spawn_agent(install_dir: pathlib.Path) -> int: + """Launch the agent and return its PID. + + Under systemd, restart ``decnet-agent.service`` via ``systemctl`` so the + new process inherits the unit's ambient capabilities (CAP_NET_ADMIN, + CAP_NET_RAW). 
Spawning with ``subprocess.Popen`` from inside the updater + unit would make the agent a child of the updater, so it would inherit + the updater's (empty) capability set — it would come up without the + caps needed to run MACVLAN/scapy. + + Off systemd (dev boxes, manual starts), fall back to a direct Popen. + """ + if _systemd_available(): + return _spawn_agent_via_systemd(install_dir) + return _spawn_agent_via_popen(install_dir) + + +def _spawn_agent_via_systemd(install_dir: pathlib.Path) -> int: + subprocess.run( # nosec B603 B607 + ["systemctl", "restart", AGENT_SYSTEMD_UNIT], + check=True, capture_output=True, text=True, + ) + pid_out = subprocess.run( # nosec B603 B607 + ["systemctl", "show", "--property=MainPID", "--value", AGENT_SYSTEMD_UNIT], + check=True, capture_output=True, text=True, + ) + pid = int(pid_out.stdout.strip() or "0") + if pid: + _pid_file(install_dir).write_text(str(pid)) + return pid + + +def _spawn_agent_via_popen(install_dir: pathlib.Path) -> int: decnet_bin = _shared_venv(install_dir) / "bin" / "decnet" log_path = install_dir / "agent.spawn.log" # cwd=install_dir so a persistent ``/.env.local`` gets @@ -267,7 +312,13 @@ def _stop_agent(install_dir: pathlib.Path, grace: float = AGENT_RESTART_GRACE_S) Prefers the PID recorded in ``agent.pid`` (processes we spawned) but falls back to scanning /proc for any ``decnet agent`` so manually-started agents are also restarted cleanly during an update. + + Under systemd, stop is a no-op — ``_spawn_agent`` issues a single + ``systemctl restart`` that handles stop and start atomically. Pre-stopping + would only race the restart's own stop phase. 
""" + if _systemd_available(): + return pids: list[int] = [] pid_file = _pid_file(install_dir) if pid_file.is_file(): diff --git a/deploy/decnet-agent.service b/deploy/decnet-agent.service new file mode 100644 index 0000000..1657932 --- /dev/null +++ b/deploy/decnet-agent.service @@ -0,0 +1,41 @@ +[Unit] +Description=DECNET Worker Agent (mTLS) +Documentation=https://github.com/4nt11/DECNET/wiki/SWARM-Mode +After=network-online.target docker.service +Wants=network-online.target +Requires=docker.service + +[Service] +Type=simple +User=decnet +Group=decnet +# docker.sock is group-readable by 'docker'; the agent needs it for compose. +SupplementaryGroups=docker +WorkingDirectory=/opt/decnet +EnvironmentFile=-/opt/decnet/.env.local +ExecStart=/opt/decnet/venv/bin/decnet agent --host 0.0.0.0 --port 8765 --agent-dir /etc/decnet/agent + +# MACVLAN/IPVLAN management + scapy raw sockets. Granted via ambient caps so +# the process starts unprivileged and keeps only these two bits. +CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_RAW +AmbientCapabilities=CAP_NET_ADMIN CAP_NET_RAW + +# Security Hardening +NoNewPrivileges=yes +ProtectSystem=full +ProtectHome=read-only +PrivateTmp=yes +ProtectKernelTunables=yes +ProtectKernelModules=yes +ProtectControlGroups=yes +RestrictSUIDSGID=yes +LockPersonality=yes +# /opt/decnet holds release slots + state; the agent reads them and writes its PID. 
+ReadWritePaths=/opt/decnet /var/log/decnet + +Restart=on-failure +RestartSec=5 +TimeoutStopSec=15 + +[Install] +WantedBy=multi-user.target diff --git a/deploy/decnet-api.service b/deploy/decnet-api.service index c4a504a..e7b253d 100644 --- a/deploy/decnet-api.service +++ b/deploy/decnet-api.service @@ -1,19 +1,21 @@ [Unit] Description=DECNET API Service -After=network.target docker.service +Documentation=https://github.com/4nt11/DECNET/wiki/REST-API-Reference +After=network-online.target docker.service +Wants=network-online.target Requires=docker.service [Service] Type=simple User=decnet Group=decnet -WorkingDirectory=/path/to/DECNET -# Ensure environment is loaded from the .env file -EnvironmentFile=/path/to/DECNET/.env -# Use the virtualenv python to run the decnet api command -ExecStart=/path/to/DECNET/.venv/bin/decnet api +# docker.sock is group-readable by 'docker'; the API ingester tails container logs. +SupplementaryGroups=docker +WorkingDirectory=/opt/decnet +EnvironmentFile=-/opt/decnet/.env.local +ExecStart=/opt/decnet/venv/bin/decnet api -# Capabilities required to manage MACVLAN interfaces and network links without root +# MACVLAN/IPVLAN setup runs from the API lifespan when the embedded sniffer is on. 
CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_RAW AmbientCapabilities=CAP_NET_ADMIN CAP_NET_RAW @@ -21,9 +23,17 @@ AmbientCapabilities=CAP_NET_ADMIN CAP_NET_RAW NoNewPrivileges=yes ProtectSystem=full ProtectHome=read-only +PrivateTmp=yes +ProtectKernelTunables=yes +ProtectKernelModules=yes +ProtectControlGroups=yes +RestrictSUIDSGID=yes +LockPersonality=yes +ReadWritePaths=/opt/decnet /var/log/decnet Restart=on-failure RestartSec=5 +TimeoutStopSec=15 [Install] WantedBy=multi-user.target diff --git a/deploy/decnet-forwarder.service b/deploy/decnet-forwarder.service new file mode 100644 index 0000000..35a6d36 --- /dev/null +++ b/deploy/decnet-forwarder.service @@ -0,0 +1,46 @@ +[Unit] +Description=DECNET Syslog-over-TLS Forwarder (worker, RFC 5425) +Documentation=https://github.com/4nt11/DECNET/wiki/Logging-and-Syslog +After=network-online.target +Wants=network-online.target +# The forwarder can run independently of the agent — it only needs the local +# log file to exist and the master to be reachable. + +[Service] +Type=simple +User=decnet +Group=decnet +WorkingDirectory=/opt/decnet +EnvironmentFile=-/opt/decnet/.env.local +# Set DECNET_SWARM_MASTER_HOST (e.g. in /opt/decnet/.env.local) to the master's +# LAN address or hostname. The agent cert bundle at /etc/decnet/agent is reused +# — the forwarder presents the same worker identity to the master's listener. +ExecStart=/opt/decnet/venv/bin/decnet forwarder \ + --log-file /var/log/decnet/decnet.log \ + --master-host ${DECNET_SWARM_MASTER_HOST} \ + --master-port 6514 \ + --agent-dir /etc/decnet/agent + +# TLS client connection; no special capabilities. +CapabilityBoundingSet= +AmbientCapabilities= + +# Security Hardening +NoNewPrivileges=yes +ProtectSystem=full +ProtectHome=read-only +PrivateTmp=yes +ProtectKernelTunables=yes +ProtectKernelModules=yes +ProtectControlGroups=yes +RestrictSUIDSGID=yes +LockPersonality=yes +# Reads the tailed log; writes a small byte-offset state file alongside it. 
+ReadWritePaths=/var/log/decnet +ReadOnlyPaths=/etc/decnet + +Restart=on-failure +RestartSec=5 + +[Install] +WantedBy=multi-user.target diff --git a/deploy/decnet-listener.service b/deploy/decnet-listener.service new file mode 100644 index 0000000..db43db6 --- /dev/null +++ b/deploy/decnet-listener.service @@ -0,0 +1,43 @@ +[Unit] +Description=DECNET Syslog-over-TLS Listener (master, RFC 5425) +Documentation=https://github.com/4nt11/DECNET/wiki/Logging-and-Syslog +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +User=decnet +Group=decnet +WorkingDirectory=/opt/decnet +EnvironmentFile=-/opt/decnet/.env.local +# Binds 0.0.0.0:6514 so workers across the LAN can connect. 6514 is not a +# privileged port (≥1024), so no CAP_NET_BIND_SERVICE is required. +ExecStart=/opt/decnet/venv/bin/decnet listener \ + --host 0.0.0.0 --port 6514 \ + --ca-dir /etc/decnet/ca \ + --log-path /var/log/decnet/master.log \ + --json-path /var/log/decnet/master.json + +# Pure TLS server; no privileged network operations. +CapabilityBoundingSet= +AmbientCapabilities= + +# Security Hardening +NoNewPrivileges=yes +ProtectSystem=full +ProtectHome=read-only +PrivateTmp=yes +ProtectKernelTunables=yes +ProtectKernelModules=yes +ProtectControlGroups=yes +RestrictSUIDSGID=yes +LockPersonality=yes +# Writes forensic .log + parsed .json sinks; CA bundle is read-only. 
+ReadWritePaths=/var/log/decnet +ReadOnlyPaths=/etc/decnet + +Restart=on-failure +RestartSec=5 + +[Install] +WantedBy=multi-user.target diff --git a/deploy/decnet-swarmctl.service b/deploy/decnet-swarmctl.service new file mode 100644 index 0000000..bda6d60 --- /dev/null +++ b/deploy/decnet-swarmctl.service @@ -0,0 +1,40 @@ +[Unit] +Description=DECNET Swarm Controller (master) +Documentation=https://github.com/4nt11/DECNET/wiki/SWARM-Mode +After=network-online.target decnet-api.service +Wants=network-online.target + +[Service] +Type=simple +User=decnet +Group=decnet +WorkingDirectory=/opt/decnet +EnvironmentFile=-/opt/decnet/.env.local +# Default bind is loopback — the controller is a master-local orchestrator +# reached by the CLI and the web dashboard, not by workers. +ExecStart=/opt/decnet/venv/bin/decnet swarmctl --host 127.0.0.1 --port 8770 + +# No special capabilities — the controller issues mTLS certs and talks to +# workers over TCP on unprivileged ports. +CapabilityBoundingSet= +AmbientCapabilities= + +# Security Hardening +NoNewPrivileges=yes +ProtectSystem=full +ProtectHome=read-only +PrivateTmp=yes +ProtectKernelTunables=yes +ProtectKernelModules=yes +ProtectControlGroups=yes +RestrictSUIDSGID=yes +LockPersonality=yes +# Reads/writes the CA bundle and the master DB. +ReadWritePaths=/opt/decnet /var/log/decnet +ReadOnlyPaths=/etc/decnet + +Restart=on-failure +RestartSec=5 + +[Install] +WantedBy=multi-user.target diff --git a/deploy/decnet-updater.service b/deploy/decnet-updater.service new file mode 100644 index 0000000..6b3a457 --- /dev/null +++ b/deploy/decnet-updater.service @@ -0,0 +1,49 @@ +[Unit] +Description=DECNET Self-Updater (mTLS) +Documentation=https://github.com/4nt11/DECNET/wiki/Remote-Updates +After=network-online.target +Wants=network-online.target +# Deliberately NOT After=decnet-agent.service — the updater must come up even +# when the agent is broken, since that is exactly when it is most useful. 
+ +[Service] +Type=simple +User=decnet +Group=decnet +WorkingDirectory=/opt/decnet +EnvironmentFile=-/opt/decnet/.env.local +ExecStart=/opt/decnet/venv/bin/decnet updater \ + --host 0.0.0.0 --port 8766 \ + --updater-dir /etc/decnet/updater \ + --install-dir /opt/decnet \ + --agent-dir /etc/decnet/agent + +# The updater restarts the agent with `systemctl restart decnet-agent.service` +# rather than signalling or spawning it directly. It does not need +# NET_ADMIN/NET_RAW itself — the new agent process picks those up from +# decnet-agent.service when systemd restarts it. An agent spawned as a direct +# child of the updater would inherit this unit's empty capability set instead. +CapabilityBoundingSet= +AmbientCapabilities= + +# Security Hardening +NoNewPrivileges=yes +ProtectSystem=full +ProtectHome=read-only +PrivateTmp=yes +ProtectKernelTunables=yes +ProtectKernelModules=yes +ProtectControlGroups=yes +RestrictSUIDSGID=yes +LockPersonality=yes +# Writes release slots, pip installs into venv, manages agent.pid. +ReadWritePaths=/opt/decnet /var/log/decnet + +Restart=on-failure +RestartSec=5 +# Self-update replaces the process image via os.execv; the new binary answers +# /health within 30 s. Give it headroom before systemd's own termination. 
+TimeoutStopSec=30 + +[Install] +WantedBy=multi-user.target diff --git a/deploy/decnet-web.service b/deploy/decnet-web.service index d00d85b..e3b0e6d 100644 --- a/deploy/decnet-web.service +++ b/deploy/decnet-web.service @@ -1,27 +1,35 @@ [Unit] Description=DECNET Web Dashboard Service -After=network.target decnet-api.service +Documentation=https://github.com/4nt11/DECNET/wiki/Web-Dashboard +After=network-online.target decnet-api.service +Wants=network-online.target [Service] Type=simple User=decnet Group=decnet -WorkingDirectory=/path/to/DECNET -# Ensure environment is loaded from the .env file -EnvironmentFile=/path/to/DECNET/.env -# Use the virtualenv python to run the decnet web command -ExecStart=/path/to/DECNET/.venv/bin/decnet web +WorkingDirectory=/opt/decnet +EnvironmentFile=-/opt/decnet/.env.local +ExecStart=/opt/decnet/venv/bin/decnet web -# The Web Dashboard service does not require network administration privileges. -# Enable the following lines if you wish to bind the Dashboard to a privileged port (e.g., 80 or 443) -# while still running as a non-root user. 
+# Uncomment if you bind the dashboard to a privileged port (80/443): # CapabilityBoundingSet=CAP_NET_BIND_SERVICE # AmbientCapabilities=CAP_NET_BIND_SERVICE +CapabilityBoundingSet= +AmbientCapabilities= # Security Hardening NoNewPrivileges=yes ProtectSystem=full ProtectHome=read-only +PrivateTmp=yes +ProtectKernelTunables=yes +ProtectKernelModules=yes +ProtectControlGroups=yes +RestrictSUIDSGID=yes +LockPersonality=yes +ReadWritePaths=/opt/decnet /var/log/decnet +ReadOnlyPaths=/etc/decnet Restart=on-failure RestartSec=5 diff --git a/tests/updater/test_updater_executor.py b/tests/updater/test_updater_executor.py index 7eb350a..cfdaf0e 100644 --- a/tests/updater/test_updater_executor.py +++ b/tests/updater/test_updater_executor.py @@ -306,6 +306,7 @@ def test_stop_agent_falls_back_to_proc_scan_when_no_pidfile( killed.append((pid, sig)) raise ProcessLookupError # pretend it already died after SIGTERM + monkeypatch.setattr(ex, "_systemd_available", lambda: False) monkeypatch.setattr(ex, "_discover_agent_pids", lambda: [4242, 4243]) monkeypatch.setattr(ex.os, "kill", fake_kill) @@ -315,3 +316,76 @@ def test_stop_agent_falls_back_to_proc_scan_when_no_pidfile( import signal as _signal assert (4242, _signal.SIGTERM) in killed assert (4243, _signal.SIGTERM) in killed + + +def test_systemd_available_requires_invocation_id_and_systemctl( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Both INVOCATION_ID and a resolvable systemctl are needed.""" + monkeypatch.delenv("INVOCATION_ID", raising=False) + assert ex._systemd_available() is False + + monkeypatch.setenv("INVOCATION_ID", "abc") + monkeypatch.setattr("shutil.which", lambda _: None) + assert ex._systemd_available() is False + + monkeypatch.setattr("shutil.which", lambda _: "/usr/bin/systemctl") + assert ex._systemd_available() is True + + +def test_spawn_agent_dispatches_to_systemd_when_available( + monkeypatch: pytest.MonkeyPatch, + install_dir: pathlib.Path, +) -> None: + monkeypatch.setattr(ex, 
"_systemd_available", lambda: True) + called: list[pathlib.Path] = [] + monkeypatch.setattr(ex, "_spawn_agent_via_systemd", lambda d: called.append(d) or 999) + monkeypatch.setattr(ex, "_spawn_agent_via_popen", lambda d: pytest.fail("popen path taken")) + assert ex._spawn_agent(install_dir) == 999 + assert called == [install_dir] + + +def test_spawn_agent_dispatches_to_popen_when_not_systemd( + monkeypatch: pytest.MonkeyPatch, + install_dir: pathlib.Path, +) -> None: + monkeypatch.setattr(ex, "_systemd_available", lambda: False) + monkeypatch.setattr(ex, "_spawn_agent_via_systemd", lambda d: pytest.fail("systemd path taken")) + monkeypatch.setattr(ex, "_spawn_agent_via_popen", lambda d: 777) + assert ex._spawn_agent(install_dir) == 777 + + +def test_stop_agent_is_noop_under_systemd( + monkeypatch: pytest.MonkeyPatch, + install_dir: pathlib.Path, +) -> None: + """Under systemd, stop is skipped — systemctl restart handles it atomically.""" + monkeypatch.setattr(ex, "_systemd_available", lambda: True) + monkeypatch.setattr(ex, "_discover_agent_pids", lambda: pytest.fail("scanned /proc")) + monkeypatch.setattr(ex.os, "kill", lambda *a, **k: pytest.fail("sent signal")) + (install_dir / "agent.pid").write_text("12345") + ex._stop_agent(install_dir, grace=0.0) # must not raise + + +def test_spawn_agent_via_systemd_records_main_pid( + monkeypatch: pytest.MonkeyPatch, + install_dir: pathlib.Path, +) -> None: + calls: list[list[str]] = [] + + class _Out: + def __init__(self, stdout: str = "") -> None: + self.stdout = stdout + + def fake_run(cmd, **kwargs): # type: ignore[no-untyped-def] + calls.append(cmd) + if "show" in cmd: + return _Out("4711\n") + return _Out("") + + monkeypatch.setattr(ex.subprocess, "run", fake_run) + pid = ex._spawn_agent_via_systemd(install_dir) + assert pid == 4711 + assert (install_dir / "agent.pid").read_text() == "4711" + assert calls[0][:2] == ["systemctl", "restart"] + assert calls[1][:2] == ["systemctl", "show"]