feat(deploy): systemd units w/ capability-based hardening; updater restarts agent via systemctl
Add deploy/ unit files for every DECNET daemon (agent, updater, api, web, swarmctl, listener, forwarder). All run as User=decnet with NoNewPrivileges, ProtectSystem, PrivateTmp, LockPersonality; AmbientCapabilities=CAP_NET_ADMIN CAP_NET_RAW only on the agent (MACVLAN/scapy). Existing api/web units migrated to /opt/decnet layout and the same hardening stanza. Make the updater's _spawn_agent systemd-aware: under systemd (detected via INVOCATION_ID + systemctl on PATH), `systemctl restart decnet-agent.service` replaces the Popen path so the new agent inherits the unit's ambient caps instead of the updater's empty set. _stop_agent becomes a no-op in that mode to avoid racing systemctl's own stop phase. Tests cover the dispatcher branch selection, MainPID parsing, and the systemd no-op stop.
This commit is contained in:
@@ -208,11 +208,56 @@ def _run_pip(
|
||||
)
|
||||
|
||||
|
||||
def _spawn_agent(install_dir: pathlib.Path) -> int:
|
||||
"""Launch ``decnet agent --daemon`` using the current-symlinked venv.
|
||||
AGENT_SYSTEMD_UNIT = "decnet-agent.service"
|
||||
|
||||
Returns the new PID. Monkeypatched in tests.
|
||||
|
||||
def _systemd_available() -> bool:
    """Report whether agent restarts should be delegated to systemd.

    Two signals must both be present: a non-empty ``INVOCATION_ID``
    environment variable (systemd exports it for every unit it starts) and a
    ``systemctl`` binary that resolves on ``PATH``. Neither is trusted on its
    own — the variable can be forged, and the binary may be installed on a
    host that runs a different init system.
    """
    # Resolved at call time so the lookup goes through the live
    # ``shutil.which`` attribute.
    import shutil

    has_invocation_marker = bool(os.environ.get("INVOCATION_ID"))
    # Short-circuits: PATH is only searched when the marker is present.
    return has_invocation_marker and shutil.which("systemctl") is not None
|
||||
|
||||
|
||||
def _spawn_agent(install_dir: pathlib.Path) -> int:
    """Start the agent for ``install_dir`` and return the PID of the new process.

    When systemd is managing us, the agent is brought up by restarting
    ``decnet-agent.service`` through ``systemctl``; that way the fresh
    process receives the ambient capabilities declared on its own unit
    (CAP_NET_ADMIN, CAP_NET_RAW). A ``subprocess.Popen`` from inside the
    updater unit would instead produce a child of the updater, limited to
    the updater's empty capability set and therefore unable to run
    MACVLAN/scapy.

    Without systemd (dev boxes, manual starts) the agent is launched
    directly with Popen.
    """
    # Pick the launcher first; only the selected one is looked up and called.
    launcher = (
        _spawn_agent_via_systemd if _systemd_available() else _spawn_agent_via_popen
    )
    return launcher(install_dir)
|
||||
|
||||
|
||||
def _spawn_agent_via_systemd(install_dir: pathlib.Path) -> int:
    """Restart the agent's systemd unit and return the unit's main PID.

    Issues ``systemctl restart`` for ``AGENT_SYSTEMD_UNIT`` and then reads the
    unit's ``MainPID`` property via ``systemctl show``. A non-zero PID is
    written to the PID file for ``install_dir``. When systemd reports no main
    PID (empty output or ``0``) the function returns ``0`` and leaves any
    existing PID file untouched.

    Raises:
        subprocess.CalledProcessError: either ``systemctl`` invocation exited
            non-zero (both run with ``check=True``).
        ValueError: the ``MainPID`` output was not an integer.

    NOTE(review): the updater unit runs as the unprivileged ``decnet`` user;
    ``systemctl restart`` presumably needs a polkit rule (or equivalent)
    authorising that user — confirm the deployment provides one.
    """
    restart_cmd = ["systemctl", "restart", AGENT_SYSTEMD_UNIT]
    subprocess.run(  # nosec B603 B607
        restart_cmd,
        check=True, capture_output=True, text=True,
    )

    show_cmd = ["systemctl", "show", "--property=MainPID", "--value", AGENT_SYSTEMD_UNIT]
    shown = subprocess.run(  # nosec B603 B607
        show_cmd,
        check=True, capture_output=True, text=True,
    )

    # Empty output is treated the same as an explicit "0": no main process.
    main_pid_text = shown.stdout.strip()
    main_pid = int(main_pid_text) if main_pid_text else 0
    if main_pid != 0:
        _pid_file(install_dir).write_text(str(main_pid))
    return main_pid
|
||||
|
||||
|
||||
def _spawn_agent_via_popen(install_dir: pathlib.Path) -> int:
|
||||
decnet_bin = _shared_venv(install_dir) / "bin" / "decnet"
|
||||
log_path = install_dir / "agent.spawn.log"
|
||||
# cwd=install_dir so a persistent ``<install_dir>/.env.local`` gets
|
||||
@@ -267,7 +312,13 @@ def _stop_agent(install_dir: pathlib.Path, grace: float = AGENT_RESTART_GRACE_S)
|
||||
Prefers the PID recorded in ``agent.pid`` (processes we spawned) but
|
||||
falls back to scanning /proc for any ``decnet agent`` so manually-started
|
||||
agents are also restarted cleanly during an update.
|
||||
|
||||
Under systemd, stop is a no-op — ``_spawn_agent`` issues a single
|
||||
``systemctl restart`` that handles stop and start atomically. Pre-stopping
|
||||
would only race the restart's own stop phase.
|
||||
"""
|
||||
if _systemd_available():
|
||||
return
|
||||
pids: list[int] = []
|
||||
pid_file = _pid_file(install_dir)
|
||||
if pid_file.is_file():
|
||||
|
||||
41
deploy/decnet-agent.service
Normal file
41
deploy/decnet-agent.service
Normal file
@@ -0,0 +1,41 @@
|
||||
[Unit]
|
||||
Description=DECNET Worker Agent (mTLS)
|
||||
Documentation=https://github.com/4nt11/DECNET/wiki/SWARM-Mode
|
||||
After=network-online.target docker.service
|
||||
Wants=network-online.target
|
||||
Requires=docker.service
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=decnet
|
||||
Group=decnet
|
||||
# docker.sock is group-readable by 'docker'; the agent needs it for compose.
|
||||
SupplementaryGroups=docker
|
||||
WorkingDirectory=/opt/decnet
|
||||
EnvironmentFile=-/opt/decnet/.env.local
|
||||
ExecStart=/opt/decnet/venv/bin/decnet agent --host 0.0.0.0 --port 8765 --agent-dir /etc/decnet/agent
|
||||
|
||||
# MACVLAN/IPVLAN management + scapy raw sockets. Granted via ambient caps so
|
||||
# the process starts unprivileged and keeps only these two bits.
|
||||
CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_RAW
|
||||
AmbientCapabilities=CAP_NET_ADMIN CAP_NET_RAW
|
||||
|
||||
# Security Hardening
|
||||
NoNewPrivileges=yes
|
||||
ProtectSystem=full
|
||||
ProtectHome=read-only
|
||||
PrivateTmp=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectControlGroups=yes
|
||||
RestrictSUIDSGID=yes
|
||||
LockPersonality=yes
|
||||
# /opt/decnet holds release slots + state; the agent reads them and writes its PID.
|
||||
ReadWritePaths=/opt/decnet /var/log/decnet
|
||||
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
TimeoutStopSec=15
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -1,19 +1,21 @@
|
||||
[Unit]
|
||||
Description=DECNET API Service
|
||||
After=network.target docker.service
|
||||
Documentation=https://github.com/4nt11/DECNET/wiki/REST-API-Reference
|
||||
After=network-online.target docker.service
|
||||
Wants=network-online.target
|
||||
Requires=docker.service
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=decnet
|
||||
Group=decnet
|
||||
WorkingDirectory=/path/to/DECNET
|
||||
# Ensure environment is loaded from the .env file
|
||||
EnvironmentFile=/path/to/DECNET/.env
|
||||
# Use the virtualenv python to run the decnet api command
|
||||
ExecStart=/path/to/DECNET/.venv/bin/decnet api
|
||||
# docker.sock is group-readable by 'docker'; the API ingester tails container logs.
|
||||
SupplementaryGroups=docker
|
||||
WorkingDirectory=/opt/decnet
|
||||
EnvironmentFile=-/opt/decnet/.env.local
|
||||
ExecStart=/opt/decnet/venv/bin/decnet api
|
||||
|
||||
# Capabilities required to manage MACVLAN interfaces and network links without root
|
||||
# MACVLAN/IPVLAN setup runs from the API lifespan when the embedded sniffer is on.
|
||||
CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_RAW
|
||||
AmbientCapabilities=CAP_NET_ADMIN CAP_NET_RAW
|
||||
|
||||
@@ -21,9 +23,17 @@ AmbientCapabilities=CAP_NET_ADMIN CAP_NET_RAW
|
||||
NoNewPrivileges=yes
|
||||
ProtectSystem=full
|
||||
ProtectHome=read-only
|
||||
PrivateTmp=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectControlGroups=yes
|
||||
RestrictSUIDSGID=yes
|
||||
LockPersonality=yes
|
||||
ReadWritePaths=/opt/decnet /var/log/decnet
|
||||
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
TimeoutStopSec=15
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
46
deploy/decnet-forwarder.service
Normal file
46
deploy/decnet-forwarder.service
Normal file
@@ -0,0 +1,46 @@
|
||||
[Unit]
|
||||
Description=DECNET Syslog-over-TLS Forwarder (worker, RFC 5425)
|
||||
Documentation=https://github.com/4nt11/DECNET/wiki/Logging-and-Syslog
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
# The forwarder can run independently of the agent — it only needs the local
|
||||
# log file to exist and the master to be reachable.
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=decnet
|
||||
Group=decnet
|
||||
WorkingDirectory=/opt/decnet
|
||||
EnvironmentFile=-/opt/decnet/.env.local
|
||||
# Set DECNET_SWARM_MASTER_HOST (e.g. in /opt/decnet/.env.local) to the master's LAN address or hostname. The agent
|
||||
# cert bundle at /etc/decnet/agent is reused — the forwarder presents the same
|
||||
# worker identity when it connects to the master's listener.
|
||||
ExecStart=/opt/decnet/venv/bin/decnet forwarder \
|
||||
--log-file /var/log/decnet/decnet.log \
|
||||
--master-host ${DECNET_SWARM_MASTER_HOST} \
|
||||
--master-port 6514 \
|
||||
--agent-dir /etc/decnet/agent
|
||||
|
||||
# TLS client connection; no special capabilities.
|
||||
CapabilityBoundingSet=
|
||||
AmbientCapabilities=
|
||||
|
||||
# Security Hardening
|
||||
NoNewPrivileges=yes
|
||||
ProtectSystem=full
|
||||
ProtectHome=read-only
|
||||
PrivateTmp=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectControlGroups=yes
|
||||
RestrictSUIDSGID=yes
|
||||
LockPersonality=yes
|
||||
# Reads the tailed log; writes a small byte-offset state file alongside it.
|
||||
ReadWritePaths=/var/log/decnet
|
||||
ReadOnlyPaths=/etc/decnet
|
||||
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
43
deploy/decnet-listener.service
Normal file
43
deploy/decnet-listener.service
Normal file
@@ -0,0 +1,43 @@
|
||||
[Unit]
|
||||
Description=DECNET Syslog-over-TLS Listener (master, RFC 5425)
|
||||
Documentation=https://github.com/4nt11/DECNET/wiki/Logging-and-Syslog
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=decnet
|
||||
Group=decnet
|
||||
WorkingDirectory=/opt/decnet
|
||||
EnvironmentFile=-/opt/decnet/.env.local
|
||||
# Binds 0.0.0.0:6514 so workers across the LAN can connect. 6514 is not a
|
||||
# privileged port (≥1024), so no CAP_NET_BIND_SERVICE is required.
|
||||
ExecStart=/opt/decnet/venv/bin/decnet listener \
|
||||
--host 0.0.0.0 --port 6514 \
|
||||
--ca-dir /etc/decnet/ca \
|
||||
--log-path /var/log/decnet/master.log \
|
||||
--json-path /var/log/decnet/master.json
|
||||
|
||||
# Pure TLS server; no privileged network operations.
|
||||
CapabilityBoundingSet=
|
||||
AmbientCapabilities=
|
||||
|
||||
# Security Hardening
|
||||
NoNewPrivileges=yes
|
||||
ProtectSystem=full
|
||||
ProtectHome=read-only
|
||||
PrivateTmp=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectControlGroups=yes
|
||||
RestrictSUIDSGID=yes
|
||||
LockPersonality=yes
|
||||
# Writes forensic .log + parsed .json sinks; CA bundle is read-only.
|
||||
ReadWritePaths=/var/log/decnet
|
||||
ReadOnlyPaths=/etc/decnet
|
||||
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
40
deploy/decnet-swarmctl.service
Normal file
40
deploy/decnet-swarmctl.service
Normal file
@@ -0,0 +1,40 @@
|
||||
[Unit]
|
||||
Description=DECNET Swarm Controller (master)
|
||||
Documentation=https://github.com/4nt11/DECNET/wiki/SWARM-Mode
|
||||
After=network-online.target decnet-api.service
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=decnet
|
||||
Group=decnet
|
||||
WorkingDirectory=/opt/decnet
|
||||
EnvironmentFile=-/opt/decnet/.env.local
|
||||
# Default bind is loopback — the controller is a master-local orchestrator
|
||||
# reached by the CLI and the web dashboard, not by workers.
|
||||
ExecStart=/opt/decnet/venv/bin/decnet swarmctl --host 127.0.0.1 --port 8770
|
||||
|
||||
# No special capabilities — the controller issues mTLS certs and talks to
|
||||
# workers over TCP on unprivileged ports.
|
||||
CapabilityBoundingSet=
|
||||
AmbientCapabilities=
|
||||
|
||||
# Security Hardening
|
||||
NoNewPrivileges=yes
|
||||
ProtectSystem=full
|
||||
ProtectHome=read-only
|
||||
PrivateTmp=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectControlGroups=yes
|
||||
RestrictSUIDSGID=yes
|
||||
LockPersonality=yes
|
||||
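# Reads/writes the master DB. NOTE(review): if the CA bundle lives under /etc/decnet it is read-only here — confirm swarmctl never needs to write it.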
|
||||
ReadWritePaths=/opt/decnet /var/log/decnet
|
||||
ReadOnlyPaths=/etc/decnet
|
||||
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
49
deploy/decnet-updater.service
Normal file
49
deploy/decnet-updater.service
Normal file
@@ -0,0 +1,49 @@
|
||||
[Unit]
|
||||
Description=DECNET Self-Updater (mTLS)
|
||||
Documentation=https://github.com/4nt11/DECNET/wiki/Remote-Updates
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
# Deliberately NOT After=decnet-agent.service — the updater must come up even
|
||||
# when the agent is broken, since that is exactly when it is most useful.
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=decnet
|
||||
Group=decnet
|
||||
WorkingDirectory=/opt/decnet
|
||||
EnvironmentFile=-/opt/decnet/.env.local
|
||||
ExecStart=/opt/decnet/venv/bin/decnet updater \
|
||||
--host 0.0.0.0 --port 8766 \
|
||||
--updater-dir /etc/decnet/updater \
|
||||
--install-dir /opt/decnet \
|
||||
--agent-dir /etc/decnet/agent
|
||||
|
||||
# The updater restarts the agent with `systemctl restart decnet-agent.service`
|
||||
# instead of signalling it directly (same User=decnet would permit signalling
|
||||
# without CAP_KILL). It does not need NET_ADMIN/NET_RAW itself — the new agent
|
||||
# process picks those up from decnet-agent.service's AmbientCapabilities when
|
||||
# systemd restarts it; an agent spawned as a direct child would not get them.
|
||||
CapabilityBoundingSet=
|
||||
AmbientCapabilities=
|
||||
|
||||
# Security Hardening
|
||||
NoNewPrivileges=yes
|
||||
ProtectSystem=full
|
||||
ProtectHome=read-only
|
||||
PrivateTmp=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectControlGroups=yes
|
||||
RestrictSUIDSGID=yes
|
||||
LockPersonality=yes
|
||||
# Writes release slots, pip installs into venv, manages agent.pid.
|
||||
ReadWritePaths=/opt/decnet /var/log/decnet
|
||||
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
# Self-update replaces the process image via os.execv; the new binary answers
|
||||
# /health within 30 s. Give it headroom before systemd's own termination.
|
||||
TimeoutStopSec=30
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -1,27 +1,35 @@
|
||||
[Unit]
|
||||
Description=DECNET Web Dashboard Service
|
||||
After=network.target decnet-api.service
|
||||
Documentation=https://github.com/4nt11/DECNET/wiki/Web-Dashboard
|
||||
After=network-online.target decnet-api.service
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=decnet
|
||||
Group=decnet
|
||||
WorkingDirectory=/path/to/DECNET
|
||||
# Ensure environment is loaded from the .env file
|
||||
EnvironmentFile=/path/to/DECNET/.env
|
||||
# Use the virtualenv python to run the decnet web command
|
||||
ExecStart=/path/to/DECNET/.venv/bin/decnet web
|
||||
WorkingDirectory=/opt/decnet
|
||||
EnvironmentFile=-/opt/decnet/.env.local
|
||||
ExecStart=/opt/decnet/venv/bin/decnet web
|
||||
|
||||
# The Web Dashboard service does not require network administration privileges.
|
||||
# Enable the following lines if you wish to bind the Dashboard to a privileged port (e.g., 80 or 443)
|
||||
# while still running as a non-root user.
|
||||
# Uncomment if you bind the dashboard to a privileged port (80/443):
|
||||
# CapabilityBoundingSet=CAP_NET_BIND_SERVICE
|
||||
# AmbientCapabilities=CAP_NET_BIND_SERVICE
|
||||
CapabilityBoundingSet=
|
||||
AmbientCapabilities=
|
||||
|
||||
# Security Hardening
|
||||
NoNewPrivileges=yes
|
||||
ProtectSystem=full
|
||||
ProtectHome=read-only
|
||||
PrivateTmp=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectControlGroups=yes
|
||||
RestrictSUIDSGID=yes
|
||||
LockPersonality=yes
|
||||
ReadWritePaths=/opt/decnet /var/log/decnet
|
||||
ReadOnlyPaths=/etc/decnet
|
||||
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
|
||||
@@ -306,6 +306,7 @@ def test_stop_agent_falls_back_to_proc_scan_when_no_pidfile(
|
||||
killed.append((pid, sig))
|
||||
raise ProcessLookupError # pretend it already died after SIGTERM
|
||||
|
||||
monkeypatch.setattr(ex, "_systemd_available", lambda: False)
|
||||
monkeypatch.setattr(ex, "_discover_agent_pids", lambda: [4242, 4243])
|
||||
monkeypatch.setattr(ex.os, "kill", fake_kill)
|
||||
|
||||
@@ -315,3 +316,76 @@ def test_stop_agent_falls_back_to_proc_scan_when_no_pidfile(
|
||||
import signal as _signal
|
||||
assert (4242, _signal.SIGTERM) in killed
|
||||
assert (4243, _signal.SIGTERM) in killed
|
||||
|
||||
|
||||
def test_systemd_available_requires_invocation_id_and_systemctl(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Detection is true only with INVOCATION_ID set *and* systemctl on PATH."""
    # Without the invocation marker the answer is False.
    monkeypatch.delenv("INVOCATION_ID", raising=False)
    assert ex._systemd_available() is False

    # With the marker set, the outcome follows whether systemctl resolves.
    monkeypatch.setenv("INVOCATION_ID", "abc")
    for resolved_path, expected in ((None, False), ("/usr/bin/systemctl", True)):
        monkeypatch.setattr("shutil.which", lambda _, _found=resolved_path: _found)
        assert ex._systemd_available() is expected
|
||||
|
||||
|
||||
def test_spawn_agent_dispatches_to_systemd_when_available(
    monkeypatch: pytest.MonkeyPatch,
    install_dir: pathlib.Path,
) -> None:
    """With systemd detected, ``_spawn_agent`` must use the systemctl path only."""
    seen_dirs: list[pathlib.Path] = []

    def fake_systemd_spawn(target):  # type: ignore[no-untyped-def]
        seen_dirs.append(target)
        return 999

    def forbidden_popen_spawn(target):  # type: ignore[no-untyped-def]
        pytest.fail("popen path taken")

    monkeypatch.setattr(ex, "_systemd_available", lambda: True)
    monkeypatch.setattr(ex, "_spawn_agent_via_systemd", fake_systemd_spawn)
    monkeypatch.setattr(ex, "_spawn_agent_via_popen", forbidden_popen_spawn)

    returned_pid = ex._spawn_agent(install_dir)

    assert returned_pid == 999
    assert seen_dirs == [install_dir]
|
||||
|
||||
|
||||
def test_spawn_agent_dispatches_to_popen_when_not_systemd(
    monkeypatch: pytest.MonkeyPatch,
    install_dir: pathlib.Path,
) -> None:
    """Without systemd, ``_spawn_agent`` must fall back to the Popen path only."""

    def forbidden_systemd_spawn(target):  # type: ignore[no-untyped-def]
        pytest.fail("systemd path taken")

    def fake_popen_spawn(target):  # type: ignore[no-untyped-def]
        return 777

    monkeypatch.setattr(ex, "_systemd_available", lambda: False)
    monkeypatch.setattr(ex, "_spawn_agent_via_systemd", forbidden_systemd_spawn)
    monkeypatch.setattr(ex, "_spawn_agent_via_popen", fake_popen_spawn)

    returned_pid = ex._spawn_agent(install_dir)
    assert returned_pid == 777
|
||||
|
||||
|
||||
def test_stop_agent_is_noop_under_systemd(
    monkeypatch: pytest.MonkeyPatch,
    install_dir: pathlib.Path,
) -> None:
    """Under systemd ``_stop_agent`` does nothing; ``systemctl restart`` owns the stop."""

    def forbidden_proc_scan():  # type: ignore[no-untyped-def]
        pytest.fail("scanned /proc")

    def forbidden_kill(*args, **kwargs):  # type: ignore[no-untyped-def]
        pytest.fail("sent signal")

    monkeypatch.setattr(ex, "_systemd_available", lambda: True)
    monkeypatch.setattr(ex, "_discover_agent_pids", forbidden_proc_scan)
    monkeypatch.setattr(ex.os, "kill", forbidden_kill)

    # A recorded PID is present, yet it must not be acted on.
    pid_path = install_dir / "agent.pid"
    pid_path.write_text("12345")

    ex._stop_agent(install_dir, grace=0.0)  # must not raise
|
||||
|
||||
|
||||
def test_spawn_agent_via_systemd_records_main_pid(
    monkeypatch: pytest.MonkeyPatch,
    install_dir: pathlib.Path,
) -> None:
    """The systemd path restarts the unit, reads MainPID, and writes agent.pid."""

    class _Completed:
        """Stand-in for a completed process that exposes only ``stdout``."""

        def __init__(self, stdout: str = "") -> None:
            self.stdout = stdout

    recorded_cmds: list[list[str]] = []

    def fake_run(cmd, **kwargs):  # type: ignore[no-untyped-def]
        recorded_cmds.append(cmd)
        # Only the ``systemctl show`` call produces output: the main PID.
        return _Completed("4711\n" if "show" in cmd else "")

    monkeypatch.setattr(ex.subprocess, "run", fake_run)

    returned_pid = ex._spawn_agent_via_systemd(install_dir)

    assert returned_pid == 4711
    assert (install_dir / "agent.pid").read_text() == "4711"
    # Restart must be issued before the MainPID query.
    assert recorded_cmds[0][:2] == ["systemctl", "restart"]
    assert recorded_cmds[1][:2] == ["systemctl", "show"]
|
||||
|
||||
Reference in New Issue
Block a user