feat(updater): sync systemd unit files and daemon-reload on update

The bootstrap installer copies etc/systemd/system/*.service into
/etc/systemd/system at enrollment time, but the updater was skipping
that step — a code push could not ship a new unit (e.g. the four
per-host microservices added this session) or change ExecStart on an
existing one. `systemctl restart` alone doesn't re-read unit files from
disk; a `systemctl daemon-reload` is required first.

run_update / run_update_self now call _sync_systemd_units after
rotation: diff each .service file against the live copy, atomically
replace changed ones, then issue a single `systemctl daemon-reload`.
No-op on legacy tarballs that don't ship etc/systemd/system/.
This commit is contained in:
2026-04-19 19:07:24 -04:00
parent 6d7877c679
commit d2cf1e8b3a
2 changed files with 121 additions and 0 deletions

View File

@@ -278,6 +278,54 @@ def _spawn_agent(install_dir: pathlib.Path) -> int:
return _spawn_agent_via_popen(install_dir)
SYSTEMD_UNIT_DIR = pathlib.Path("/etc/systemd/system")


def _sync_systemd_units(
    install_dir: pathlib.Path,
    dst_root: pathlib.Path = SYSTEMD_UNIT_DIR,
) -> bool:
    """Install changed ``*.service`` units from the active release.

    Every ``etc/systemd/system/*.service`` file under the active release is
    compared byte-for-byte with the file of the same name in ``dst_root``
    (default ``/etc/systemd/system``). Missing or differing units are written
    to a sibling ``*.service.tmp`` file, chmod'ed to 0644 and moved into place
    with ``os.replace``. If at least one unit was written and systemd is
    available, a single `systemctl daemon-reload` is issued afterwards.

    Best-effort: a unit that cannot be written, or a daemon-reload that fails
    or cannot be launched, is logged as a warning and never raised. A release
    that does not ship ``etc/systemd/system`` is skipped silently.

    Returns True if at least one unit file was installed or replaced --
    regardless of whether daemon-reload ran or succeeded -- and False
    otherwise.
    """
    src_root = _active_dir(install_dir) / "etc" / "systemd" / "system"
    if not src_root.is_dir():
        return False

    changed = False
    # Sorted so installs (and their log lines) happen in a stable order.
    for src in sorted(src_root.glob("*.service")):
        dst = dst_root / src.name
        tmp = dst.with_suffix(".service.tmp")
        try:
            new = src.read_bytes()
            # NOTE(review): is_file() follows symlinks, so a unit masked via
            # a symlink to /dev/null counts as "missing" and is replaced
            # below -- confirm that overriding a mask is intended.
            old = dst.read_bytes() if dst.is_file() else None
            if old == new:
                continue
            # Write next to the destination so os.replace stays on one
            # filesystem and readers never observe a half-written unit.
            tmp.write_bytes(new)
            os.chmod(tmp, 0o644)
            os.replace(tmp, dst)
        except OSError as exc:
            log.warning("could not install unit %s: %s", dst, exc)
            # Don't leave a stray temp file behind in the unit directory.
            try:
                tmp.unlink()
            except OSError:
                pass  # never created, or the directory is not writable
            continue
        log.info("installed/updated systemd unit %s", dst)
        changed = True

    if changed and _systemd_available():
        try:
            subprocess.run(  # nosec B603 B607
                ["systemctl", "daemon-reload"],
                check=True, capture_output=True, text=True,
            )
        except subprocess.CalledProcessError as exc:
            log.warning(
                "systemctl daemon-reload failed: %s",
                (exc.stderr or "").strip(),
            )
        except OSError as exc:
            # systemctl missing or not executable: stay best-effort rather
            # than aborting the update that called us.
            log.warning("systemctl daemon-reload failed: %s", exc)
        else:
            log.info("systemctl daemon-reload succeeded")
    return changed
def _spawn_agent_via_systemd(install_dir: pathlib.Path) -> int:
# Restart agent + forwarder together: both processes run out of the same
# /opt/decnet tree, so a code push that replaces the tree must cycle both
@@ -509,6 +557,7 @@ def run_update(
_rotate(install_dir)
_point_current_at(install_dir, _active_dir(install_dir))
_heal_path_symlink(install_dir)
_sync_systemd_units(install_dir)
log.info("restarting agent (and forwarder if present)")
_stop_agent(install_dir)
@@ -606,6 +655,7 @@ def run_update_self(
_rotate(updater_install_dir)
_point_current_at(updater_install_dir, _active_dir(updater_install_dir))
_heal_path_symlink(updater_install_dir)
_sync_systemd_units(updater_install_dir)
# Reconstruct the updater's original launch command from env vars set by
# `decnet.updater.server.run`. We can't reuse sys.argv: inside the app

View File

@@ -454,3 +454,74 @@ def test_spawn_agent_via_systemd_tolerates_missing_forwarder_unit(
monkeypatch.setattr(ex.subprocess, "run", fake_run)
pid = ex._spawn_agent_via_systemd(install_dir)
assert pid == 4711
# ---------------------------------------------------------- _sync_systemd_units
def _make_release_with_units(install_dir: pathlib.Path, units: dict[str, str]) -> None:
    """Create ``releases/active/etc/systemd/system`` under *install_dir* and
    write one file per ``units`` entry (file name -> file body)."""
    unit_dir = install_dir.joinpath("releases", "active", "etc", "systemd", "system")
    unit_dir.mkdir(parents=True)
    for filename in units:
        unit_dir.joinpath(filename).write_text(units[filename])
def test_sync_systemd_units_copies_new_files_and_reloads(
    monkeypatch: pytest.MonkeyPatch,
    install_dir: pathlib.Path,
    tmp_path: pathlib.Path,
) -> None:
    """A brand-new unit and a modified unit are both written to the
    destination, followed by exactly one daemon-reload."""
    shipped = {
        "decnet-collector.service": "unit-body-v1\n",
        "decnet-agent.service": "unit-body-agent\n",
    }
    _make_release_with_units(install_dir, shipped)

    live_dir = tmp_path / "etc-systemd"
    live_dir.mkdir()
    # Stale copy of one unit already on disk; the collector unit is absent.
    (live_dir / "decnet-agent.service").write_text("unit-body-agent-OLD\n")

    seen: list[list[str]] = []

    def record_run(cmd, **kwargs):  # type: ignore[no-untyped-def]
        seen.append(cmd)
        return subprocess.CompletedProcess(cmd, 0, "", "")

    # Presumably makes _systemd_available() report True -- confirm in ex.
    monkeypatch.setenv("INVOCATION_ID", "x")
    monkeypatch.setattr(ex.subprocess, "run", record_run)

    result = ex._sync_systemd_units(install_dir, dst_root=live_dir)

    assert result is True
    for unit_name, expected_body in shipped.items():
        assert (live_dir / unit_name).read_text() == expected_body
    assert seen == [["systemctl", "daemon-reload"]]
def test_sync_systemd_units_noop_when_unchanged(
    monkeypatch: pytest.MonkeyPatch,
    install_dir: pathlib.Path,
    tmp_path: pathlib.Path,
) -> None:
    """When the live unit is byte-identical to the shipped one, nothing is
    reported as changed and systemctl is never run."""
    _make_release_with_units(install_dir, {"decnet-agent.service": "same\n"})

    live_dir = tmp_path / "etc-systemd"
    live_dir.mkdir()
    live_dir.joinpath("decnet-agent.service").write_text("same\n")

    seen: list[list[str]] = []

    def record_run(cmd, **_):  # type: ignore[no-untyped-def]
        seen.append(cmd)
        return subprocess.CompletedProcess(cmd, 0, "", "")

    monkeypatch.setenv("INVOCATION_ID", "x")
    monkeypatch.setattr(ex.subprocess, "run", record_run)

    result = ex._sync_systemd_units(install_dir, dst_root=live_dir)

    assert result is False
    assert seen == []  # no daemon-reload when nothing changed
def test_sync_systemd_units_missing_src_is_noop(
    install_dir: pathlib.Path,
    tmp_path: pathlib.Path,
) -> None:
    """A release that ships no etc/systemd/system tree (legacy bundle) makes
    the sync return False."""
    active_release = install_dir / "releases" / "active"
    active_release.mkdir(parents=True)

    outcome = ex._sync_systemd_units(install_dir, dst_root=tmp_path)

    assert outcome is False