fix(updater): fall back to /proc scan when agent.pid is missing

If the agent was started outside the updater (manually, during dev,
or from a prior systemd unit), there is no agent.pid for _stop_agent
to target, so a successful code install leaves the old in-memory
agent process still serving requests. Scan /proc for any decnet agent
command and SIGTERM all matches so restart is reliable regardless of
how the agent was originally launched.
This commit is contained in:
2026-04-18 23:42:03 -04:00
parent 7765b36c50
commit ebeaf08a49
5 changed files with 140 additions and 34 deletions

View File

@@ -293,3 +293,25 @@ def test_update_self_pip_failure_leaves_active_intact(
ex.run_update_self(tb, sha="U", updater_install_dir=install_dir, exec_cb=lambda a: None)
assert (install_dir / "releases" / "active" / "marker").read_text() == "old-updater"
assert not (install_dir / "releases" / "active.new").exists()
def test_stop_agent_falls_back_to_proc_scan_when_no_pidfile(
    monkeypatch: pytest.MonkeyPatch,
    install_dir: pathlib.Path,
) -> None:
    """Without an agent.pid file, _stop_agent signals every discovered PID.

    ``_discover_agent_pids`` is stubbed to report two PIDs and ``os.kill``
    is replaced by a recorder, so no real process is ever signalled. The
    test passes when a SIGTERM was recorded for each reported PID.
    """
    import signal as _signal

    discovered = [4242, 4243]
    signals_sent: list[tuple[int, int]] = []

    def record_then_vanish(pid: int, sig: int) -> None:
        # Record the call, then act as if the target no longer exists so
        # the code under test sees the process as already gone.
        signals_sent.append((pid, sig))
        raise ProcessLookupError

    monkeypatch.setattr(ex, "_discover_agent_pids", lambda: list(discovered))
    monkeypatch.setattr(ex.os, "kill", record_then_vanish)

    # Precondition: the fixture directory must not contain a pidfile.
    assert not (install_dir / "agent.pid").exists()

    ex._stop_agent(install_dir, grace=0.0)

    for pid in discovered:
        assert (pid, _signal.SIGTERM) in signals_sent