fix(agent): escape systemd cgroup when spawning self-destruct reaper
The reaper was being SIGTERM'd mid-rm because `start_new_session=True` only creates a new POSIX session — it does not escape decnet-agent.service's cgroup. When the reaper ran `systemctl stop decnet-agent`, systemd tore down the whole cgroup (reaper included) before `rm -rf /opt/decnet*` finished, leaving the install on disk. Spawn the reaper via `systemd-run --collect --unit decnet-reaper-<pid>` so it runs as a fresh transient service unit, outside the agent unit's cgroup. Falls back to a bare Popen on non-systemd hosts.
This commit is contained in:
@@ -152,6 +152,7 @@ async def self_destruct() -> None:
|
|||||||
install footprint. Returns immediately so the HTTP response can drain
|
install footprint. Returns immediately so the HTTP response can drain
|
||||||
before the reaper starts deleting files out from under the agent."""
|
before the reaper starts deleting files out from under the agent."""
|
||||||
import os
|
import os
|
||||||
|
import shutil
|
||||||
import subprocess # nosec B404
|
import subprocess # nosec B404
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
@@ -172,17 +173,39 @@ async def self_destruct() -> None:
|
|||||||
os.close(fd)
|
os.close(fd)
|
||||||
os.chmod(path, 0o700) # nosec B103 — root-owned reaper, needs exec
|
os.chmod(path, 0o700) # nosec B103 — root-owned reaper, needs exec
|
||||||
|
|
||||||
# start_new_session detaches from the agent process group so the
|
# The reaper MUST run outside decnet-agent.service's cgroup — otherwise
|
||||||
# reaper isn't killed when systemctl stop decnet-agent fires.
|
# `systemctl stop decnet-agent` SIGTERMs the whole cgroup (reaper included)
|
||||||
|
# before rm -rf completes. `start_new_session=True` gets us a fresh POSIX
|
||||||
|
# session but does NOT escape the systemd cgroup. So we prefer
|
||||||
|
# `systemd-run` (launches the command in a transient service unit
|
||||||
|
# detached from the caller's service), falling back to a bare Popen if
|
||||||
|
# systemd-run is unavailable (non-systemd host / container).
|
||||||
|
systemd_run = shutil.which("systemd-run")
|
||||||
|
if systemd_run:
|
||||||
|
argv = [
|
||||||
|
systemd_run,
|
||||||
|
"--collect",
|
||||||
|
"--unit", f"decnet-reaper-{os.getpid()}",
|
||||||
|
"--description", "DECNET agent self-destruct reaper",
|
||||||
|
"/bin/bash", path,
|
||||||
|
]
|
||||||
|
spawn_kwargs = {"start_new_session": True}
|
||||||
|
else:
|
||||||
|
argv = ["/bin/bash", path]
|
||||||
|
spawn_kwargs = {"start_new_session": True}
|
||||||
|
|
||||||
subprocess.Popen( # nosec B603
|
subprocess.Popen( # nosec B603
|
||||||
["/bin/bash", path],
|
argv,
|
||||||
stdin=subprocess.DEVNULL,
|
stdin=subprocess.DEVNULL,
|
||||||
stdout=subprocess.DEVNULL,
|
stdout=subprocess.DEVNULL,
|
||||||
stderr=subprocess.DEVNULL,
|
stderr=subprocess.DEVNULL,
|
||||||
start_new_session=True,
|
|
||||||
close_fds=True,
|
close_fds=True,
|
||||||
|
**spawn_kwargs,
|
||||||
|
)
|
||||||
|
log.warning(
|
||||||
|
"self_destruct: reaper spawned path=%s via=%s — agent will die in ~3s",
|
||||||
|
path, "systemd-run" if systemd_run else "popen",
|
||||||
)
|
)
|
||||||
log.warning("self_destruct: reaper spawned path=%s — agent will die in ~3s", path)
|
|
||||||
|
|
||||||
|
|
||||||
async def status() -> dict[str, Any]:
|
async def status() -> dict[str, Any]:
|
||||||
|
|||||||
@@ -71,8 +71,12 @@ def test_self_destruct_spawns_reaper_and_returns_fast(monkeypatch, tmp_path) ->
|
|||||||
assert resp.json()["status"] == "self_destruct_scheduled"
|
assert resp.json()["status"] == "self_destruct_scheduled"
|
||||||
assert len(spawned) == 1
|
assert len(spawned) == 1
|
||||||
assert spawned[0]["kw"].get("start_new_session") is True
|
assert spawned[0]["kw"].get("start_new_session") is True
|
||||||
script_path = spawned[0]["args"][1]
|
script_candidates = [
|
||||||
assert script_path.startswith("/tmp/decnet-reaper-")
|
a for a in spawned[0]["args"]
|
||||||
|
if isinstance(a, str) and a.startswith("/tmp/decnet-reaper-")
|
||||||
|
]
|
||||||
|
assert len(script_candidates) == 1, spawned[0]["args"]
|
||||||
|
script_path = script_candidates[0]
|
||||||
# Reaper content sanity check — covers the paths the operator asked for.
|
# Reaper content sanity check — covers the paths the operator asked for.
|
||||||
import pathlib
|
import pathlib
|
||||||
body = pathlib.Path(script_path).read_text()
|
body = pathlib.Path(script_path).read_text()
|
||||||
|
|||||||
Reference in New Issue
Block a user