fix(updater): align bootstrap layout with updater; log update phases

The bootstrap was installing into /opt/decnet/.venv with an editable
`pip install -e .`, and /usr/local/bin/decnet pointed there. The updater
writes releases to /opt/decnet/releases/active/ with a shared venv at
/opt/decnet/venv — a parallel tree nothing on the box actually runs.
Result: updates appeared to succeed (release dir rotated, SHA changed)
but systemd kept executing the untouched bootstrap code.

Changes:
  - Bootstrap now installs directly into /opt/decnet/releases/active,
    with the shared venv at /opt/decnet/venv and /opt/decnet/current
    symlinked to the active release. This is the same layout the
    updater rotates releases in and out of.
  - /usr/local/bin/decnet -> /opt/decnet/venv/bin/decnet.
  - run_update / run_update_self heal /usr/local/bin/decnet on every
    push so already-enrolled hosts recover on the next update instead
    of needing a re-enroll.
  - run_update / run_update_self now log each phase (receive, extract,
    pip install, rotate, restart, probe) so the updater log actually
    shows what happened.
This commit is contained in:
2026-04-19 18:39:11 -04:00
parent f91ba9a16e
commit dad29249de
2 changed files with 55 additions and 9 deletions

View File

@@ -111,6 +111,32 @@ def _venv_python(release: pathlib.Path) -> pathlib.Path:
return release / ".venv" / "bin" / "python" return release / ".venv" / "bin" / "python"
def _heal_path_symlink(install_dir: pathlib.Path) -> None:
    """Point /usr/local/bin/decnet at the shared venv we manage.

    Pre-fix bootstraps installed into ``<install_dir>/.venv`` (editable) and
    symlinked /usr/local/bin/decnet there, so systemd units kept executing
    the pre-update code even after ``_run_pip`` wrote to the shared venv.
    Fix it opportunistically on every update so already-enrolled hosts
    recover on the next push instead of needing a manual re-enroll.

    Args:
        install_dir: Install root whose shared venv (as resolved by
            ``_shared_venv``) should own the ``decnet`` entrypoint.

    This is strictly best-effort: it never raises. It is a no-op when the
    shared venv has no ``bin/decnet`` file or when the link already points
    at it; any ``OSError`` while repointing is logged as a warning.
    """
    target = _shared_venv(install_dir) / "bin" / "decnet"
    link = pathlib.Path("/usr/local/bin/decnet")
    if not target.is_file():
        # Nothing to point at yet; leave whatever is on PATH untouched.
        return

    # Build the new link under a sibling name, then rename it over the real
    # one, so the entrypoint is swapped in a single step.
    tmp = link.with_suffix(".tmp")
    try:
        if link.is_symlink() and pathlib.Path(os.readlink(link)) == target:
            return  # Already healed.
        # Clear any leftover from an earlier interrupted attempt (this also
        # covers a dangling symlink).
        tmp.unlink(missing_ok=True)
        tmp.symlink_to(target)
        os.replace(tmp, link)
        log.info("repointed %s -> %s", link, target)
    except OSError as exc:
        log.warning("could not repoint %s: %s", link, exc)
        # Don't leave a stray ``decnet.tmp`` on PATH when the swap failed
        # part-way. Cleanup is best-effort; the failure is already logged.
        try:
            tmp.unlink(missing_ok=True)
        except OSError:
            pass
def _shared_venv(install_dir: pathlib.Path) -> pathlib.Path: def _shared_venv(install_dir: pathlib.Path) -> pathlib.Path:
"""The one stable venv that agents/updaters run out of. """The one stable venv that agents/updaters run out of.
@@ -447,27 +473,35 @@ def run_update(
agent_dir: pathlib.Path = pki.DEFAULT_AGENT_DIR, agent_dir: pathlib.Path = pki.DEFAULT_AGENT_DIR,
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Apply an update atomically. Rolls back on probe failure.""" """Apply an update atomically. Rolls back on probe failure."""
log.info("update received sha=%s bytes=%d install_dir=%s", sha, len(tarball_bytes), install_dir)
clean_stale_staging(install_dir) clean_stale_staging(install_dir)
staging = _staging_dir(install_dir) staging = _staging_dir(install_dir)
log.info("extracting tarball -> %s", staging)
extract_tarball(tarball_bytes, staging) extract_tarball(tarball_bytes, staging)
_write_manifest(staging, sha) _write_manifest(staging, sha)
log.info("pip install into shared venv (%s)", _shared_venv(install_dir))
pip = _run_pip(staging) pip = _run_pip(staging)
if pip.returncode != 0: if pip.returncode != 0:
log.error("pip install failed rc=%d stderr=%s", pip.returncode, (pip.stderr or pip.stdout).strip()[:400])
shutil.rmtree(staging, ignore_errors=True) shutil.rmtree(staging, ignore_errors=True)
raise UpdateError( raise UpdateError(
"pip install failed on new release", stderr=pip.stderr or pip.stdout, "pip install failed on new release", stderr=pip.stderr or pip.stdout,
) )
log.info("rotating releases: active.new -> active, active -> prev")
_rotate(install_dir) _rotate(install_dir)
_point_current_at(install_dir, _active_dir(install_dir)) _point_current_at(install_dir, _active_dir(install_dir))
_heal_path_symlink(install_dir)
log.info("restarting agent (and forwarder if present)")
_stop_agent(install_dir) _stop_agent(install_dir)
_spawn_agent(install_dir) _spawn_agent(install_dir)
ok, detail = _probe_agent(agent_dir=agent_dir) ok, detail = _probe_agent(agent_dir=agent_dir)
if ok: if ok:
log.info("update complete sha=%s probe=ok", sha)
return { return {
"status": "updated", "status": "updated",
"release": read_release(_active_dir(install_dir)).to_dict(), "release": read_release(_active_dir(install_dir)).to_dict(),
@@ -536,21 +570,27 @@ def run_update_self(
No auto-rollback. Caller must treat "connection dropped + /health No auto-rollback. Caller must treat "connection dropped + /health
returns new SHA within 30s" as success. returns new SHA within 30s" as success.
""" """
log.info("self-update received sha=%s bytes=%d install_dir=%s", sha, len(tarball_bytes), updater_install_dir)
clean_stale_staging(updater_install_dir) clean_stale_staging(updater_install_dir)
staging = _staging_dir(updater_install_dir) staging = _staging_dir(updater_install_dir)
log.info("extracting tarball -> %s", staging)
extract_tarball(tarball_bytes, staging) extract_tarball(tarball_bytes, staging)
_write_manifest(staging, sha) _write_manifest(staging, sha)
log.info("pip install updater release into shared venv (%s)", _shared_venv(updater_install_dir))
pip = _run_pip(staging) pip = _run_pip(staging)
if pip.returncode != 0: if pip.returncode != 0:
log.error("self-update pip install failed rc=%d stderr=%s", pip.returncode, (pip.stderr or pip.stdout).strip()[:400])
shutil.rmtree(staging, ignore_errors=True) shutil.rmtree(staging, ignore_errors=True)
raise UpdateError( raise UpdateError(
"pip install failed on new updater release", "pip install failed on new updater release",
stderr=pip.stderr or pip.stdout, stderr=pip.stderr or pip.stdout,
) )
log.info("rotating updater releases and flipping current symlink")
_rotate(updater_install_dir) _rotate(updater_install_dir)
_point_current_at(updater_install_dir, _active_dir(updater_install_dir)) _point_current_at(updater_install_dir, _active_dir(updater_install_dir))
_heal_path_symlink(updater_install_dir)
# Reconstruct the updater's original launch command from env vars set by # Reconstruct the updater's original launch command from env vars set by
# `decnet.updater.server.run`. We can't reuse sys.argv: inside the app # `decnet.updater.server.run`. We can't reuse sys.argv: inside the app
@@ -576,6 +616,7 @@ def run_update_self(
# on our own unit would kill us mid-response and the caller would see a # on our own unit would kill us mid-response and the caller would see a
# connection drop with no indication of success. # connection drop with no indication of success.
if _systemd_available(): if _systemd_available():
log.info("self-update queued: systemctl restart %s (deferred 1s)", UPDATER_SYSTEMD_UNIT)
subprocess.Popen( # nosec B603 B607 subprocess.Popen( # nosec B603 B607
["sh", "-c", f"sleep 1 && systemctl restart {UPDATER_SYSTEMD_UNIT}"], ["sh", "-c", f"sleep 1 && systemctl restart {UPDATER_SYSTEMD_UNIT}"],
start_new_session=True, start_new_session=True,

View File

@@ -16,14 +16,19 @@ echo "[DECNET] fetching payload..."
curl -fsSL "{{ tarball_url }}" | tar -xz -C "$WORK" curl -fsSL "{{ tarball_url }}" | tar -xz -C "$WORK"
INSTALL_DIR=/opt/decnet INSTALL_DIR=/opt/decnet
mkdir -p "$INSTALL_DIR" RELEASE_DIR="$INSTALL_DIR/releases/active"
cp -a "$WORK/." "$INSTALL_DIR/" VENV_DIR="$INSTALL_DIR/venv"
cd "$INSTALL_DIR" # Mirror the updater's layout from day one so `decnet updater` can rotate
# releases/active in-place and the shared venv is the thing on PATH.
mkdir -p "$RELEASE_DIR"
cp -a "$WORK/." "$RELEASE_DIR/"
ln -sfn "$RELEASE_DIR" "$INSTALL_DIR/current"
cd "$RELEASE_DIR"
echo "[DECNET] building venv..." echo "[DECNET] building shared venv at $VENV_DIR..."
python3 -m venv .venv python3 -m venv "$VENV_DIR"
.venv/bin/pip install -q --upgrade pip "$VENV_DIR/bin/pip" install -q --upgrade pip
.venv/bin/pip install -q -e . "$VENV_DIR/bin/pip" install -q "$RELEASE_DIR"
install -Dm0644 etc/decnet/decnet.ini /etc/decnet/decnet.ini install -Dm0644 etc/decnet/decnet.ini /etc/decnet/decnet.ini
[[ -f services.ini ]] && install -Dm0644 services.ini /etc/decnet/services.ini [[ -f services.ini ]] && install -Dm0644 services.ini /etc/decnet/services.ini
@@ -51,8 +56,8 @@ fi
# Guarantee the pip-installed entrypoint is executable (some setuptools+editable # Guarantee the pip-installed entrypoint is executable (some setuptools+editable
# combos drop it with mode 0644) and expose it on PATH. # combos drop it with mode 0644) and expose it on PATH.
chmod 0755 "$INSTALL_DIR/.venv/bin/decnet" chmod 0755 "$VENV_DIR/bin/decnet"
ln -sf "$INSTALL_DIR/.venv/bin/decnet" /usr/local/bin/decnet ln -sf "$VENV_DIR/bin/decnet" /usr/local/bin/decnet
echo "[DECNET] installing systemd units..." echo "[DECNET] installing systemd units..."
install -Dm0644 etc/systemd/system/decnet-agent.service /etc/systemd/system/decnet-agent.service install -Dm0644 etc/systemd/system/decnet-agent.service /etc/systemd/system/decnet-agent.service