From dad29249de6d485e3bce4e6086f9e9063fbc0a1e Mon Sep 17 00:00:00 2001 From: anti Date: Sun, 19 Apr 2026 18:39:11 -0400 Subject: [PATCH] fix(updater): align bootstrap layout with updater; log update phases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bootstrap was installing into /opt/decnet/.venv with an editable `pip install -e .`, and /usr/local/bin/decnet pointed there. The updater writes releases to /opt/decnet/releases/active/ with a shared venv at /opt/decnet/venv — a parallel tree nothing on the box actually runs. Result: updates appeared to succeed (release dir rotated, SHA changed) but systemd kept executing the untouched bootstrap code. Changes: - Bootstrap now installs directly into /opt/decnet/releases/active with the shared venv at /opt/decnet/venv and /opt/decnet/current symlinked. Same layout the updater rotates in and out of. - /usr/local/bin/decnet -> /opt/decnet/venv/bin/decnet. - run_update / run_update_self heal /usr/local/bin/decnet on every push so already-enrolled hosts recover on the next update instead of needing a re-enroll. - run_update / run_update_self now log each phase (receive, extract, pip install, rotate, restart, probe) so the updater log actually shows what happened. --- decnet/updater/executor.py | 41 +++++++++++++++++++++ decnet/web/templates/enroll_bootstrap.sh.j2 | 23 +++++++----- 2 files changed, 55 insertions(+), 9 deletions(-) diff --git a/decnet/updater/executor.py b/decnet/updater/executor.py index fb83596..45edb72 100644 --- a/decnet/updater/executor.py +++ b/decnet/updater/executor.py @@ -111,6 +111,32 @@ def _venv_python(release: pathlib.Path) -> pathlib.Path: return release / ".venv" / "bin" / "python" +def _heal_path_symlink(install_dir: pathlib.Path) -> None: + """Point /usr/local/bin/decnet at the shared venv we manage. 
+ + Pre-fix bootstraps installed into ``<install_dir>/.venv`` (editable) and + symlinked /usr/local/bin/decnet there, so systemd units kept executing + the pre-update code even after ``_run_pip`` wrote to the shared venv. + Fix it opportunistically on every update so already-enrolled hosts + recover on the next push instead of needing a manual re-enroll. + """ + target = _shared_venv(install_dir) / "bin" / "decnet" + link = pathlib.Path("/usr/local/bin/decnet") + if not target.is_file(): + return + try: + if link.is_symlink() and pathlib.Path(os.readlink(link)) == target: + return + tmp = link.with_suffix(".tmp") + if tmp.exists() or tmp.is_symlink(): + tmp.unlink() + tmp.symlink_to(target) + os.replace(tmp, link) + log.info("repointed %s -> %s", link, target) + except OSError as exc: + log.warning("could not repoint %s: %s", link, exc) + + def _shared_venv(install_dir: pathlib.Path) -> pathlib.Path: """The one stable venv that agents/updaters run out of. @@ -447,27 +473,35 @@ def run_update( agent_dir: pathlib.Path = pki.DEFAULT_AGENT_DIR, ) -> dict[str, Any]: """Apply an update atomically. 
Rolls back on probe failure.""" + log.info("update received sha=%s bytes=%d install_dir=%s", sha, len(tarball_bytes), install_dir) clean_stale_staging(install_dir) staging = _staging_dir(install_dir) + log.info("extracting tarball -> %s", staging) extract_tarball(tarball_bytes, staging) _write_manifest(staging, sha) + log.info("pip install into shared venv (%s)", _shared_venv(install_dir)) pip = _run_pip(staging) if pip.returncode != 0: + log.error("pip install failed rc=%d stderr=%s", pip.returncode, (pip.stderr or pip.stdout).strip()[:400]) shutil.rmtree(staging, ignore_errors=True) raise UpdateError( "pip install failed on new release", stderr=pip.stderr or pip.stdout, ) + log.info("rotating releases: active.new -> active, active -> prev") _rotate(install_dir) _point_current_at(install_dir, _active_dir(install_dir)) + _heal_path_symlink(install_dir) + log.info("restarting agent (and forwarder if present)") _stop_agent(install_dir) _spawn_agent(install_dir) ok, detail = _probe_agent(agent_dir=agent_dir) if ok: + log.info("update complete sha=%s probe=ok", sha) return { "status": "updated", "release": read_release(_active_dir(install_dir)).to_dict(), @@ -536,21 +570,27 @@ def run_update_self( No auto-rollback. Caller must treat "connection dropped + /health returns new SHA within 30s" as success. 
""" + log.info("self-update received sha=%s bytes=%d install_dir=%s", sha, len(tarball_bytes), updater_install_dir) clean_stale_staging(updater_install_dir) staging = _staging_dir(updater_install_dir) + log.info("extracting tarball -> %s", staging) extract_tarball(tarball_bytes, staging) _write_manifest(staging, sha) + log.info("pip install updater release into shared venv (%s)", _shared_venv(updater_install_dir)) pip = _run_pip(staging) if pip.returncode != 0: + log.error("self-update pip install failed rc=%d stderr=%s", pip.returncode, (pip.stderr or pip.stdout).strip()[:400]) shutil.rmtree(staging, ignore_errors=True) raise UpdateError( "pip install failed on new updater release", stderr=pip.stderr or pip.stdout, ) + log.info("rotating updater releases and flipping current symlink") _rotate(updater_install_dir) _point_current_at(updater_install_dir, _active_dir(updater_install_dir)) + _heal_path_symlink(updater_install_dir) # Reconstruct the updater's original launch command from env vars set by # `decnet.updater.server.run`. We can't reuse sys.argv: inside the app @@ -576,6 +616,7 @@ def run_update_self( # on our own unit would kill us mid-response and the caller would see a # connection drop with no indication of success. if _systemd_available(): + log.info("self-update queued: systemctl restart %s (deferred 1s)", UPDATER_SYSTEMD_UNIT) subprocess.Popen( # nosec B603 B607 ["sh", "-c", f"sleep 1 && systemctl restart {UPDATER_SYSTEMD_UNIT}"], start_new_session=True, diff --git a/decnet/web/templates/enroll_bootstrap.sh.j2 b/decnet/web/templates/enroll_bootstrap.sh.j2 index b1b21ba..60ca616 100644 --- a/decnet/web/templates/enroll_bootstrap.sh.j2 +++ b/decnet/web/templates/enroll_bootstrap.sh.j2 @@ -16,14 +16,19 @@ echo "[DECNET] fetching payload..." curl -fsSL "{{ tarball_url }}" | tar -xz -C "$WORK" INSTALL_DIR=/opt/decnet -mkdir -p "$INSTALL_DIR" -cp -a "$WORK/." 
"$INSTALL_DIR/" -cd "$INSTALL_DIR" +RELEASE_DIR="$INSTALL_DIR/releases/active" +VENV_DIR="$INSTALL_DIR/venv" +# Mirror the updater's layout from day one so `decnet updater` can rotate +# releases/active in-place and the shared venv is the thing on PATH. +mkdir -p "$RELEASE_DIR" +cp -a "$WORK/." "$RELEASE_DIR/" +ln -sfn "$RELEASE_DIR" "$INSTALL_DIR/current" +cd "$RELEASE_DIR" -echo "[DECNET] building venv..." -python3 -m venv .venv -.venv/bin/pip install -q --upgrade pip -.venv/bin/pip install -q -e . +echo "[DECNET] building shared venv at $VENV_DIR..." +python3 -m venv "$VENV_DIR" +"$VENV_DIR/bin/pip" install -q --upgrade pip +"$VENV_DIR/bin/pip" install -q "$RELEASE_DIR" install -Dm0644 etc/decnet/decnet.ini /etc/decnet/decnet.ini [[ -f services.ini ]] && install -Dm0644 services.ini /etc/decnet/services.ini @@ -51,8 +56,8 @@ fi # Guarantee the pip-installed entrypoint is executable (some setuptools+editable # combos drop it with mode 0644) and expose it on PATH. -chmod 0755 "$INSTALL_DIR/.venv/bin/decnet" -ln -sf "$INSTALL_DIR/.venv/bin/decnet" /usr/local/bin/decnet +chmod 0755 "$VENV_DIR/bin/decnet" +ln -sf "$VENV_DIR/bin/decnet" /usr/local/bin/decnet echo "[DECNET] installing systemd units..." install -Dm0644 etc/systemd/system/decnet-agent.service /etc/systemd/system/decnet-agent.service