feat(init): decouple venv from install_dir; fail loud if no venv exists

The systemd unit templates hardcoded {{ install_dir }}/venv/bin/decnet.
On production hosts enroll_bootstrap.sh creates exactly that path so it
worked. On dev boxes where the operator runs `sudo decnet init` against
a source checkout with a differently-named venv (.venv, .311, .312),
every decnet-*.service looped forever in auto-restart with:

  Failed at step EXEC spawning .../venv/bin/decnet: No such file or
  directory

Templates now use {{ venv_dir }} as an independent Jinja2 var. `decnet
init` adds --venv-dir (explicit override), otherwise autodetects:

  1. $VIRTUAL_ENV (only when inside --install-dir, so a user-home venv
     never gets baked into a root-owned unit),
  2. {install_dir}/venv (production default; what enroll_bootstrap
     creates),
  3. {install_dir}/{.venv,.311,.312,.313} (common dev conventions).

Init aborts before any file writes if nothing resolves — an
operator-friendly error beats journalctl spam on every unit restart.

python3-venv doesn't set a persistent system variable — $VIRTUAL_ENV
lives in the activated shell only — so this has to be decided + baked
in at init time; there's no way for systemd to "inherit the current
venv" at unit start.

Test mode (--prefix) skips venv validation so the existing test suite
doesn't need to stub up a venv tree per case.
This commit is contained in:
2026-04-24 00:29:49 -04:00
parent cb692d570a
commit 51012eaa67
14 changed files with 103 additions and 17 deletions

View File

@@ -21,7 +21,7 @@ import shutil
import subprocess # nosec B404 import subprocess # nosec B404
import sys import sys
from pathlib import Path from pathlib import Path
from typing import Callable, List from typing import Callable, List, Optional
import typer import typer
from jinja2 import Environment, FileSystemLoader, StrictUndefined from jinja2 import Environment, FileSystemLoader, StrictUndefined
@@ -275,13 +275,75 @@ def _write_rendered_if_changed(
return "ok" return "ok"
def _resolve_venv_dir(install_dir: str, explicit: str | None) -> str:
"""Pick the virtualenv systemd units should ExecStart out of.
Priority:
1. ``--venv-dir`` flag (explicit; absolute path required).
2. ``VIRTUAL_ENV`` env var, but only when it lives under
``install_dir`` (refuse to bake /home/user/.venv into a system
service — that directory is user-owned and may vanish).
3. ``{install_dir}/venv`` — what ``enroll_bootstrap.sh`` creates
on fresh agents; the production default.
4. First hit from a short list of dev-box conventions under
``install_dir``: ``.venv``, ``.311``, ``.312``, ``.313``.
Raises RuntimeError with an operator-friendly message if none of
those resolve to a directory containing ``bin/decnet``. Failing loud
at init time beats systemd spamming journalctl with
'Failed at step EXEC spawning .../venv/bin/decnet: No such file or
directory' on every auto-restart.
"""
install_path = Path(install_dir)
candidates: list[Path] = []
if explicit:
if not explicit.startswith("/"):
raise RuntimeError(
f"--venv-dir must be an absolute path, got {explicit!r}"
)
candidates.append(Path(explicit))
else:
virtual_env = os.environ.get("VIRTUAL_ENV")
if virtual_env:
ve_path = Path(virtual_env)
try:
ve_path.relative_to(install_path)
candidates.append(ve_path)
except ValueError:
# VIRTUAL_ENV lives outside install_dir — don't bake a
# user-home venv into a root-owned systemd unit.
pass
candidates.append(install_path / "venv")
for name in (".venv", ".311", ".312", ".313"):
candidates.append(install_path / name)
for cand in candidates:
if (cand / "bin" / "decnet").is_file():
return str(cand)
searched = ", ".join(str(c) for c in candidates)
raise RuntimeError(
"Could not find a DECNET venv. Create one first (e.g. "
f"`python -m venv {install_path}/venv && "
f"{install_path}/venv/bin/pip install -e {install_path}[dev]`) "
"or pass --venv-dir. Searched: " + searched
)
def _install_units( def _install_units(
deploy: Path, systemd_dir: Path, *, install_dir: str, force: bool, dry_run: bool deploy: Path,
systemd_dir: Path,
*,
install_dir: str,
venv_dir: str,
force: bool,
dry_run: bool,
) -> str: ) -> str:
"""Render decnet-*.service.j2 → systemd_dir/decnet-*.service, and copy """Render decnet-*.service.j2 → systemd_dir/decnet-*.service, and copy
the static decnet.target (no templating needed — it has no install the static decnet.target (no templating needed — it has no install
path references).""" path references)."""
context = {"install_dir": install_dir} context = {"install_dir": install_dir, "venv_dir": venv_dir}
templates = sorted(deploy.glob("decnet-*.service.j2")) templates = sorted(deploy.glob("decnet-*.service.j2"))
static = [deploy / "decnet.target"] static = [deploy / "decnet.target"]
@@ -457,6 +519,14 @@ def register(app: typer.Typer) -> None:
"into every systemd unit via Jinja2 and used as the " "into every systemd unit via Jinja2 and used as the "
"decnet user's home directory.", "decnet user's home directory.",
), ),
venv_dir: Optional[str] = typer.Option(
None, "--venv-dir",
help="Absolute path to the Python venv systemd should "
"ExecStart from. If omitted, auto-detected in order: "
"$VIRTUAL_ENV (if under --install-dir), "
"{install-dir}/venv, then {install-dir}/{.venv,.311,"
".312,.313}. Init aborts if none exists.",
),
prefix: str = typer.Option( prefix: str = typer.Option(
"", "--prefix", hidden=True, "", "--prefix", hidden=True,
help="Filesystem prefix for tests (e.g. tmp_path). Empty = real root.", help="Filesystem prefix for tests (e.g. tmp_path). Empty = real root.",
@@ -604,6 +674,21 @@ def register(app: typer.Typer) -> None:
console.print(f"[red]decnet init: {exc}[/]") console.print(f"[red]decnet init: {exc}[/]")
raise typer.Exit(1) from exc raise typer.Exit(1) from exc
# Resolve venv BEFORE any file writes — fails loud if the
# operator hasn't created one yet, instead of shipping broken
# systemd units that journalctl spams forever. Skipped under
# --prefix (test mode) because the test harness doesn't build a
# real venv and the rendered string is asserted on directly.
if prefix:
resolved_venv = venv_dir or f"{install_dir}/venv"
else:
try:
resolved_venv = _resolve_venv_dir(install_dir, venv_dir)
except RuntimeError as exc:
console.print(f"[red]decnet init: {exc}[/]")
raise typer.Exit(1) from exc
console.print(f"[dim]using venv: {resolved_venv}[/]")
dirs = [ dirs = [
(pfx / _install_rel, 0o755, user, group), (pfx / _install_rel, 0o755, user, group),
(pfx / "var/lib/decnet", 0o750, user, group), (pfx / "var/lib/decnet", 0o750, user, group),
@@ -640,7 +725,8 @@ def register(app: typer.Typer) -> None:
"install systemd units", "install systemd units",
lambda: _install_units( lambda: _install_units(
deploy, systemd_dir, deploy, systemd_dir,
install_dir=install_dir, force=force, dry_run=dry_run, install_dir=install_dir, venv_dir=resolved_venv,
force=force, dry_run=dry_run,
), ),
) )
_step( _step(

View File

@@ -13,7 +13,7 @@ Group=decnet
SupplementaryGroups=docker SupplementaryGroups=docker
WorkingDirectory={{ install_dir }} WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet agent --host 0.0.0.0 --port 8765 --agent-dir /etc/decnet/agent ExecStart={{ venv_dir }}/bin/decnet agent --host 0.0.0.0 --port 8765 --agent-dir /etc/decnet/agent
# MACVLAN/IPVLAN management + scapy raw sockets. Granted via ambient caps so # MACVLAN/IPVLAN management + scapy raw sockets. Granted via ambient caps so
# the process starts unprivileged and keeps only these two bits. # the process starts unprivileged and keeps only these two bits.

View File

@@ -13,7 +13,7 @@ Group=decnet
SupplementaryGroups=docker SupplementaryGroups=docker
WorkingDirectory={{ install_dir }} WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet api ExecStart={{ venv_dir }}/bin/decnet api
# MACVLAN/IPVLAN setup runs from the API lifespan when the embedded sniffer is on. # MACVLAN/IPVLAN setup runs from the API lifespan when the embedded sniffer is on.
CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_RAW CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_RAW

View File

@@ -16,7 +16,7 @@ EnvironmentFile=-{{ install_dir }}/.env.local
# connect. # connect.
RuntimeDirectory=decnet RuntimeDirectory=decnet
RuntimeDirectoryMode=0755 RuntimeDirectoryMode=0755
ExecStart={{ install_dir }}/venv/bin/decnet bus \ ExecStart={{ venv_dir }}/bin/decnet bus \
--socket /run/decnet/bus.sock \ --socket /run/decnet/bus.sock \
--group decnet --group decnet

View File

@@ -13,7 +13,7 @@ Group=decnet
SupplementaryGroups=docker SupplementaryGroups=docker
WorkingDirectory={{ install_dir }} WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet collect ExecStart={{ venv_dir }}/bin/decnet collect
# No privileged network operations. # No privileged network operations.
CapabilityBoundingSet= CapabilityBoundingSet=

View File

@@ -15,7 +15,7 @@ EnvironmentFile=-{{ install_dir }}/.env.local
# Replace <master-host> with the master's LAN address or hostname. The agent # Replace <master-host> with the master's LAN address or hostname. The agent
# cert bundle at /etc/decnet/agent is reused — the forwarder presents the same # cert bundle at /etc/decnet/agent is reused — the forwarder presents the same
# worker identity when it connects to the master's listener. # worker identity when it connects to the master's listener.
ExecStart={{ install_dir }}/venv/bin/decnet forwarder \ ExecStart={{ venv_dir }}/bin/decnet forwarder \
--log-file /var/log/decnet/decnet.log \ --log-file /var/log/decnet/decnet.log \
--master-host ${DECNET_SWARM_MASTER_HOST} \ --master-host ${DECNET_SWARM_MASTER_HOST} \
--master-port 6514 \ --master-port 6514 \

View File

@@ -12,7 +12,7 @@ WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local EnvironmentFile=-{{ install_dir }}/.env.local
# Binds 0.0.0.0:6514 so workers across the LAN can connect. 6514 is not a # Binds 0.0.0.0:6514 so workers across the LAN can connect. 6514 is not a
# privileged port (≥1024), so no CAP_NET_BIND_SERVICE is required. # privileged port (≥1024), so no CAP_NET_BIND_SERVICE is required.
ExecStart={{ install_dir }}/venv/bin/decnet listener \ ExecStart={{ venv_dir }}/bin/decnet listener \
--host 0.0.0.0 --port 6514 \ --host 0.0.0.0 --port 6514 \
--ca-dir /etc/decnet/ca \ --ca-dir /etc/decnet/ca \
--log-path /var/log/decnet/master.log \ --log-path /var/log/decnet/master.log \

View File

@@ -13,7 +13,7 @@ Group=decnet
SupplementaryGroups=docker SupplementaryGroups=docker
WorkingDirectory={{ install_dir }} WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet mutate --watch ExecStart={{ venv_dir }}/bin/decnet mutate --watch
CapabilityBoundingSet= CapabilityBoundingSet=
AmbientCapabilities= AmbientCapabilities=

View File

@@ -10,7 +10,7 @@ User=decnet
Group=decnet Group=decnet
WorkingDirectory={{ install_dir }} WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet probe ExecStart={{ venv_dir }}/bin/decnet probe
# TCP connect probes only — no raw sockets required. # TCP connect probes only — no raw sockets required.
CapabilityBoundingSet= CapabilityBoundingSet=

View File

@@ -10,7 +10,7 @@ User=decnet
Group=decnet Group=decnet
WorkingDirectory={{ install_dir }} WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet profiler ExecStart={{ venv_dir }}/bin/decnet profiler
CapabilityBoundingSet= CapabilityBoundingSet=
AmbientCapabilities= AmbientCapabilities=

View File

@@ -10,7 +10,7 @@ User=decnet
Group=decnet Group=decnet
WorkingDirectory={{ install_dir }} WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet sniffer ExecStart={{ venv_dir }}/bin/decnet sniffer
# scapy needs raw packet access on the MACVLAN host interface. # scapy needs raw packet access on the MACVLAN host interface.
CapabilityBoundingSet=CAP_NET_RAW CapabilityBoundingSet=CAP_NET_RAW

View File

@@ -12,7 +12,7 @@ WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local EnvironmentFile=-{{ install_dir }}/.env.local
# Default bind is loopback — the controller is a master-local orchestrator # Default bind is loopback — the controller is a master-local orchestrator
# reached by the CLI and the web dashboard, not by workers. # reached by the CLI and the web dashboard, not by workers.
ExecStart={{ install_dir }}/venv/bin/decnet swarmctl --host 127.0.0.1 --port 8770 ExecStart={{ venv_dir }}/bin/decnet swarmctl --host 127.0.0.1 --port 8770
# No special capabilities — the controller issues mTLS certs and talks to # No special capabilities — the controller issues mTLS certs and talks to
# workers over TCP on unprivileged ports. # workers over TCP on unprivileged ports.

View File

@@ -12,7 +12,7 @@ User=decnet
Group=decnet Group=decnet
WorkingDirectory={{ install_dir }} WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet updater \ ExecStart={{ venv_dir }}/bin/decnet updater \
--host 0.0.0.0 --port 8766 \ --host 0.0.0.0 --port 8766 \
--updater-dir /etc/decnet/updater \ --updater-dir /etc/decnet/updater \
--install-dir {{ install_dir }} \ --install-dir {{ install_dir }} \

View File

@@ -10,7 +10,7 @@ User=decnet
Group=decnet Group=decnet
WorkingDirectory={{ install_dir }} WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet web ExecStart={{ venv_dir }}/bin/decnet web
# Uncomment if you bind the dashboard to a privileged port (80/443): # Uncomment if you bind the dashboard to a privileged port (80/443):
# CapabilityBoundingSet=CAP_NET_BIND_SERVICE # CapabilityBoundingSet=CAP_NET_BIND_SERVICE