feat(init): decouple venv from install_dir; fail loud if no venv exists

The systemd unit templates hardcoded {{ install_dir }}/venv/bin/decnet.
On production hosts enroll_bootstrap.sh creates exactly that path so it
worked. On dev boxes where the operator runs `sudo decnet init` against
a source checkout with a differently-named venv (.venv, .311, .312),
every decnet-*.service looped forever in auto-restart with:

  Failed at step EXEC spawning .../venv/bin/decnet: No such file or
  directory

Templates now use {{ venv_dir }} as an independent Jinja2 var. `decnet
init` adds --venv-dir (explicit override), otherwise autodetects:

  1. $VIRTUAL_ENV (only when inside --install-dir, so a user-home venv
     never gets baked into a root-owned unit),
  2. {install_dir}/venv (production default; what enroll_bootstrap
     creates),
  3. {install_dir}/{.venv,.311,.312,.313} (common dev conventions).

Init aborts before any file writes if nothing resolves — an
operator-friendly error beats journalctl spam on every unit restart.

Activating a venv doesn't set any persistent, system-wide variable —
$VIRTUAL_ENV exists only in the shell that activated it — so the venv
path has to be decided and baked into the units at init time; there's
no way for systemd to "inherit the current venv" at unit start.

Test mode (--prefix) skips venv validation so the existing test suite
doesn't need to stub up a venv tree per case.
This commit is contained in:
2026-04-24 00:29:49 -04:00
parent cb692d570a
commit 51012eaa67
14 changed files with 103 additions and 17 deletions

View File

@@ -21,7 +21,7 @@ import shutil
import subprocess # nosec B404
import sys
from pathlib import Path
from typing import Callable, List
from typing import Callable, List, Optional
import typer
from jinja2 import Environment, FileSystemLoader, StrictUndefined
@@ -275,13 +275,75 @@ def _write_rendered_if_changed(
return "ok"
def _resolve_venv_dir(install_dir: str, explicit: str | None) -> str:
"""Pick the virtualenv systemd units should ExecStart out of.
Priority:
1. ``--venv-dir`` flag (explicit; absolute path required).
2. ``VIRTUAL_ENV`` env var, but only when it lives under
``install_dir`` (refuse to bake /home/user/.venv into a system
service — that directory is user-owned and may vanish).
3. ``{install_dir}/venv`` — what ``enroll_bootstrap.sh`` creates
on fresh agents; the production default.
4. First hit from a short list of dev-box conventions under
``install_dir``: ``.venv``, ``.311``, ``.312``, ``.313``.
Raises RuntimeError with an operator-friendly message if none of
those resolve to a directory containing ``bin/decnet``. Failing loud
at init time beats systemd spamming journalctl with
'Failed at step EXEC spawning .../venv/bin/decnet: No such file or
directory' on every auto-restart.
"""
install_path = Path(install_dir)
candidates: list[Path] = []
if explicit:
if not explicit.startswith("/"):
raise RuntimeError(
f"--venv-dir must be an absolute path, got {explicit!r}"
)
candidates.append(Path(explicit))
else:
virtual_env = os.environ.get("VIRTUAL_ENV")
if virtual_env:
ve_path = Path(virtual_env)
try:
ve_path.relative_to(install_path)
candidates.append(ve_path)
except ValueError:
# VIRTUAL_ENV lives outside install_dir — don't bake a
# user-home venv into a root-owned systemd unit.
pass
candidates.append(install_path / "venv")
for name in (".venv", ".311", ".312", ".313"):
candidates.append(install_path / name)
for cand in candidates:
if (cand / "bin" / "decnet").is_file():
return str(cand)
searched = ", ".join(str(c) for c in candidates)
raise RuntimeError(
"Could not find a DECNET venv. Create one first (e.g. "
f"`python -m venv {install_path}/venv && "
f"{install_path}/venv/bin/pip install -e {install_path}[dev]`) "
"or pass --venv-dir. Searched: " + searched
)
def _install_units(
deploy: Path, systemd_dir: Path, *, install_dir: str, force: bool, dry_run: bool
deploy: Path,
systemd_dir: Path,
*,
install_dir: str,
venv_dir: str,
force: bool,
dry_run: bool,
) -> str:
"""Render decnet-*.service.j2 → systemd_dir/decnet-*.service, and copy
the static decnet.target (no templating needed — it has no install
path references)."""
context = {"install_dir": install_dir}
context = {"install_dir": install_dir, "venv_dir": venv_dir}
templates = sorted(deploy.glob("decnet-*.service.j2"))
static = [deploy / "decnet.target"]
@@ -457,6 +519,14 @@ def register(app: typer.Typer) -> None:
"into every systemd unit via Jinja2 and used as the "
"decnet user's home directory.",
),
venv_dir: Optional[str] = typer.Option(
None, "--venv-dir",
help="Absolute path to the Python venv systemd should "
"ExecStart from. If omitted, auto-detected in order: "
"$VIRTUAL_ENV (if under --install-dir), "
"{install-dir}/venv, then {install-dir}/{.venv,.311,"
".312,.313}. Init aborts if none exists.",
),
prefix: str = typer.Option(
"", "--prefix", hidden=True,
help="Filesystem prefix for tests (e.g. tmp_path). Empty = real root.",
@@ -604,6 +674,21 @@ def register(app: typer.Typer) -> None:
console.print(f"[red]decnet init: {exc}[/]")
raise typer.Exit(1) from exc
# Resolve venv BEFORE any file writes — fails loud if the
# operator hasn't created one yet, instead of shipping broken
# systemd units that journalctl spams forever. Skipped under
# --prefix (test mode) because the test harness doesn't build a
# real venv and the rendered string is asserted on directly.
if prefix:
resolved_venv = venv_dir or f"{install_dir}/venv"
else:
try:
resolved_venv = _resolve_venv_dir(install_dir, venv_dir)
except RuntimeError as exc:
console.print(f"[red]decnet init: {exc}[/]")
raise typer.Exit(1) from exc
console.print(f"[dim]using venv: {resolved_venv}[/]")
dirs = [
(pfx / _install_rel, 0o755, user, group),
(pfx / "var/lib/decnet", 0o750, user, group),
@@ -640,7 +725,8 @@ def register(app: typer.Typer) -> None:
"install systemd units",
lambda: _install_units(
deploy, systemd_dir,
install_dir=install_dir, force=force, dry_run=dry_run,
install_dir=install_dir, venv_dir=resolved_venv,
force=force, dry_run=dry_run,
),
)
_step(

View File

@@ -13,7 +13,7 @@ Group=decnet
SupplementaryGroups=docker
WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet agent --host 0.0.0.0 --port 8765 --agent-dir /etc/decnet/agent
ExecStart={{ venv_dir }}/bin/decnet agent --host 0.0.0.0 --port 8765 --agent-dir /etc/decnet/agent
# MACVLAN/IPVLAN management + scapy raw sockets. Granted via ambient caps so
# the process starts unprivileged and keeps only these two bits.

View File

@@ -13,7 +13,7 @@ Group=decnet
SupplementaryGroups=docker
WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet api
ExecStart={{ venv_dir }}/bin/decnet api
# MACVLAN/IPVLAN setup runs from the API lifespan when the embedded sniffer is on.
CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_RAW

View File

@@ -16,7 +16,7 @@ EnvironmentFile=-{{ install_dir }}/.env.local
# connect.
RuntimeDirectory=decnet
RuntimeDirectoryMode=0755
ExecStart={{ install_dir }}/venv/bin/decnet bus \
ExecStart={{ venv_dir }}/bin/decnet bus \
--socket /run/decnet/bus.sock \
--group decnet

View File

@@ -13,7 +13,7 @@ Group=decnet
SupplementaryGroups=docker
WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet collect
ExecStart={{ venv_dir }}/bin/decnet collect
# No privileged network operations.
CapabilityBoundingSet=

View File

@@ -15,7 +15,7 @@ EnvironmentFile=-{{ install_dir }}/.env.local
# Replace <master-host> with the master's LAN address or hostname. The agent
# cert bundle at /etc/decnet/agent is reused — the forwarder presents the same
# worker identity when it connects to the master's listener.
ExecStart={{ install_dir }}/venv/bin/decnet forwarder \
ExecStart={{ venv_dir }}/bin/decnet forwarder \
--log-file /var/log/decnet/decnet.log \
--master-host ${DECNET_SWARM_MASTER_HOST} \
--master-port 6514 \

View File

@@ -12,7 +12,7 @@ WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local
# Binds 0.0.0.0:6514 so workers across the LAN can connect. 6514 is not a
# privileged port (≥1024), so no CAP_NET_BIND_SERVICE is required.
ExecStart={{ install_dir }}/venv/bin/decnet listener \
ExecStart={{ venv_dir }}/bin/decnet listener \
--host 0.0.0.0 --port 6514 \
--ca-dir /etc/decnet/ca \
--log-path /var/log/decnet/master.log \

View File

@@ -13,7 +13,7 @@ Group=decnet
SupplementaryGroups=docker
WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet mutate --watch
ExecStart={{ venv_dir }}/bin/decnet mutate --watch
CapabilityBoundingSet=
AmbientCapabilities=

View File

@@ -10,7 +10,7 @@ User=decnet
Group=decnet
WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet probe
ExecStart={{ venv_dir }}/bin/decnet probe
# TCP connect probes only — no raw sockets required.
CapabilityBoundingSet=

View File

@@ -10,7 +10,7 @@ User=decnet
Group=decnet
WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet profiler
ExecStart={{ venv_dir }}/bin/decnet profiler
CapabilityBoundingSet=
AmbientCapabilities=

View File

@@ -10,7 +10,7 @@ User=decnet
Group=decnet
WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet sniffer
ExecStart={{ venv_dir }}/bin/decnet sniffer
# scapy needs raw packet access on the MACVLAN host interface.
CapabilityBoundingSet=CAP_NET_RAW

View File

@@ -12,7 +12,7 @@ WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local
# Default bind is loopback — the controller is a master-local orchestrator
# reached by the CLI and the web dashboard, not by workers.
ExecStart={{ install_dir }}/venv/bin/decnet swarmctl --host 127.0.0.1 --port 8770
ExecStart={{ venv_dir }}/bin/decnet swarmctl --host 127.0.0.1 --port 8770
# No special capabilities — the controller issues mTLS certs and talks to
# workers over TCP on unprivileged ports.

View File

@@ -12,7 +12,7 @@ User=decnet
Group=decnet
WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet updater \
ExecStart={{ venv_dir }}/bin/decnet updater \
--host 0.0.0.0 --port 8766 \
--updater-dir /etc/decnet/updater \
--install-dir {{ install_dir }} \

View File

@@ -10,7 +10,7 @@ User=decnet
Group=decnet
WorkingDirectory={{ install_dir }}
EnvironmentFile=-{{ install_dir }}/.env.local
ExecStart={{ install_dir }}/venv/bin/decnet web
ExecStart={{ venv_dir }}/bin/decnet web
# Uncomment if you bind the dashboard to a privileged port (80/443):
# CapabilityBoundingSet=CAP_NET_BIND_SERVICE