feat(init): decouple venv from install_dir; fail loud if no venv exists
The systemd unit templates hardcoded {{ install_dir }}/venv/bin/decnet.
On production hosts enroll_bootstrap.sh creates exactly that path so it
worked. On dev boxes where the operator runs `sudo decnet init` against
a source checkout with a differently-named venv (.venv, .311, .312),
every decnet-*.service looped forever in auto-restart with:
Failed at step EXEC spawning .../venv/bin/decnet: No such file or
directory
Templates now use {{ venv_dir }} as an independent Jinja2 variable. `decnet
init` gains a --venv-dir option (explicit override); when it is omitted, the venv is auto-detected in this order:
1. $VIRTUAL_ENV (only when inside --install-dir, so a user-home venv
never gets baked into a root-owned unit),
2. {install_dir}/venv (production default; what enroll_bootstrap
creates),
3. {install_dir}/{.venv,.311,.312,.313} (common dev conventions).
Init aborts before any file writes if nothing resolves — an
operator-friendly error beats journalctl spam on every unit restart.
python3-venv doesn't set a persistent system variable — $VIRTUAL_ENV
lives in the activated shell only — so this has to be decided + baked
in at init time; there's no way for systemd to "inherit the current
venv" at unit start.
Test mode (--prefix) skips venv validation so the existing test suite
doesn't need to stub up a venv tree per case.
This commit is contained in:
@@ -21,7 +21,7 @@ import shutil
|
||||
import subprocess # nosec B404
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Callable, List
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
import typer
|
||||
from jinja2 import Environment, FileSystemLoader, StrictUndefined
|
||||
@@ -275,13 +275,75 @@ def _write_rendered_if_changed(
|
||||
return "ok"
|
||||
|
||||
|
||||
def _resolve_venv_dir(install_dir: str, explicit: str | None) -> str:
|
||||
"""Pick the virtualenv systemd units should ExecStart out of.
|
||||
|
||||
Priority:
|
||||
1. ``--venv-dir`` flag (explicit; absolute path required).
|
||||
2. ``VIRTUAL_ENV`` env var, but only when it lives under
|
||||
``install_dir`` (refuse to bake /home/user/.venv into a system
|
||||
service — that directory is user-owned and may vanish).
|
||||
3. ``{install_dir}/venv`` — what ``enroll_bootstrap.sh`` creates
|
||||
on fresh agents; the production default.
|
||||
4. First hit from a short list of dev-box conventions under
|
||||
``install_dir``: ``.venv``, ``.311``, ``.312``, ``.313``.
|
||||
|
||||
Raises RuntimeError with an operator-friendly message if none of
|
||||
those resolve to a directory containing ``bin/decnet``. Failing loud
|
||||
at init time beats systemd spamming journalctl with
|
||||
'Failed at step EXEC spawning .../venv/bin/decnet: No such file or
|
||||
directory' on every auto-restart.
|
||||
"""
|
||||
install_path = Path(install_dir)
|
||||
|
||||
candidates: list[Path] = []
|
||||
if explicit:
|
||||
if not explicit.startswith("/"):
|
||||
raise RuntimeError(
|
||||
f"--venv-dir must be an absolute path, got {explicit!r}"
|
||||
)
|
||||
candidates.append(Path(explicit))
|
||||
else:
|
||||
virtual_env = os.environ.get("VIRTUAL_ENV")
|
||||
if virtual_env:
|
||||
ve_path = Path(virtual_env)
|
||||
try:
|
||||
ve_path.relative_to(install_path)
|
||||
candidates.append(ve_path)
|
||||
except ValueError:
|
||||
# VIRTUAL_ENV lives outside install_dir — don't bake a
|
||||
# user-home venv into a root-owned systemd unit.
|
||||
pass
|
||||
candidates.append(install_path / "venv")
|
||||
for name in (".venv", ".311", ".312", ".313"):
|
||||
candidates.append(install_path / name)
|
||||
|
||||
for cand in candidates:
|
||||
if (cand / "bin" / "decnet").is_file():
|
||||
return str(cand)
|
||||
|
||||
searched = ", ".join(str(c) for c in candidates)
|
||||
raise RuntimeError(
|
||||
"Could not find a DECNET venv. Create one first (e.g. "
|
||||
f"`python -m venv {install_path}/venv && "
|
||||
f"{install_path}/venv/bin/pip install -e {install_path}[dev]`) "
|
||||
"or pass --venv-dir. Searched: " + searched
|
||||
)
|
||||
|
||||
|
||||
def _install_units(
|
||||
deploy: Path, systemd_dir: Path, *, install_dir: str, force: bool, dry_run: bool
|
||||
deploy: Path,
|
||||
systemd_dir: Path,
|
||||
*,
|
||||
install_dir: str,
|
||||
venv_dir: str,
|
||||
force: bool,
|
||||
dry_run: bool,
|
||||
) -> str:
|
||||
"""Render decnet-*.service.j2 → systemd_dir/decnet-*.service, and copy
|
||||
the static decnet.target (no templating needed — it has no install
|
||||
path references)."""
|
||||
context = {"install_dir": install_dir}
|
||||
context = {"install_dir": install_dir, "venv_dir": venv_dir}
|
||||
templates = sorted(deploy.glob("decnet-*.service.j2"))
|
||||
static = [deploy / "decnet.target"]
|
||||
|
||||
@@ -457,6 +519,14 @@ def register(app: typer.Typer) -> None:
|
||||
"into every systemd unit via Jinja2 and used as the "
|
||||
"decnet user's home directory.",
|
||||
),
|
||||
venv_dir: Optional[str] = typer.Option(
|
||||
None, "--venv-dir",
|
||||
help="Absolute path to the Python venv systemd should "
|
||||
"ExecStart from. If omitted, auto-detected in order: "
|
||||
"$VIRTUAL_ENV (if under --install-dir), "
|
||||
"{install-dir}/venv, then {install-dir}/{.venv,.311,"
|
||||
".312,.313}. Init aborts if none exists.",
|
||||
),
|
||||
prefix: str = typer.Option(
|
||||
"", "--prefix", hidden=True,
|
||||
help="Filesystem prefix for tests (e.g. tmp_path). Empty = real root.",
|
||||
@@ -604,6 +674,21 @@ def register(app: typer.Typer) -> None:
|
||||
console.print(f"[red]decnet init: {exc}[/]")
|
||||
raise typer.Exit(1) from exc
|
||||
|
||||
# Resolve venv BEFORE any file writes — fails loud if the
|
||||
# operator hasn't created one yet, instead of shipping broken
|
||||
# systemd units that journalctl spams forever. Skipped under
|
||||
# --prefix (test mode) because the test harness doesn't build a
|
||||
# real venv and the rendered string is asserted on directly.
|
||||
if prefix:
|
||||
resolved_venv = venv_dir or f"{install_dir}/venv"
|
||||
else:
|
||||
try:
|
||||
resolved_venv = _resolve_venv_dir(install_dir, venv_dir)
|
||||
except RuntimeError as exc:
|
||||
console.print(f"[red]decnet init: {exc}[/]")
|
||||
raise typer.Exit(1) from exc
|
||||
console.print(f"[dim]using venv: {resolved_venv}[/]")
|
||||
|
||||
dirs = [
|
||||
(pfx / _install_rel, 0o755, user, group),
|
||||
(pfx / "var/lib/decnet", 0o750, user, group),
|
||||
@@ -640,7 +725,8 @@ def register(app: typer.Typer) -> None:
|
||||
"install systemd units",
|
||||
lambda: _install_units(
|
||||
deploy, systemd_dir,
|
||||
install_dir=install_dir, force=force, dry_run=dry_run,
|
||||
install_dir=install_dir, venv_dir=resolved_venv,
|
||||
force=force, dry_run=dry_run,
|
||||
),
|
||||
)
|
||||
_step(
|
||||
|
||||
@@ -13,7 +13,7 @@ Group=decnet
|
||||
SupplementaryGroups=docker
|
||||
WorkingDirectory={{ install_dir }}
|
||||
EnvironmentFile=-{{ install_dir }}/.env.local
|
||||
ExecStart={{ install_dir }}/venv/bin/decnet agent --host 0.0.0.0 --port 8765 --agent-dir /etc/decnet/agent
|
||||
ExecStart={{ venv_dir }}/bin/decnet agent --host 0.0.0.0 --port 8765 --agent-dir /etc/decnet/agent
|
||||
|
||||
# MACVLAN/IPVLAN management + scapy raw sockets. Granted via ambient caps so
|
||||
# the process starts unprivileged and keeps only these two bits.
|
||||
|
||||
@@ -13,7 +13,7 @@ Group=decnet
|
||||
SupplementaryGroups=docker
|
||||
WorkingDirectory={{ install_dir }}
|
||||
EnvironmentFile=-{{ install_dir }}/.env.local
|
||||
ExecStart={{ install_dir }}/venv/bin/decnet api
|
||||
ExecStart={{ venv_dir }}/bin/decnet api
|
||||
|
||||
# MACVLAN/IPVLAN setup runs from the API lifespan when the embedded sniffer is on.
|
||||
CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_RAW
|
||||
|
||||
@@ -16,7 +16,7 @@ EnvironmentFile=-{{ install_dir }}/.env.local
|
||||
# connect.
|
||||
RuntimeDirectory=decnet
|
||||
RuntimeDirectoryMode=0755
|
||||
ExecStart={{ install_dir }}/venv/bin/decnet bus \
|
||||
ExecStart={{ venv_dir }}/bin/decnet bus \
|
||||
--socket /run/decnet/bus.sock \
|
||||
--group decnet
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ Group=decnet
|
||||
SupplementaryGroups=docker
|
||||
WorkingDirectory={{ install_dir }}
|
||||
EnvironmentFile=-{{ install_dir }}/.env.local
|
||||
ExecStart={{ install_dir }}/venv/bin/decnet collect
|
||||
ExecStart={{ venv_dir }}/bin/decnet collect
|
||||
|
||||
# No privileged network operations.
|
||||
CapabilityBoundingSet=
|
||||
|
||||
@@ -15,7 +15,7 @@ EnvironmentFile=-{{ install_dir }}/.env.local
|
||||
# Replace <master-host> with the master's LAN address or hostname. The agent
|
||||
# cert bundle at /etc/decnet/agent is reused — the forwarder presents the same
|
||||
# worker identity when it connects to the master's listener.
|
||||
ExecStart={{ install_dir }}/venv/bin/decnet forwarder \
|
||||
ExecStart={{ venv_dir }}/bin/decnet forwarder \
|
||||
--log-file /var/log/decnet/decnet.log \
|
||||
--master-host ${DECNET_SWARM_MASTER_HOST} \
|
||||
--master-port 6514 \
|
||||
|
||||
@@ -12,7 +12,7 @@ WorkingDirectory={{ install_dir }}
|
||||
EnvironmentFile=-{{ install_dir }}/.env.local
|
||||
# Binds 0.0.0.0:6514 so workers across the LAN can connect. 6514 is not a
|
||||
# privileged port (≥1024), so no CAP_NET_BIND_SERVICE is required.
|
||||
ExecStart={{ install_dir }}/venv/bin/decnet listener \
|
||||
ExecStart={{ venv_dir }}/bin/decnet listener \
|
||||
--host 0.0.0.0 --port 6514 \
|
||||
--ca-dir /etc/decnet/ca \
|
||||
--log-path /var/log/decnet/master.log \
|
||||
|
||||
@@ -13,7 +13,7 @@ Group=decnet
|
||||
SupplementaryGroups=docker
|
||||
WorkingDirectory={{ install_dir }}
|
||||
EnvironmentFile=-{{ install_dir }}/.env.local
|
||||
ExecStart={{ install_dir }}/venv/bin/decnet mutate --watch
|
||||
ExecStart={{ venv_dir }}/bin/decnet mutate --watch
|
||||
|
||||
CapabilityBoundingSet=
|
||||
AmbientCapabilities=
|
||||
|
||||
@@ -10,7 +10,7 @@ User=decnet
|
||||
Group=decnet
|
||||
WorkingDirectory={{ install_dir }}
|
||||
EnvironmentFile=-{{ install_dir }}/.env.local
|
||||
ExecStart={{ install_dir }}/venv/bin/decnet probe
|
||||
ExecStart={{ venv_dir }}/bin/decnet probe
|
||||
|
||||
# TCP connect probes only — no raw sockets required.
|
||||
CapabilityBoundingSet=
|
||||
|
||||
@@ -10,7 +10,7 @@ User=decnet
|
||||
Group=decnet
|
||||
WorkingDirectory={{ install_dir }}
|
||||
EnvironmentFile=-{{ install_dir }}/.env.local
|
||||
ExecStart={{ install_dir }}/venv/bin/decnet profiler
|
||||
ExecStart={{ venv_dir }}/bin/decnet profiler
|
||||
|
||||
CapabilityBoundingSet=
|
||||
AmbientCapabilities=
|
||||
|
||||
@@ -10,7 +10,7 @@ User=decnet
|
||||
Group=decnet
|
||||
WorkingDirectory={{ install_dir }}
|
||||
EnvironmentFile=-{{ install_dir }}/.env.local
|
||||
ExecStart={{ install_dir }}/venv/bin/decnet sniffer
|
||||
ExecStart={{ venv_dir }}/bin/decnet sniffer
|
||||
|
||||
# scapy needs raw packet access on the MACVLAN host interface.
|
||||
CapabilityBoundingSet=CAP_NET_RAW
|
||||
|
||||
@@ -12,7 +12,7 @@ WorkingDirectory={{ install_dir }}
|
||||
EnvironmentFile=-{{ install_dir }}/.env.local
|
||||
# Default bind is loopback — the controller is a master-local orchestrator
|
||||
# reached by the CLI and the web dashboard, not by workers.
|
||||
ExecStart={{ install_dir }}/venv/bin/decnet swarmctl --host 127.0.0.1 --port 8770
|
||||
ExecStart={{ venv_dir }}/bin/decnet swarmctl --host 127.0.0.1 --port 8770
|
||||
|
||||
# No special capabilities — the controller issues mTLS certs and talks to
|
||||
# workers over TCP on unprivileged ports.
|
||||
|
||||
@@ -12,7 +12,7 @@ User=decnet
|
||||
Group=decnet
|
||||
WorkingDirectory={{ install_dir }}
|
||||
EnvironmentFile=-{{ install_dir }}/.env.local
|
||||
ExecStart={{ install_dir }}/venv/bin/decnet updater \
|
||||
ExecStart={{ venv_dir }}/bin/decnet updater \
|
||||
--host 0.0.0.0 --port 8766 \
|
||||
--updater-dir /etc/decnet/updater \
|
||||
--install-dir {{ install_dir }} \
|
||||
|
||||
@@ -10,7 +10,7 @@ User=decnet
|
||||
Group=decnet
|
||||
WorkingDirectory={{ install_dir }}
|
||||
EnvironmentFile=-{{ install_dir }}/.env.local
|
||||
ExecStart={{ install_dir }}/venv/bin/decnet web
|
||||
ExecStart={{ venv_dir }}/bin/decnet web
|
||||
|
||||
# Uncomment if you bind the dashboard to a privileged port (80/443):
|
||||
# CapabilityBoundingSet=CAP_NET_BIND_SERVICE
|
||||
|
||||
Reference in New Issue
Block a user