The 1,878-line cli.py held every Typer command plus process/HTTP helpers and mode-gating logic. Split into one module per command using a register(app) pattern so submodules never import app at module scope, eliminating circular-import risk. - utils.py: process helpers, _http_request, _kill_all_services, console, log - gating.py: MASTER_ONLY_* sets, _require_master_mode, _gate_commands_by_mode - deploy.py: deploy + _deploy_swarm (tightly coupled) - lifecycle.py: status, teardown, redeploy - workers.py: probe, collect, mutate, correlate - inventory.py, swarm.py, db.py, and one file per remaining command __init__.py calls register(app) on each module then runs the mode gate last, and re-exports the private symbols tests patch against (_db_reset_mysql_async, _kill_all_services, _require_master_mode, etc.). Test patches retargeted to the submodule where each name now resolves. Enroll-bundle tarball test updated to assert decnet/cli/__init__.py. No behavioral change.
178 lines
6.4 KiB
Python
178 lines
6.4 KiB
Python
"""Shared CLI helpers: console, logger, process management, swarm HTTP client.
|
|
|
|
Submodules reference these as ``from . import utils`` then ``utils.foo(...)``
|
|
so tests can patch ``decnet.cli.utils.<name>`` and have every caller see it.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import signal
|
|
import subprocess # nosec B404
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
import typer
|
|
from rich.console import Console
|
|
|
|
from decnet.logging import get_logger
|
|
from decnet.env import DECNET_API_HOST, DECNET_API_PORT, DECNET_INGEST_LOG_FILE
|
|
|
|
log = get_logger("cli")
|
|
console = Console()
|
|
|
|
|
|
def _daemonize() -> None:
|
|
"""Fork the current process into a background daemon (Unix double-fork)."""
|
|
if os.fork() > 0:
|
|
raise SystemExit(0)
|
|
os.setsid()
|
|
if os.fork() > 0:
|
|
raise SystemExit(0)
|
|
sys.stdout = open(os.devnull, "w") # noqa: SIM115
|
|
sys.stderr = open(os.devnull, "w") # noqa: SIM115
|
|
sys.stdin = open(os.devnull, "r") # noqa: SIM115
|
|
|
|
|
|
def _pid_dir() -> Path:
|
|
"""Return the writable PID directory.
|
|
|
|
/opt/decnet when it exists and is writable (production), else
|
|
~/.decnet (dev). The directory is created if needed."""
|
|
candidates = [Path("/opt/decnet"), Path.home() / ".decnet"]
|
|
for path in candidates:
|
|
try:
|
|
path.mkdir(parents=True, exist_ok=True)
|
|
if os.access(path, os.W_OK):
|
|
return path
|
|
except (PermissionError, OSError):
|
|
continue
|
|
return Path("/tmp") # nosec B108
|
|
|
|
|
|
def _spawn_detached(argv: list[str], pid_file: Path) -> int:
|
|
"""Spawn a DECNET subcommand as a fully-independent sibling process.
|
|
|
|
The parent does NOT wait() on this child. start_new_session=True puts
|
|
the child in its own session so SIGHUP on parent exit doesn't kill it;
|
|
stdin/stdout/stderr go to /dev/null so the launching shell can close
|
|
without EIO on the child. close_fds=True prevents inherited sockets
|
|
from pinning ports we're trying to rebind.
|
|
|
|
This is deliberately NOT a supervisor — we fire-and-forget. If the
|
|
child dies, the operator restarts it manually via its own subcommand.
|
|
"""
|
|
if pid_file.exists():
|
|
try:
|
|
existing = int(pid_file.read_text().strip())
|
|
os.kill(existing, 0)
|
|
return existing
|
|
except (ValueError, ProcessLookupError, PermissionError, OSError):
|
|
pass # stale pid_file — fall through and spawn
|
|
|
|
with open(os.devnull, "rb") as dn_in, open(os.devnull, "ab") as dn_out:
|
|
proc = subprocess.Popen( # nosec B603
|
|
argv,
|
|
stdin=dn_in, stdout=dn_out, stderr=dn_out,
|
|
start_new_session=True, close_fds=True,
|
|
)
|
|
pid_file.parent.mkdir(parents=True, exist_ok=True)
|
|
pid_file.write_text(f"{proc.pid}\n")
|
|
return proc.pid
|
|
|
|
|
|
def _is_running(match_fn) -> int | None:
|
|
"""Return PID of a running DECNET process matching ``match_fn(cmdline)``, or None."""
|
|
import psutil
|
|
|
|
for proc in psutil.process_iter(["pid", "cmdline"]):
|
|
try:
|
|
cmd = proc.info["cmdline"]
|
|
if cmd and match_fn(cmd):
|
|
return proc.info["pid"]
|
|
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
|
continue
|
|
return None
|
|
|
|
|
|
def _service_registry(log_file: str) -> list[tuple[str, callable, list[str]]]:
|
|
"""Return the microservice registry for health-check and relaunch.
|
|
|
|
On agents these run as systemd units invoking /usr/local/bin/decnet,
|
|
which doesn't include "decnet.cli" in its cmdline. On master dev boxes
|
|
they're launched via `python -m decnet.cli`. Match either form — cmd
|
|
is a list of argv tokens, so substring-check the joined string.
|
|
"""
|
|
_py = sys.executable
|
|
|
|
def _matches(sub: str, extras: tuple[str, ...] = ()):
|
|
def _check(cmd) -> bool:
|
|
joined = " ".join(cmd) if not isinstance(cmd, str) else cmd
|
|
if "decnet" not in joined:
|
|
return False
|
|
if sub not in joined:
|
|
return False
|
|
return all(e in joined for e in extras)
|
|
return _check
|
|
|
|
return [
|
|
("Collector", _matches("collect"),
|
|
[_py, "-m", "decnet.cli", "collect", "--daemon", "--log-file", log_file]),
|
|
("Mutator", _matches("mutate", ("--watch",)),
|
|
[_py, "-m", "decnet.cli", "mutate", "--daemon", "--watch"]),
|
|
("Prober", _matches("probe"),
|
|
[_py, "-m", "decnet.cli", "probe", "--daemon", "--log-file", log_file]),
|
|
("Profiler", _matches("profiler"),
|
|
[_py, "-m", "decnet.cli", "profiler", "--daemon"]),
|
|
("Sniffer", _matches("sniffer"),
|
|
[_py, "-m", "decnet.cli", "sniffer", "--daemon", "--log-file", log_file]),
|
|
("API",
|
|
lambda cmd: "uvicorn" in cmd and "decnet.web.api:app" in cmd,
|
|
[_py, "-m", "uvicorn", "decnet.web.api:app",
|
|
"--host", DECNET_API_HOST, "--port", str(DECNET_API_PORT)]),
|
|
]
|
|
|
|
|
|
def _kill_all_services() -> None:
|
|
"""Find and kill all running DECNET microservice processes."""
|
|
registry = _service_registry(str(DECNET_INGEST_LOG_FILE))
|
|
killed = 0
|
|
for name, match_fn, _launch_args in registry:
|
|
pid = _is_running(match_fn)
|
|
if pid is not None:
|
|
console.print(f"[yellow]Stopping {name} (PID {pid})...[/]")
|
|
os.kill(pid, signal.SIGTERM)
|
|
killed += 1
|
|
|
|
if killed:
|
|
console.print(f"[green]{killed} background process(es) stopped.[/]")
|
|
else:
|
|
console.print("[dim]No DECNET services were running.[/]")
|
|
|
|
|
|
_DEFAULT_SWARMCTL_URL = "http://127.0.0.1:8770"
|
|
|
|
|
|
def _swarmctl_base_url(url: Optional[str]) -> str:
|
|
return url or os.environ.get("DECNET_SWARMCTL_URL", _DEFAULT_SWARMCTL_URL)
|
|
|
|
|
|
def _http_request(method: str, url: str, *, json_body: Optional[dict] = None, timeout: float = 30.0):
|
|
"""Tiny sync wrapper around httpx; avoids leaking async into the CLI."""
|
|
import httpx
|
|
try:
|
|
resp = httpx.request(method, url, json=json_body, timeout=timeout)
|
|
except httpx.HTTPError as exc:
|
|
console.print(f"[red]Could not reach swarm controller at {url}: {exc}[/]")
|
|
console.print("[dim]Is `decnet swarmctl` running?[/]")
|
|
raise typer.Exit(2)
|
|
if resp.status_code >= 400:
|
|
try:
|
|
detail = resp.json().get("detail", resp.text)
|
|
except Exception: # nosec B110
|
|
detail = resp.text
|
|
console.print(f"[red]{method} {url} failed: {resp.status_code} — {detail}[/]")
|
|
raise typer.Exit(1)
|
|
return resp
|