From cb692d570ae6b1c75d1404689c21229506596bf4 Mon Sep 17 00:00:00 2001 From: anti Date: Fri, 24 Apr 2026 00:23:00 -0400 Subject: [PATCH] feat(cli): status queries systemd for every decnet-* unit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'decnet status' used to psutil-scan for cmdlines matching hand-coded service launch args. That worked on dev boxes running workers via 'python -m decnet.cli ...' but missed the systemd reality on real hosts: units may be installed but not started, failed, or in auto-restart — all invisible to a cmdline grep. New behaviour: status calls `systemctl list-units --type=service --all --output=json 'decnet-*.service'` and renders the unit/load/active/sub/description matrix. One view works for masters, agents, and mixed hosts — iterates over whatever 'decnet-*' units were installed by 'decnet init' / the enroll-bundle. Agent/master mode filtering is no longer needed in the CLI; the host literally does not have master-only units installed if it enrolled as an agent. The psutil path survives as a fallback for boxes without systemd (dev laptops, CI containers, minimal init systems) so the command stays useful there. Clearly labelled 'psutil fallback' in the table title so operators know which view they're looking at. --- decnet/cli/lifecycle.py | 54 +++++++++++++++++++++++++++++++++++++++-- decnet/cli/utils.py | 40 ++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 2 deletions(-) diff --git a/decnet/cli/lifecycle.py b/decnet/cli/lifecycle.py index 0661729a..76f26d34 100644 --- a/decnet/cli/lifecycle.py +++ b/decnet/cli/lifecycle.py @@ -55,15 +55,65 @@ def register(app: typer.Typer) -> None: @app.command() def status() -> None: - """Show running deckies and their status.""" + """Show running deckies and the state of every ``decnet-*`` unit. 
+ + Prefers systemd (``systemctl list-units 'decnet-*.service'``) so + agents, masters and mixed hosts all get one consistent view of + what's installed, loaded, and active. Falls back to the psutil + cmdline registry on boxes without systemd (dev laptops, CI + containers, non-systemd init) so `decnet status` is still useful + there. + """ log.info("status command invoked") from decnet.engine import status as _status _status() + units = _utils._systemd_units() + if units is not None: + _render_systemd_units(units) + else: + _render_psutil_fallback() + + def _render_systemd_units(units: list[dict]) -> None: + svc_table = Table(title="DECNET Services (systemd)", show_lines=True) + svc_table.add_column("Unit", style="bold cyan") + svc_table.add_column("Load") + svc_table.add_column("Active") + svc_table.add_column("Sub") + svc_table.add_column("Description", style="dim") + + if not units: + console.print( + "[yellow]No decnet-* systemd units loaded. " + "Run `sudo decnet init` to install them.[/]" + ) + return + + def _active_style(active: str) -> str: + if active == "active": + return "[green]active[/]" + if active == "failed": + return "[red]failed[/]" + return f"[yellow]{active}[/]" + + for u in sorted(units, key=lambda x: x.get("unit", "")): + svc_table.add_row( + u.get("unit", ""), + u.get("load", ""), + _active_style(u.get("active", "")), + u.get("sub", ""), + u.get("description", ""), + ) + console.print(svc_table) + + def _render_psutil_fallback() -> None: registry = _utils._service_registry(str(DECNET_INGEST_LOG_FILE)) if _agent_mode_active(): registry = [r for r in registry if r[0] not in {"Mutator", "Profiler", "API"}] - svc_table = Table(title="DECNET Services", show_lines=True) + svc_table = Table( + title="DECNET Services (psutil fallback — systemd unavailable)", + show_lines=True, + ) svc_table.add_column("Service", style="bold cyan") svc_table.add_column("Status") svc_table.add_column("PID", style="dim") diff --git a/decnet/cli/utils.py 
b/decnet/cli/utils.py index 19541738..c1dbf6fa 100644 --- a/decnet/cli/utils.py +++ b/decnet/cli/utils.py @@ -134,6 +134,46 @@ def _service_registry(log_file: str) -> list[tuple[str, callable, list[str]]]: ] +def _systemd_units(pattern: str = "decnet-*.service") -> list[dict] | None: + """Return state of every systemd unit matching *pattern*, or ``None`` + when systemctl is unavailable (non-systemd host, container lab, + PATH-stripped env, user-manager unreachable). + + Output shape mirrors ``systemctl list-units --output=json``: each + dict has ``unit``, ``load``, ``active``, ``sub``, ``description``. + Empty list = systemd works but no matching units are loaded (fresh + host that never ran ``decnet init``). + """ + import json # local import — avoids paying it on every CLI startup + import shutil + + if not shutil.which("systemctl"): + return None + try: + proc = subprocess.run( # nosec B603 B607 — fixed argv, no shell + [ + "systemctl", "list-units", + "--type=service", "--all", + "--no-legend", "--no-pager", + "--output=json", + pattern, + ], + capture_output=True, + text=True, + timeout=5, + check=False, + ) + except (OSError, subprocess.SubprocessError): + return None + if proc.returncode != 0: + return None + try: + data = json.loads(proc.stdout or "[]") + except json.JSONDecodeError: + return None + return data if isinstance(data, list) else None + + def _kill_all_services() -> None: """Find and kill all running DECNET microservice processes.""" registry = _service_registry(str(DECNET_INGEST_LOG_FILE))