From 262a84ca53c4c20a49ff7603f6b01f3123fdef6d Mon Sep 17 00:00:00 2001 From: anti Date: Sun, 19 Apr 2026 22:42:52 -0400 Subject: [PATCH] refactor(cli): split decnet/cli.py monolith into decnet/cli/ package The 1,878-line cli.py held every Typer command plus process/HTTP helpers and mode-gating logic. Split into one module per command using a register(app) pattern so submodules never import app at module scope, eliminating circular-import risk. - utils.py: process helpers, _http_request, _kill_all_services, console, log - gating.py: MASTER_ONLY_* sets, _require_master_mode, _gate_commands_by_mode - deploy.py: deploy + _deploy_swarm (tightly coupled) - lifecycle.py: status, teardown, redeploy - workers.py: probe, collect, mutate, correlate - inventory.py, swarm.py, db.py, and one file per remaining command __init__.py calls register(app) on each module then runs the mode gate last, and re-exports the private symbols tests patch against (_db_reset_mysql_async, _kill_all_services, _require_master_mode, etc.). Test patches retargeted to the submodule where each name now resolves. Enroll-bundle tarball test updated to assert decnet/cli/__init__.py. No behavioral change. --- decnet/cli.py | 1878 -------------------- decnet/cli/__init__.py | 80 + decnet/cli/agent.py | 64 + decnet/cli/api.py | 53 + decnet/cli/db.py | 130 ++ decnet/cli/deploy.py | 307 ++++ decnet/cli/forwarder.py | 74 + decnet/cli/gating.py | 71 + decnet/cli/inventory.py | 52 + decnet/cli/lifecycle.py | 97 + decnet/cli/listener.py | 57 + decnet/cli/profiler.py | 34 + decnet/cli/sniffer.py | 31 + decnet/cli/swarm.py | 346 ++++ decnet/cli/swarmctl.py | 104 ++ decnet/cli/updater.py | 46 + decnet/cli/utils.py | 177 ++ decnet/cli/web.py | 120 ++ decnet/cli/workers.py | 142 ++ tests/api/swarm_mgmt/test_enroll_bundle.py | 2 +- tests/swarm/test_cli_swarm.py | 16 +- tests/swarm/test_cli_swarm_update.py | 4 +- tests/test_cli.py | 52 +- tests/test_cli_db_reset.py | 8 +- 24 files changed, 2026 insertions(+), 1919 deletions(-) delete mode 100644 decnet/cli.py create mode 100644 decnet/cli/__init__.py create mode 100644 decnet/cli/agent.py create mode 100644 decnet/cli/api.py create mode 100644 decnet/cli/db.py create mode 100644 decnet/cli/deploy.py create mode 100644 decnet/cli/forwarder.py create mode 100644 decnet/cli/gating.py create mode 100644 decnet/cli/inventory.py create mode 100644 decnet/cli/lifecycle.py create mode 100644 decnet/cli/listener.py create mode 100644 decnet/cli/profiler.py create mode 100644 decnet/cli/sniffer.py create mode 100644 decnet/cli/swarm.py create mode 100644 decnet/cli/swarmctl.py create mode 100644 decnet/cli/updater.py create mode 100644 decnet/cli/utils.py create mode 100644 decnet/cli/web.py create mode 100644 decnet/cli/workers.py diff --git a/decnet/cli.py b/decnet/cli.py deleted file mode 100644 index 274f2ba..0000000 --- a/decnet/cli.py +++ /dev/null @@ -1,1878 +0,0 @@ -""" -DECNET CLI — entry point for all commands. - -Usage: - decnet deploy --mode unihost --deckies 5 --randomize-services - decnet status - decnet teardown [--all | --id decky-01] - decnet services -""" - -import signal -from pathlib import Path -from typing import Optional - -import typer -from rich.console import Console -from rich.table import Table - -from decnet.logging import get_logger -from decnet.env import ( - DECNET_API_HOST, - DECNET_API_PORT, - DECNET_INGEST_LOG_FILE, - DECNET_WEB_HOST, - DECNET_WEB_PORT, -) -from decnet.archetypes import Archetype, all_archetypes, get_archetype -from decnet.config import ( - DecnetConfig, -) -from decnet.distros import all_distros, get_distro -from decnet.fleet import all_service_names, build_deckies, build_deckies_from_ini -from decnet.ini_loader import load_ini -from decnet.network import detect_interface, detect_subnet, allocate_ips, get_host_ip -from decnet.services.registry import all_services - -log = get_logger("cli") - - -def _daemonize() -> None: - """Fork the current process into a background daemon (Unix double-fork).""" - import os - import sys - - if os.fork() > 0: - raise SystemExit(0) - os.setsid() - if os.fork() > 0: - raise SystemExit(0) - sys.stdout = open(os.devnull, "w") # noqa: SIM115 - sys.stderr = open(os.devnull, "w") # noqa: SIM115 - sys.stdin = open(os.devnull, "r") # noqa: SIM115 - - -def _pid_dir() -> Path: - """Return the writable PID directory. - - /opt/decnet when it exists and is writable (production), else - ~/.decnet (dev). The directory is created if needed.""" - import os - candidates = [Path("/opt/decnet"), Path.home() / ".decnet"] - for path in candidates: - try: - path.mkdir(parents=True, exist_ok=True) - if os.access(path, os.W_OK): - return path - except (PermissionError, OSError): - continue - # Last-resort fallback so we never raise from a helper. - return Path("/tmp") # nosec B108 - - -def _spawn_detached(argv: list[str], pid_file: Path) -> int: - """Spawn a DECNET subcommand as a fully-independent sibling process. - - The parent does NOT wait() on this child. start_new_session=True puts - the child in its own session so SIGHUP on parent exit doesn't kill it; - stdin/stdout/stderr go to /dev/null so the launching shell can close - without EIO on the child. close_fds=True prevents inherited sockets - from pinning ports we're trying to rebind. - - This is deliberately NOT a supervisor — we fire-and-forget. If the - child dies, the operator restarts it manually via its own subcommand - (e.g. `decnet forwarder --daemon …`). Detached means detached. - """ - import os - import subprocess # nosec B404 - - # If the pid_file points at a live process, don't spawn a duplicate — - # agent/swarmctl auto-spawn is called on every startup, and the first - # run's sibling is still alive across restarts. - if pid_file.exists(): - try: - existing = int(pid_file.read_text().strip()) - os.kill(existing, 0) - return existing - except (ValueError, ProcessLookupError, PermissionError, OSError): - pass # stale pid_file — fall through and spawn - - with open(os.devnull, "rb") as dn_in, open(os.devnull, "ab") as dn_out: - proc = subprocess.Popen( # nosec B603 - argv, - stdin=dn_in, stdout=dn_out, stderr=dn_out, - start_new_session=True, close_fds=True, - ) - pid_file.parent.mkdir(parents=True, exist_ok=True) - pid_file.write_text(f"{proc.pid}\n") - return proc.pid - - -app = typer.Typer( - name="decnet", - help="Deploy a deception network of honeypot deckies on your LAN.", - no_args_is_help=True, -) -console = Console() - - -def _kill_all_services() -> None: - """Find and kill all running DECNET microservice processes.""" - import os - - registry = _service_registry(str(DECNET_INGEST_LOG_FILE)) - killed = 0 - for name, match_fn, _launch_args in registry: - pid = _is_running(match_fn) - if pid is not None: - console.print(f"[yellow]Stopping {name} (PID {pid})...[/]") - os.kill(pid, signal.SIGTERM) - killed += 1 - - if killed: - console.print(f"[green]{killed} background process(es) stopped.[/]") - else: - console.print("[dim]No DECNET services were running.[/]") - - -@app.command() -def api( - port: int = typer.Option(DECNET_API_PORT, "--port", help="Port for the backend API"), - host: str = typer.Option(DECNET_API_HOST, "--host", help="Host IP for the backend API"), - log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Path to the DECNET log file to monitor"), - daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), - workers: int = typer.Option(1, "--workers", "-w", min=1, help="Number of uvicorn worker processes"), -) -> None: - """Run the DECNET API and Web Dashboard in standalone mode.""" - import subprocess # nosec B404 - import sys - import os - import signal - - _require_master_mode("api") - if daemon: - log.info("API daemonizing host=%s port=%d workers=%d", host, port, workers) - _daemonize() - - log.info("API command invoked host=%s port=%d workers=%d", host, port, workers) - console.print(f"[green]Starting DECNET API on {host}:{port} (workers={workers})...[/]") - _env: dict[str, str] = os.environ.copy() - _env["DECNET_INGEST_LOG_FILE"] = str(log_file) - _cmd = [sys.executable, "-m", "uvicorn", "decnet.web.api:app", - "--host", host, "--port", str(port), "--workers", str(workers)] - # Put uvicorn (and its worker children) in their own process group so we - # can signal the whole tree on Ctrl+C. Without this, only the supervisor - # receives SIGINT from the terminal and worker children may survive and - # be respawned — the "forkbomb" ANTI hit during testing. - try: - proc = subprocess.Popen(_cmd, env=_env, start_new_session=True) # nosec B603 B404 - try: - proc.wait() - except KeyboardInterrupt: - try: - os.killpg(proc.pid, signal.SIGTERM) - try: - proc.wait(timeout=10) - except subprocess.TimeoutExpired: - os.killpg(proc.pid, signal.SIGKILL) - proc.wait() - except ProcessLookupError: - pass - except (FileNotFoundError, subprocess.SubprocessError): - console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]") - - -@app.command() -def swarmctl( - port: int = typer.Option(8770, "--port", help="Port for the swarm controller"), - host: str = typer.Option("127.0.0.1", "--host", help="Bind address for the swarm controller"), - daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), - no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"), - tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"), - cert: Optional[str] = typer.Option(None, "--cert", help="BYOC: path to TLS server cert (PEM). Auto-issues from the DECNET CA if omitted."), - key: Optional[str] = typer.Option(None, "--key", help="BYOC: path to TLS server private key (PEM)."), - client_ca: Optional[str] = typer.Option(None, "--client-ca", help="CA bundle used to verify worker client certs. Defaults to the DECNET CA."), -) -> None: - """Run the DECNET SWARM controller (master-side, separate process from `decnet api`). - - By default, `decnet swarmctl` auto-spawns `decnet listener` as a fully- - detached sibling process so the master starts accepting forwarder - connections on 6514 without a second manual invocation. The listener - survives swarmctl restarts and crashes — if it dies on its own, - restart it manually with `decnet listener --daemon …`. Pass - --no-listener to skip. - - Pass ``--tls`` to serve over HTTPS with mutual-TLS enforcement. By - default the server cert is auto-issued from the DECNET CA under - ``~/.decnet/swarmctl/`` so enrolled workers (which already ship that - CA's ``ca.crt``) trust it out of the box. BYOC via ``--cert``/``--key`` - if you need a publicly-trusted or externally-managed cert. - """ - import subprocess # nosec B404 - import sys - import os - import signal - - _require_master_mode("swarmctl") - if daemon: - log.info("swarmctl daemonizing host=%s port=%d", host, port) - _daemonize() - - if not no_listener: - listener_host = os.environ.get("DECNET_LISTENER_HOST", "0.0.0.0") # nosec B104 - listener_port = int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514")) - lst_argv = [ - sys.executable, "-m", "decnet", "listener", - "--host", listener_host, - "--port", str(listener_port), - "--daemon", - ] - try: - pid = _spawn_detached(lst_argv, _pid_dir() / "listener.pid") - log.info("swarmctl auto-spawned listener pid=%d bind=%s:%d", - pid, listener_host, listener_port) - console.print(f"[dim]Auto-spawned listener (pid {pid}) on {listener_host}:{listener_port}.[/]") - except Exception as e: # noqa: BLE001 - log.warning("swarmctl could not auto-spawn listener: %s", e) - console.print(f"[yellow]listener auto-spawn skipped: {e}[/]") - - log.info("swarmctl command invoked host=%s port=%d tls=%s", host, port, tls) - scheme = "https" if tls else "http" - console.print(f"[green]Starting DECNET SWARM controller on {scheme}://{host}:{port}...[/]") - _cmd = [sys.executable, "-m", "uvicorn", "decnet.web.swarm_api:app", - "--host", host, "--port", str(port)] - if tls: - from decnet.swarm import pki as _pki - # BYOC path: operator supplied cert+key explicitly. Else auto-issue - # from the existing DECNET CA so workers' already-deployed ca.crt - # verifies the endpoint with no extra steps. - if cert and key: - cert_path, key_path = cert, key - elif cert or key: - console.print("[red]--cert and --key must be provided together.[/]") - raise typer.Exit(code=2) - else: - auto_cert, auto_key, _auto_ca = _pki.ensure_swarmctl_cert(host) - cert_path, key_path = str(auto_cert), str(auto_key) - console.print(f"[dim]Auto-issued swarmctl server cert → {cert_path}[/]") - ca_path = client_ca or str(_pki.DEFAULT_CA_DIR / "ca.crt") - _cmd += [ - "--ssl-keyfile", key_path, - "--ssl-certfile", cert_path, - "--ssl-ca-certs", ca_path, - # CERT_REQUIRED — a worker must present a CA-signed client cert - # before FastAPI sees the request. The heartbeat endpoint then - # pins the cert fingerprint per-host on top of this. - "--ssl-cert-reqs", "2", - ] - try: - proc = subprocess.Popen(_cmd, start_new_session=True) # nosec B603 B404 - try: - proc.wait() - except KeyboardInterrupt: - try: - os.killpg(proc.pid, signal.SIGTERM) - try: - proc.wait(timeout=10) - except subprocess.TimeoutExpired: - os.killpg(proc.pid, signal.SIGKILL) - proc.wait() - except ProcessLookupError: - pass - except (FileNotFoundError, subprocess.SubprocessError): - console.print("[red]Failed to start swarmctl. Ensure 'uvicorn' is installed in the current environment.[/]") - - -@app.command() -def agent( - port: int = typer.Option(8765, "--port", help="Port for the worker agent"), - host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the worker agent"), # nosec B104 - agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker cert bundle dir (default: ~/.decnet/agent, expanded under the running user's HOME — set this when running as sudo/root)"), - daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), - no_forwarder: bool = typer.Option(False, "--no-forwarder", help="Do not auto-spawn the log forwarder alongside the agent"), -) -> None: - """Run the DECNET SWARM worker agent (requires a cert bundle in ~/.decnet/agent/). - - By default, `decnet agent` auto-spawns `decnet forwarder` as a fully- - detached sibling process so worker logs start flowing to the master - without a second manual invocation. The forwarder survives agent - restarts and crashes — if it dies on its own, restart it manually - with `decnet forwarder --daemon …`. Pass --no-forwarder to skip. - """ - import os - import pathlib as _pathlib - import sys as _sys - from decnet.agent import server as _agent_server - from decnet.env import DECNET_SWARM_MASTER_HOST, DECNET_INGEST_LOG_FILE - from decnet.swarm import pki as _pki - - resolved_dir = _pathlib.Path(agent_dir) if agent_dir else _pki.DEFAULT_AGENT_DIR - - if daemon: - log.info("agent daemonizing host=%s port=%d", host, port) - _daemonize() - - # Auto-spawn the forwarder as a detached sibling BEFORE blocking on the - # agent server. Requires DECNET_SWARM_MASTER_HOST — if unset, the - # auto-spawn is silently skipped (single-host dev, or operator plans to - # start the forwarder separately). - if not no_forwarder and DECNET_SWARM_MASTER_HOST: - fw_argv = [ - _sys.executable, "-m", "decnet", "forwarder", - "--master-host", DECNET_SWARM_MASTER_HOST, - "--master-port", str(int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514"))), - "--agent-dir", str(resolved_dir), - "--log-file", str(DECNET_INGEST_LOG_FILE), - "--daemon", - ] - try: - pid = _spawn_detached(fw_argv, _pid_dir() / "forwarder.pid") - log.info("agent auto-spawned forwarder pid=%d master=%s", pid, DECNET_SWARM_MASTER_HOST) - console.print(f"[dim]Auto-spawned forwarder (pid {pid}) → {DECNET_SWARM_MASTER_HOST}.[/]") - except Exception as e: # noqa: BLE001 - log.warning("agent could not auto-spawn forwarder: %s", e) - console.print(f"[yellow]forwarder auto-spawn skipped: {e}[/]") - elif not no_forwarder: - log.info("agent skipping forwarder auto-spawn (DECNET_SWARM_MASTER_HOST unset)") - - log.info("agent command invoked host=%s port=%d dir=%s", host, port, resolved_dir) - console.print(f"[green]Starting DECNET worker agent on {host}:{port} (mTLS)...[/]") - rc = _agent_server.run(host, port, agent_dir=resolved_dir) - if rc != 0: - raise typer.Exit(rc) - - -@app.command() -def updater( - port: int = typer.Option(8766, "--port", help="Port for the self-updater daemon"), - host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the updater"), # nosec B104 - updater_dir: Optional[str] = typer.Option(None, "--updater-dir", help="Updater cert bundle dir (default: ~/.decnet/updater)"), - install_dir: Optional[str] = typer.Option(None, "--install-dir", help="Release install root (default: /opt/decnet)"), - agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker agent cert bundle (for local /health probes; default: ~/.decnet/agent)"), - daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), -) -> None: - """Run the DECNET self-updater (requires a bundle in ~/.decnet/updater/).""" - import pathlib as _pathlib - from decnet.swarm import pki as _pki - from decnet.updater import server as _upd_server - - resolved_updater = _pathlib.Path(updater_dir) if updater_dir else _upd_server.DEFAULT_UPDATER_DIR - resolved_install = _pathlib.Path(install_dir) if install_dir else _pathlib.Path("/opt/decnet") - resolved_agent = _pathlib.Path(agent_dir) if agent_dir else _pki.DEFAULT_AGENT_DIR - - if daemon: - log.info("updater daemonizing host=%s port=%d", host, port) - _daemonize() - - log.info( - "updater command invoked host=%s port=%d updater_dir=%s install_dir=%s", - host, port, resolved_updater, resolved_install, - ) - console.print(f"[green]Starting DECNET self-updater on {host}:{port} (mTLS)...[/]") - rc = _upd_server.run( - host, port, - updater_dir=resolved_updater, - install_dir=resolved_install, - agent_dir=resolved_agent, - ) - if rc != 0: - raise typer.Exit(rc) - - -@app.command() -def listener( - bind_host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the master syslog-TLS listener"), # nosec B104 - bind_port: int = typer.Option(6514, "--port", help="Listener TCP port (RFC 5425 default 6514)"), - log_path: Optional[str] = typer.Option(None, "--log-path", help="RFC 5424 forensic sink (default: ./master.log)"), - json_path: Optional[str] = typer.Option(None, "--json-path", help="Parsed-JSON ingest sink (default: ./master.json)"), - ca_dir: Optional[str] = typer.Option(None, "--ca-dir", help="DECNET CA dir (default: ~/.decnet/ca)"), - daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), -) -> None: - """Run the master-side syslog-over-TLS listener (RFC 5425, mTLS).""" - import asyncio - import pathlib - from decnet.swarm import pki - from decnet.swarm.log_listener import ListenerConfig, run_listener - - resolved_ca_dir = pathlib.Path(ca_dir) if ca_dir else pki.DEFAULT_CA_DIR - resolved_log = pathlib.Path(log_path) if log_path else pathlib.Path("master.log") - resolved_json = pathlib.Path(json_path) if json_path else pathlib.Path("master.json") - - cfg = ListenerConfig( - log_path=resolved_log, json_path=resolved_json, - bind_host=bind_host, bind_port=bind_port, ca_dir=resolved_ca_dir, - ) - - if daemon: - log.info("listener daemonizing host=%s port=%d", bind_host, bind_port) - _daemonize() - - log.info("listener command invoked host=%s port=%d", bind_host, bind_port) - console.print(f"[green]Starting DECNET log listener on {bind_host}:{bind_port} (mTLS)...[/]") - - async def _main() -> None: - stop = asyncio.Event() - loop = asyncio.get_running_loop() - for sig in (signal.SIGTERM, signal.SIGINT): - try: - loop.add_signal_handler(sig, stop.set) - except (NotImplementedError, RuntimeError): # pragma: no cover - pass - await run_listener(cfg, stop_event=stop) - - try: - asyncio.run(_main()) - except KeyboardInterrupt: - pass - - -@app.command() -def forwarder( - master_host: Optional[str] = typer.Option(None, "--master-host", help="Master listener hostname/IP (default: $DECNET_SWARM_MASTER_HOST)"), - master_port: int = typer.Option(6514, "--master-port", help="Master listener TCP port (RFC 5425 default 6514)"), - log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Local RFC 5424 file to tail and forward"), - agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker cert bundle dir (default: ~/.decnet/agent)"), - state_db: Optional[str] = typer.Option(None, "--state-db", help="Forwarder offset SQLite path (default: /forwarder.db)"), - poll_interval: float = typer.Option(0.5, "--poll-interval", help="Seconds between log file stat checks"), - daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), -) -> None: - """Run the worker-side syslog-over-TLS forwarder (RFC 5425, mTLS to master:6514).""" - import asyncio - import pathlib - from decnet.env import DECNET_SWARM_MASTER_HOST - from decnet.swarm import pki - from decnet.swarm.log_forwarder import ForwarderConfig, run_forwarder - - resolved_host = master_host or DECNET_SWARM_MASTER_HOST - if not resolved_host: - console.print("[red]--master-host is required (or set DECNET_SWARM_MASTER_HOST).[/]") - raise typer.Exit(2) - - resolved_agent_dir = pathlib.Path(agent_dir) if agent_dir else pki.DEFAULT_AGENT_DIR - if not (resolved_agent_dir / "worker.crt").exists(): - console.print(f"[red]No worker cert bundle at {resolved_agent_dir} — enroll from the master first.[/]") - raise typer.Exit(2) - - if not log_file: - console.print("[red]--log-file is required.[/]") - raise typer.Exit(2) - - cfg = ForwarderConfig( - log_path=pathlib.Path(log_file), - master_host=resolved_host, - master_port=master_port, - agent_dir=resolved_agent_dir, - state_db=pathlib.Path(state_db) if state_db else None, - ) - - if daemon: - log.info("forwarder daemonizing master=%s:%d log=%s", resolved_host, master_port, log_file) - _daemonize() - - log.info("forwarder command invoked master=%s:%d log=%s", resolved_host, master_port, log_file) - console.print(f"[green]Starting DECNET forwarder → {resolved_host}:{master_port} (mTLS)...[/]") - - async def _main() -> None: - stop = asyncio.Event() - loop = asyncio.get_running_loop() - for sig in (signal.SIGTERM, signal.SIGINT): - try: - loop.add_signal_handler(sig, stop.set) - except (NotImplementedError, RuntimeError): # pragma: no cover - pass - await run_forwarder(cfg, poll_interval=poll_interval, stop_event=stop) - - try: - asyncio.run(_main()) - except KeyboardInterrupt: - pass - - -# --------------------------------------------------------------------------- -# `decnet swarm ...` — master-side operator commands (HTTP to local swarmctl) -# --------------------------------------------------------------------------- - -swarm_app = typer.Typer( - name="swarm", - help="Manage swarm workers (enroll, list, decommission). Requires `decnet swarmctl` running.", - no_args_is_help=True, -) -app.add_typer(swarm_app, name="swarm") - - -_DEFAULT_SWARMCTL_URL = "http://127.0.0.1:8770" - - -def _swarmctl_base_url(url: Optional[str]) -> str: - import os as _os - return url or _os.environ.get("DECNET_SWARMCTL_URL", _DEFAULT_SWARMCTL_URL) - - -def _http_request(method: str, url: str, *, json_body: Optional[dict] = None, timeout: float = 30.0): - """Tiny sync wrapper around httpx; avoids leaking async into the CLI.""" - import httpx - try: - resp = httpx.request(method, url, json=json_body, timeout=timeout) - except httpx.HTTPError as exc: - console.print(f"[red]Could not reach swarm controller at {url}: {exc}[/]") - console.print("[dim]Is `decnet swarmctl` running?[/]") - raise typer.Exit(2) - if resp.status_code >= 400: - try: - detail = resp.json().get("detail", resp.text) - except Exception: # nosec B110 - detail = resp.text - console.print(f"[red]{method} {url} failed: {resp.status_code} — {detail}[/]") - raise typer.Exit(1) - return resp - - -def _deploy_swarm(config: "DecnetConfig", *, dry_run: bool, no_cache: bool) -> None: - """Shard deckies round-robin across enrolled workers and POST to swarmctl.""" - base = _swarmctl_base_url(None) - resp = _http_request("GET", base + "/swarm/hosts?host_status=enrolled") - enrolled = resp.json() - resp2 = _http_request("GET", base + "/swarm/hosts?host_status=active") - active = resp2.json() - # Treat enrolled+active workers as dispatch targets. - workers = [*enrolled, *active] - if not workers: - console.print("[red]No enrolled workers — run `decnet swarm enroll ...` first.[/]") - raise typer.Exit(1) - - # Round-robin assign deckies to workers by host_uuid (mutate the config's - # decky entries in-place — DecnetConfig is a pydantic model so we use - # model_copy on each decky). - assigned: list = [] - for idx, d in enumerate(config.deckies): - target = workers[idx % len(workers)] - assigned.append(d.model_copy(update={"host_uuid": target["uuid"]})) - config = config.model_copy(update={"deckies": assigned}) - - body = {"config": config.model_dump(mode="json"), "dry_run": dry_run, "no_cache": no_cache} - console.print(f"[cyan]Dispatching {len(config.deckies)} deckies across {len(workers)} worker(s)...[/]") - # Swarm deploy can be slow (image pulls on each worker) — give it plenty. - resp3 = _http_request("POST", base + "/swarm/deploy", json_body=body, timeout=900.0) - results = resp3.json().get("results", []) - - table = Table(title="SWARM deploy results") - for col in ("worker", "host_uuid", "ok", "detail"): - table.add_column(col) - any_failed = False - for r in results: - ok = bool(r.get("ok")) - if not ok: - any_failed = True - detail = r.get("detail") - if isinstance(detail, dict): - detail = detail.get("status") or "ok" - table.add_row( - str(r.get("host_name") or ""), - str(r.get("host_uuid") or ""), - "[green]yes[/]" if ok else "[red]no[/]", - str(detail)[:80], - ) - console.print(table) - if any_failed: - raise typer.Exit(1) - - -@swarm_app.command("enroll") -def swarm_enroll( - name: str = typer.Option(..., "--name", help="Short hostname for the worker (also the cert CN)"), - address: str = typer.Option(..., "--address", help="IP or DNS the master uses to reach the worker"), - agent_port: int = typer.Option(8765, "--agent-port", help="Worker agent TCP port"), - sans: Optional[str] = typer.Option(None, "--sans", help="Comma-separated extra SANs for the worker cert"), - notes: Optional[str] = typer.Option(None, "--notes", help="Free-form operator notes"), - out_dir: Optional[str] = typer.Option(None, "--out-dir", help="Write the bundle (ca.crt/worker.crt/worker.key) to this dir for scp"), - updater: bool = typer.Option(False, "--updater", help="Also issue an updater-identity cert (CN=updater@) for the remote self-updater"), - url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL (default: 127.0.0.1:8770)"), -) -> None: - """Issue a mTLS bundle for a new worker and register it in the swarm.""" - import pathlib as _pathlib - - body: dict = {"name": name, "address": address, "agent_port": agent_port} - if sans: - body["sans"] = [s.strip() for s in sans.split(",") if s.strip()] - if notes: - body["notes"] = notes - if updater: - body["issue_updater_bundle"] = True - - resp = _http_request("POST", _swarmctl_base_url(url) + "/swarm/enroll", json_body=body) - data = resp.json() - - console.print(f"[green]Enrolled worker:[/] {data['name']} " - f"[dim]uuid=[/]{data['host_uuid']} " - f"[dim]fingerprint=[/]{data['fingerprint']}") - if data.get("updater"): - console.print(f"[green] + updater identity[/] " - f"[dim]fingerprint=[/]{data['updater']['fingerprint']}") - - if out_dir: - target = _pathlib.Path(out_dir).expanduser() - target.mkdir(parents=True, exist_ok=True) - (target / "ca.crt").write_text(data["ca_cert_pem"]) - (target / "worker.crt").write_text(data["worker_cert_pem"]) - (target / "worker.key").write_text(data["worker_key_pem"]) - for leaf in ("worker.key",): - try: - (target / leaf).chmod(0o600) - except OSError: - pass - console.print(f"[cyan]Agent bundle written to[/] {target}") - - if data.get("updater"): - upd_target = target.parent / f"{target.name}-updater" - upd_target.mkdir(parents=True, exist_ok=True) - (upd_target / "ca.crt").write_text(data["ca_cert_pem"]) - (upd_target / "updater.crt").write_text(data["updater"]["updater_cert_pem"]) - (upd_target / "updater.key").write_text(data["updater"]["updater_key_pem"]) - try: - (upd_target / "updater.key").chmod(0o600) - except OSError: - pass - console.print(f"[cyan]Updater bundle written to[/] {upd_target}") - console.print("[dim]Ship the agent dir to ~/.decnet/agent/ and the updater dir to ~/.decnet/updater/ on the worker.[/]") - else: - console.print("[dim]Ship this directory to the worker at ~/.decnet/agent/ (or wherever `decnet agent --agent-dir` points).[/]") - else: - console.print("[yellow]No --out-dir given — bundle PEMs are in the JSON response; persist them before leaving this shell.[/]") - - -@swarm_app.command("list") -def swarm_list( - host_status: Optional[str] = typer.Option(None, "--status", help="Filter by status (enrolled|active|unreachable|decommissioned)"), - url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"), -) -> None: - """List enrolled workers.""" - q = f"?host_status={host_status}" if host_status else "" - resp = _http_request("GET", _swarmctl_base_url(url) + "/swarm/hosts" + q) - rows = resp.json() - if not rows: - console.print("[dim]No workers enrolled.[/]") - return - table = Table(title="DECNET swarm workers") - for col in ("name", "address", "port", "status", "last heartbeat", "enrolled"): - table.add_column(col) - for r in rows: - table.add_row( - r.get("name") or "", - r.get("address") or "", - str(r.get("agent_port") or ""), - r.get("status") or "", - str(r.get("last_heartbeat") or "—"), - str(r.get("enrolled_at") or "—"), - ) - console.print(table) - - -@swarm_app.command("check") -def swarm_check( - url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"), - json_out: bool = typer.Option(False, "--json", help="Emit JSON instead of a table"), -) -> None: - """Actively probe every enrolled worker and refresh status + last_heartbeat.""" - resp = _http_request("POST", _swarmctl_base_url(url) + "/swarm/check", timeout=60.0) - payload = resp.json() - results = payload.get("results", []) - - if json_out: - console.print_json(data=payload) - return - - if not results: - console.print("[dim]No workers enrolled.[/]") - return - - table = Table(title="DECNET swarm check") - for col in ("name", "address", "reachable", "detail"): - table.add_column(col) - for r in results: - reachable = r.get("reachable") - mark = "[green]yes[/]" if reachable else "[red]no[/]" - detail = r.get("detail") - detail_str = "—" - if isinstance(detail, dict): - detail_str = detail.get("status") or ", ".join(f"{k}={v}" for k, v in detail.items()) - elif detail is not None: - detail_str = str(detail) - table.add_row( - r.get("name") or "", - r.get("address") or "", - mark, - detail_str, - ) - console.print(table) - - -@swarm_app.command("update") -def swarm_update( - host: Optional[str] = typer.Option(None, "--host", help="Target worker (name or UUID). Omit with --all."), - all_hosts: bool = typer.Option(False, "--all", help="Push to every enrolled worker."), - include_self: bool = typer.Option(False, "--include-self", help="Also push to each updater's /update-self after a successful agent update."), - root: Optional[str] = typer.Option(None, "--root", help="Source tree to tar (default: CWD)."), - exclude: list[str] = typer.Option([], "--exclude", help="Additional exclude glob. Repeatable."), - updater_port: int = typer.Option(8766, "--updater-port", help="Port the workers' updater listens on."), - dry_run: bool = typer.Option(False, "--dry-run", help="Build the tarball and print stats; no network."), - url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL."), -) -> None: - """Push the current working tree to workers' self-updaters (with auto-rollback on failure).""" - import asyncio - import pathlib as _pathlib - - from decnet.swarm.tar_tree import tar_working_tree, detect_git_sha - from decnet.swarm.updater_client import UpdaterClient - - if not (host or all_hosts): - console.print("[red]Supply --host or --all.[/]") - raise typer.Exit(2) - if host and all_hosts: - console.print("[red]--host and --all are mutually exclusive.[/]") - raise typer.Exit(2) - - base = _swarmctl_base_url(url) - resp = _http_request("GET", base + "/swarm/hosts") - rows = resp.json() - if host: - targets = [r for r in rows if r.get("name") == host or r.get("uuid") == host] - if not targets: - console.print(f"[red]No enrolled worker matching '{host}'.[/]") - raise typer.Exit(1) - else: - targets = [r for r in rows if r.get("status") != "decommissioned"] - if not targets: - console.print("[dim]No targets.[/]") - return - - tree_root = _pathlib.Path(root) if root else _pathlib.Path.cwd() - sha = detect_git_sha(tree_root) - console.print(f"[dim]Tarring[/] {tree_root} [dim]sha={sha or '(not a git repo)'}[/]") - tarball = tar_working_tree(tree_root, extra_excludes=exclude) - console.print(f"[dim]Tarball size:[/] {len(tarball):,} bytes") - - if dry_run: - console.print("[yellow]--dry-run: not pushing.[/]") - for t in targets: - console.print(f" would push to [cyan]{t.get('name')}[/] at {t.get('address')}:{updater_port}") - return - - async def _push_one(h: dict) -> dict: - name = h.get("name") or h.get("uuid") - out: dict = {"name": name, "address": h.get("address"), "agent": None, "self": None} - try: - async with UpdaterClient(h, updater_port=updater_port) as u: - r = await u.update(tarball, sha=sha) - out["agent"] = {"status": r.status_code, "body": r.json() if r.content else {}} - if r.status_code == 200 and include_self: - # Agent first, updater second — see plan. - rs = await u.update_self(tarball, sha=sha) - # Connection-drop is expected for update-self. - out["self"] = {"status": rs.status_code, "body": rs.json() if rs.content else {}} - except Exception as exc: # noqa: BLE001 - out["error"] = f"{type(exc).__name__}: {exc}" - return out - - async def _push_all() -> list[dict]: - return await asyncio.gather(*(_push_one(t) for t in targets)) - - results = asyncio.run(_push_all()) - - table = Table(title="DECNET swarm update") - for col in ("host", "address", "agent", "self", "detail"): - table.add_column(col) - any_failure = False - for r in results: - agent = r.get("agent") or {} - selff = r.get("self") or {} - err = r.get("error") - if err: - any_failure = True - table.add_row(r["name"], r.get("address") or "", "[red]error[/]", "—", err) - continue - a_status = agent.get("status") - if a_status == 200: - agent_cell = "[green]updated[/]" - elif a_status == 409: - agent_cell = "[yellow]rolled-back[/]" - any_failure = True - else: - agent_cell = f"[red]{a_status}[/]" - any_failure = True - if not include_self: - self_cell = "—" - elif selff.get("status") == 200 or selff.get("status") is None: - self_cell = "[green]ok[/]" if selff else "[dim]skipped[/]" - else: - self_cell = f"[red]{selff.get('status')}[/]" - detail = "" - body = agent.get("body") or {} - if isinstance(body, dict): - detail = body.get("release", {}).get("sha") or body.get("detail", {}).get("error") or "" - table.add_row(r["name"], r.get("address") or "", agent_cell, self_cell, str(detail)[:80]) - console.print(table) - - if any_failure: - raise typer.Exit(1) - - -@swarm_app.command("deckies") -def swarm_deckies( - host: Optional[str] = typer.Option(None, "--host", help="Filter by worker name or UUID"), - state: Optional[str] = typer.Option(None, "--state", help="Filter by shard state (pending|running|failed|torn_down)"), - url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"), - json_out: bool = typer.Option(False, "--json", help="Emit JSON instead of a table"), -) -> None: - """List deployed deckies across the swarm with their owning worker host.""" - base = _swarmctl_base_url(url) - - host_uuid: Optional[str] = None - if host: - resp = _http_request("GET", base + "/swarm/hosts") - rows = resp.json() - match = next((r for r in rows if r.get("uuid") == host or r.get("name") == host), None) - if match is None: - console.print(f"[red]No enrolled worker matching '{host}'.[/]") - raise typer.Exit(1) - host_uuid = match["uuid"] - - query = [] - if host_uuid: - query.append(f"host_uuid={host_uuid}") - if state: - query.append(f"state={state}") - path = "/swarm/deckies" + ("?" + "&".join(query) if query else "") - - resp = _http_request("GET", base + path) - rows = resp.json() - - if json_out: - console.print_json(data=rows) - return - - if not rows: - console.print("[dim]No deckies deployed.[/]") - return - - table = Table(title="DECNET swarm deckies") - for col in ("decky", "host", "address", "state", "services"): - table.add_column(col) - for r in rows: - services = ",".join(r.get("services") or []) or "—" - state_val = r.get("state") or "pending" - colored = { - "running": f"[green]{state_val}[/]", - "failed": f"[red]{state_val}[/]", - "pending": f"[yellow]{state_val}[/]", - "torn_down": f"[dim]{state_val}[/]", - }.get(state_val, state_val) - table.add_row( - r.get("decky_name") or "", - r.get("host_name") or "", - r.get("host_address") or "", - colored, - services, - ) - console.print(table) - - -@swarm_app.command("decommission") -def swarm_decommission( - name: Optional[str] = typer.Option(None, "--name", help="Worker hostname"), - uuid: Optional[str] = typer.Option(None, "--uuid", help="Worker UUID (skip lookup)"), - url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"), - yes: bool = typer.Option(False, "--yes", "-y", help="Skip interactive confirmation"), -) -> None: - """Remove a worker from the swarm (cascades decky shard rows).""" - if not (name or uuid): - console.print("[red]Supply --name or --uuid.[/]") - raise typer.Exit(2) - - base = _swarmctl_base_url(url) - target_uuid = uuid - target_name = name - if target_uuid is None: - resp = _http_request("GET", base + "/swarm/hosts") - rows = resp.json() - match = next((r for r in rows if r.get("name") == name), None) - if match is None: - console.print(f"[red]No enrolled worker named '{name}'.[/]") - raise typer.Exit(1) - target_uuid = match["uuid"] - target_name = match.get("name") or target_name - - if not yes: - confirm = typer.confirm(f"Decommission worker {target_name!r} ({target_uuid})?", default=False) - if not confirm: - console.print("[dim]Aborted.[/]") - raise typer.Exit(0) - - _http_request("DELETE", f"{base}/swarm/hosts/{target_uuid}") - console.print(f"[green]Decommissioned {target_name or target_uuid}.[/]") - - -@app.command() -def deploy( - mode: str = typer.Option("unihost", "--mode", "-m", help="Deployment mode: unihost | swarm"), - deckies: Optional[int] = typer.Option(None, "--deckies", "-n", help="Number of deckies to deploy (required without --config)", min=1), - interface: Optional[str] = typer.Option(None, "--interface", "-i", help="Host NIC (auto-detected if omitted)"), - subnet: Optional[str] = typer.Option(None, "--subnet", help="LAN subnet CIDR (auto-detected if omitted)"), - ip_start: Optional[str] = typer.Option(None, "--ip-start", help="First decky IP (auto if omitted)"), - services: Optional[str] = typer.Option(None, "--services", help="Comma-separated services, e.g. ssh,smb,rdp"), - randomize_services: bool = typer.Option(False, "--randomize-services", help="Assign random services to each decky"), - distro: Optional[str] = typer.Option(None, "--distro", help="Comma-separated distro slugs, e.g. debian,ubuntu22,rocky9"), - randomize_distros: bool = typer.Option(False, "--randomize-distros", help="Assign a random distro to each decky"), - log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Host path for the collector to write RFC 5424 logs (e.g. /var/log/decnet/decnet.log)"), - archetype_name: Optional[str] = typer.Option(None, "--archetype", "-a", help="Machine archetype slug (e.g. linux-server, windows-workstation)"), - mutate_interval: Optional[int] = typer.Option(30, "--mutate-interval", help="Automatically rotate services every N minutes"), - dry_run: bool = typer.Option(False, "--dry-run", help="Generate compose file without starting containers"), - no_cache: bool = typer.Option(False, "--no-cache", help="Force rebuild all images, ignoring Docker layer cache"), - parallel: bool = typer.Option(False, "--parallel", help="Build all images concurrently (enables BuildKit, separates build from up)"), - ipvlan: bool = typer.Option(False, "--ipvlan", help="Use IPvlan L2 instead of MACVLAN (required on WiFi interfaces)"), - config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to INI config file"), - api: bool = typer.Option(False, "--api", help="Start the FastAPI backend to ingest and serve logs"), - api_port: int = typer.Option(8000, "--api-port", help="Port for the backend API"), - daemon: bool = typer.Option(False, "--daemon", help="Detach to background as a daemon process"), -) -> None: - """Deploy deckies to the LAN.""" - import os - - _require_master_mode("deploy") - if daemon: - log.info("deploy daemonizing mode=%s deckies=%s", mode, deckies) - _daemonize() - - log.info("deploy command invoked mode=%s deckies=%s dry_run=%s", mode, deckies, dry_run) - if mode not in ("unihost", "swarm"): - console.print("[red]--mode must be 'unihost' or 'swarm'[/]") - raise typer.Exit(1) - - # ------------------------------------------------------------------ # - # Config-file path # - # ------------------------------------------------------------------ # - if config_file: - try: - ini = load_ini(config_file) - except FileNotFoundError as e: - console.print(f"[red]{e}[/]") - raise typer.Exit(1) - - iface = interface or ini.interface or detect_interface() - subnet_cidr = subnet or ini.subnet - effective_gateway = ini.gateway - if subnet_cidr is None: - subnet_cidr, effective_gateway = detect_subnet(iface) - elif effective_gateway is None: - _, effective_gateway = detect_subnet(iface) - - host_ip = get_host_ip(iface) - console.print(f"[dim]Config:[/] {config_file} [dim]Interface:[/] {iface} " - f"[dim]Subnet:[/] {subnet_cidr} [dim]Gateway:[/] {effective_gateway} " - f"[dim]Host IP:[/] {host_ip}") - - if ini.custom_services: - from decnet.custom_service import CustomService - from decnet.services.registry import register_custom_service - for cs in ini.custom_services: - register_custom_service( - CustomService( - name=cs.name, - image=cs.image, - exec_cmd=cs.exec_cmd, - ports=cs.ports, - ) - ) - - effective_log_file = log_file - try: - decky_configs = build_deckies_from_ini( - ini, subnet_cidr, effective_gateway, host_ip, randomize_services, cli_mutate_interval=mutate_interval - ) - except ValueError as e: - console.print(f"[red]{e}[/]") - raise typer.Exit(1) - # ------------------------------------------------------------------ # - # Classic CLI path # - # ------------------------------------------------------------------ # - else: - if deckies is None: - console.print("[red]--deckies is required when --config is not used.[/]") - raise typer.Exit(1) - - services_list = [s.strip() for s in services.split(",")] if services else None - if services_list: - known = set(all_service_names()) - unknown = [s for s in services_list if s not in known] - if unknown: - console.print(f"[red]Unknown service(s): {unknown}. Available: {all_service_names()}[/]") - raise typer.Exit(1) - - arch: Archetype | None = None - if archetype_name: - try: - arch = get_archetype(archetype_name) - except ValueError as e: - console.print(f"[red]{e}[/]") - raise typer.Exit(1) - - if not services_list and not randomize_services and not arch: - console.print("[red]Specify --services, --archetype, or --randomize-services.[/]") - raise typer.Exit(1) - - iface = interface or detect_interface() - if subnet is None: - subnet_cidr, effective_gateway = detect_subnet(iface) - else: - subnet_cidr = subnet - _, effective_gateway = detect_subnet(iface) - - host_ip = get_host_ip(iface) - console.print(f"[dim]Interface:[/] {iface} [dim]Subnet:[/] {subnet_cidr} " - f"[dim]Gateway:[/] {effective_gateway} [dim]Host IP:[/] {host_ip}") - - distros_list = [d.strip() for d in distro.split(",")] if distro else None - if distros_list: - try: - for slug in distros_list: - get_distro(slug) - except ValueError as e: - console.print(f"[red]{e}[/]") - raise typer.Exit(1) - - ips = allocate_ips(subnet_cidr, effective_gateway, host_ip, deckies, ip_start) - decky_configs = build_deckies( - deckies, ips, services_list, randomize_services, - distros_explicit=distros_list, randomize_distros=randomize_distros, - archetype=arch, mutate_interval=mutate_interval, - ) - effective_log_file = log_file - - if api and not effective_log_file: - effective_log_file = os.path.join(os.getcwd(), "decnet.log") - console.print(f"[cyan]API mode enabled: defaulting log-file to {effective_log_file}[/]") - - config = DecnetConfig( - mode=mode, - interface=iface, - subnet=subnet_cidr, - gateway=effective_gateway, - deckies=decky_configs, - log_file=effective_log_file, - ipvlan=ipvlan, - mutate_interval=mutate_interval, - ) - - log.debug("deploy: config built deckies=%d interface=%s subnet=%s", len(config.deckies), config.interface, config.subnet) - - if mode == "swarm": - _deploy_swarm(config, dry_run=dry_run, no_cache=no_cache) - if dry_run: - log.info("deploy: swarm dry-run complete, no workers dispatched") - else: - log.info("deploy: swarm deployment complete deckies=%d", len(config.deckies)) - return - - from decnet.engine import deploy as _deploy - _deploy(config, dry_run=dry_run, no_cache=no_cache, parallel=parallel) - if dry_run: - log.info("deploy: dry-run complete, no containers started") - else: - log.info("deploy: deployment complete deckies=%d", len(config.deckies)) - - if mutate_interval is not None and not dry_run: - import subprocess # nosec B404 - import sys - console.print(f"[green]Starting DECNET Mutator watcher in the background (interval: {mutate_interval}m)...[/]") - try: - subprocess.Popen( # nosec B603 - [sys.executable, "-m", "decnet.cli", "mutate", "--watch"], - stdout=subprocess.DEVNULL, - stderr=subprocess.STDOUT, - start_new_session=True, - ) - except (FileNotFoundError, subprocess.SubprocessError): - console.print("[red]Failed to start mutator watcher.[/]") - - if effective_log_file and not dry_run and not api: - import subprocess # nosec B404 - import sys - from pathlib import Path as _Path - _collector_err = _Path(effective_log_file).with_suffix(".collector.log") - console.print(f"[bold cyan]Starting log collector[/] → {effective_log_file}") - subprocess.Popen( # nosec B603 - [sys.executable, "-m", "decnet.cli", "collect", "--log-file", str(effective_log_file)], - stdin=subprocess.DEVNULL, - stdout=open(_collector_err, "a"), - stderr=subprocess.STDOUT, - start_new_session=True, - ) - - if api and not dry_run: - import subprocess # nosec B404 - import sys - console.print(f"[green]Starting DECNET API on port {api_port}...[/]") - _env: dict[str, str] = os.environ.copy() - _env["DECNET_INGEST_LOG_FILE"] = str(effective_log_file or "") - try: - subprocess.Popen( # nosec B603 - [sys.executable, "-m", "uvicorn", "decnet.web.api:app", "--host", DECNET_API_HOST, "--port", str(api_port)], - env=_env, - stdout=subprocess.DEVNULL, - stderr=subprocess.STDOUT - ) - console.print(f"[dim]API running at http://{DECNET_API_HOST}:{api_port}[/]") - except (FileNotFoundError, subprocess.SubprocessError): - console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]") - - if effective_log_file and not dry_run: - import subprocess # nosec B404 - import sys - console.print("[bold cyan]Starting DECNET-PROBER[/] (auto-discovers attackers from log stream)") - try: - _prober_args = [ - sys.executable, "-m", "decnet.cli", "probe", - "--daemon", - "--log-file", str(effective_log_file), - ] - subprocess.Popen( # nosec B603 - _prober_args, - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.STDOUT, - start_new_session=True, - ) - except (FileNotFoundError, subprocess.SubprocessError): - console.print("[red]Failed to start DECNET-PROBER.[/]") - - if effective_log_file and not dry_run: - import subprocess # nosec B404 - import sys - console.print("[bold cyan]Starting DECNET-PROFILER[/] (builds attacker profiles from log stream)") - try: - subprocess.Popen( # nosec B603 - [sys.executable, "-m", "decnet.cli", "profiler", "--daemon"], - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.STDOUT, - start_new_session=True, - ) - except (FileNotFoundError, subprocess.SubprocessError): - console.print("[red]Failed to start DECNET-PROFILER.[/]") - - if effective_log_file and not dry_run: - import subprocess # nosec B404 - import sys - console.print("[bold cyan]Starting DECNET-SNIFFER[/] (passive network capture)") - try: - subprocess.Popen( # nosec B603 - [sys.executable, "-m", "decnet.cli", "sniffer", - "--daemon", - "--log-file", str(effective_log_file)], - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.STDOUT, - start_new_session=True, - ) - except (FileNotFoundError, subprocess.SubprocessError): - console.print("[red]Failed to start DECNET-SNIFFER.[/]") - - -def _is_running(match_fn) -> int | None: - """Return PID of a running DECNET process matching ``match_fn(cmdline)``, or None.""" - import psutil - - for proc in psutil.process_iter(["pid", "cmdline"]): - try: - cmd = proc.info["cmdline"] - if cmd and match_fn(cmd): - return proc.info["pid"] - except (psutil.NoSuchProcess, psutil.AccessDenied): - continue - return None - - -# Each entry: (display_name, detection_fn, launch_args_fn) -# launch_args_fn receives log_file and returns the Popen argv list. -def _service_registry(log_file: str) -> list[tuple[str, callable, list[str]]]: - """Return the microservice registry for health-check and relaunch.""" - import sys - - _py = sys.executable - - # On agents these run as systemd units invoking /usr/local/bin/decnet, - # which doesn't include "decnet.cli" in its cmdline. On master dev boxes - # they're launched via `python -m decnet.cli`. Match either form — cmd - # is a list of argv tokens, so substring-check each token. - def _matches(sub: str, extras: tuple[str, ...] = ()): - def _check(cmd) -> bool: - joined = " ".join(cmd) if not isinstance(cmd, str) else cmd - if "decnet" not in joined: - return False - if sub not in joined: - return False - return all(e in joined for e in extras) - return _check - - return [ - ( - "Collector", - _matches("collect"), - [_py, "-m", "decnet.cli", "collect", "--daemon", "--log-file", log_file], - ), - ( - "Mutator", - _matches("mutate", ("--watch",)), - [_py, "-m", "decnet.cli", "mutate", "--daemon", "--watch"], - ), - ( - "Prober", - _matches("probe"), - [_py, "-m", "decnet.cli", "probe", "--daemon", "--log-file", log_file], - ), - ( - "Profiler", - _matches("profiler"), - [_py, "-m", "decnet.cli", "profiler", "--daemon"], - ), - ( - "Sniffer", - _matches("sniffer"), - [_py, "-m", "decnet.cli", "sniffer", "--daemon", "--log-file", log_file], - ), - ( - "API", - lambda cmd: "uvicorn" in cmd and "decnet.web.api:app" in cmd, - [_py, "-m", "uvicorn", "decnet.web.api:app", - "--host", DECNET_API_HOST, "--port", str(DECNET_API_PORT)], - ), - ] - - -@app.command() -def redeploy( - log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to the DECNET log file"), -) -> None: - """Check running DECNET services and relaunch any that are down.""" - import subprocess # nosec B404 - - log.info("redeploy: checking services") - registry = _service_registry(str(log_file)) - - table = Table(title="DECNET Services", show_lines=True) - table.add_column("Service", style="bold cyan") - table.add_column("Status") - table.add_column("PID", style="dim") - table.add_column("Action") - - relaunched = 0 - for name, match_fn, launch_args in registry: - pid = _is_running(match_fn) - if pid is not None: - table.add_row(name, "[green]UP[/]", str(pid), "—") - else: - try: - subprocess.Popen( # nosec B603 - launch_args, - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.STDOUT, - start_new_session=True, - ) - table.add_row(name, "[red]DOWN[/]", "—", "[green]relaunched[/]") - relaunched += 1 - except (FileNotFoundError, subprocess.SubprocessError) as exc: - table.add_row(name, "[red]DOWN[/]", "—", f"[red]failed: {exc}[/]") - - console.print(table) - if relaunched: - console.print(f"[green]{relaunched} service(s) relaunched.[/]") - else: - console.print("[green]All services running.[/]") - - -@app.command() -def probe( - log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path for RFC 5424 syslog + .json output (reads attackers from .json, writes results to both)"), - interval: int = typer.Option(300, "--interval", "-i", help="Seconds between probe cycles (default: 300)"), - timeout: float = typer.Option(5.0, "--timeout", help="Per-probe TCP timeout in seconds"), - daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background (used by deploy, no console output)"), -) -> None: - """Fingerprint attackers (JARM + HASSH + TCP/IP stack) discovered in the log stream.""" - import asyncio - from decnet.prober import prober_worker - - if daemon: - log.info("probe daemonizing log_file=%s interval=%d", log_file, interval) - _daemonize() - asyncio.run(prober_worker(log_file, interval=interval, timeout=timeout)) - return - - else: - log.info("probe command invoked log_file=%s interval=%d", log_file, interval) - console.print(f"[bold cyan]DECNET-PROBER[/] watching {log_file} for attackers (interval: {interval}s)") - console.print("[dim]Press Ctrl+C to stop[/]") - try: - asyncio.run(prober_worker(log_file, interval=interval, timeout=timeout)) - except KeyboardInterrupt: - console.print("\n[yellow]DECNET-PROBER stopped.[/]") - - -@app.command() -def collect( - log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write RFC 5424 syslog lines and .json records"), - daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), -) -> None: - """Stream Docker logs from all running decky service containers to a log file.""" - import asyncio - from decnet.collector import log_collector_worker - - if daemon: - log.info("collect daemonizing log_file=%s", log_file) - _daemonize() - - log.info("collect command invoked log_file=%s", log_file) - console.print(f"[bold cyan]Collector starting[/] → {log_file}") - asyncio.run(log_collector_worker(log_file)) - - -@app.command() -def mutate( - watch: bool = typer.Option(False, "--watch", "-w", help="Run continuously and mutate deckies according to their interval"), - decky_name: Optional[str] = typer.Option(None, "--decky", help="Force mutate a specific decky immediately"), - force_all: bool = typer.Option(False, "--all", help="Force mutate all deckies immediately"), - daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), -) -> None: - """Manually trigger or continuously watch for decky mutation.""" - import asyncio - from decnet.mutator import mutate_decky, mutate_all, run_watch_loop - from decnet.web.dependencies import repo - - if daemon: - log.info("mutate daemonizing watch=%s", watch) - _daemonize() - - async def _run() -> None: - await repo.initialize() - if watch: - await run_watch_loop(repo) - elif decky_name: - await mutate_decky(decky_name, repo) - elif force_all: - await mutate_all(force=True, repo=repo) - else: - await mutate_all(force=False, repo=repo) - - asyncio.run(_run()) - - -@app.command() -def status() -> None: - """Show running deckies and their status.""" - log.info("status command invoked") - from decnet.engine import status as _status - _status() - - registry = _service_registry(str(DECNET_INGEST_LOG_FILE)) - # On agents, Mutator + Profiler are master-only (they need the master - # DB and orchestrate across the swarm), and the API is never shipped. - # Hide those rows so operators aren't chasing permanent DOWN entries. - if _agent_mode_active(): - registry = [r for r in registry if r[0] not in {"Mutator", "Profiler", "API"}] - svc_table = Table(title="DECNET Services", show_lines=True) - svc_table.add_column("Service", style="bold cyan") - svc_table.add_column("Status") - svc_table.add_column("PID", style="dim") - - for name, match_fn, _launch_args in registry: - pid = _is_running(match_fn) - if pid is not None: - svc_table.add_row(name, "[green]UP[/]", str(pid)) - else: - svc_table.add_row(name, "[red]DOWN[/]", "—") - - console.print(svc_table) - - -@app.command() -def teardown( - all_: bool = typer.Option(False, "--all", help="Tear down all deckies and remove network"), - id_: Optional[str] = typer.Option(None, "--id", help="Tear down a specific decky by name"), -) -> None: - """Stop and remove deckies.""" - _require_master_mode("teardown") - if not all_ and not id_: - console.print("[red]Specify --all or --id .[/]") - raise typer.Exit(1) - - log.info("teardown command invoked all=%s id=%s", all_, id_) - from decnet.engine import teardown as _teardown - _teardown(decky_id=id_) - log.info("teardown complete all=%s id=%s", all_, id_) - - if all_: - _kill_all_services() - - -@app.command(name="services") -def list_services() -> None: - """List all registered honeypot service plugins.""" - svcs = all_services() - table = Table(title="Available Services", show_lines=True) - table.add_column("Name", style="bold cyan") - table.add_column("Ports") - table.add_column("Image") - for name, svc in sorted(svcs.items()): - table.add_row(name, ", ".join(str(p) for p in svc.ports), svc.default_image) - console.print(table) - - -@app.command(name="distros") -def list_distros() -> None: - """List all available OS distro profiles for deckies.""" - table = Table(title="Available Distro Profiles", show_lines=True) - table.add_column("Slug", style="bold cyan") - table.add_column("Display Name") - table.add_column("Docker Image", style="dim") - for slug, profile in sorted(all_distros().items()): - table.add_row(slug, profile.display_name, profile.image) - console.print(table) - - -@app.command(name="correlate") -def correlate( - log_file: Optional[str] = typer.Option(None, "--log-file", "-f", help="Path to DECNET syslog file to analyse"), - min_deckies: int = typer.Option(2, "--min-deckies", "-m", help="Minimum number of distinct deckies an IP must touch to be reported"), - output: str = typer.Option("table", "--output", "-o", help="Output format: table | json | syslog"), - emit_syslog: bool = typer.Option(False, "--emit-syslog", help="Also print traversal events as RFC 5424 lines (for SIEM piping)"), - daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), -) -> None: - """Analyse logs for cross-decky traversals and print the attacker movement graph.""" - import sys - import json as _json - from pathlib import Path - from decnet.correlation.engine import CorrelationEngine - - if daemon: - log.info("correlate daemonizing log_file=%s", log_file) - _daemonize() - - engine = CorrelationEngine() - - if log_file: - path = Path(log_file) - if not path.exists(): - console.print(f"[red]Log file not found: {log_file}[/]") - raise typer.Exit(1) - engine.ingest_file(path) - elif not sys.stdin.isatty(): - for line in sys.stdin: - engine.ingest(line) - else: - console.print("[red]Provide --log-file or pipe log data via stdin.[/]") - raise typer.Exit(1) - - traversals = engine.traversals(min_deckies) - - if output == "json": - console.print_json(_json.dumps(engine.report_json(min_deckies), indent=2)) - elif output == "syslog": - for line in engine.traversal_syslog_lines(min_deckies): - typer.echo(line) - else: - if not traversals: - console.print( - f"[yellow]No traversals detected " - f"(min_deckies={min_deckies}, events_indexed={engine.events_indexed}).[/]" - ) - else: - console.print(engine.report_table(min_deckies)) - console.print( - f"[dim]Parsed {engine.lines_parsed} lines · " - f"indexed {engine.events_indexed} events · " - f"{len(engine.all_attackers())} unique IPs · " - f"[bold]{len(traversals)}[/] traversal(s)[/]" - ) - - if emit_syslog: - for line in engine.traversal_syslog_lines(min_deckies): - typer.echo(line) - - -@app.command(name="archetypes") -def list_archetypes() -> None: - """List all machine archetype profiles.""" - table = Table(title="Machine Archetypes", show_lines=True) - table.add_column("Slug", style="bold cyan") - table.add_column("Display Name") - table.add_column("Default Services", style="green") - table.add_column("Description", style="dim") - for slug, arch in sorted(all_archetypes().items()): - table.add_row( - slug, - arch.display_name, - ", ".join(arch.services), - arch.description, - ) - console.print(table) - - -@app.command(name="web") -def serve_web( - web_port: int = typer.Option(DECNET_WEB_PORT, "--web-port", help="Port to serve the DECNET Web Dashboard"), - host: str = typer.Option(DECNET_WEB_HOST, "--host", help="Host IP to serve the Web Dashboard"), - api_port: int = typer.Option(DECNET_API_PORT, "--api-port", help="Port the DECNET API is listening on"), - daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), -) -> None: - """Serve the DECNET Web Dashboard frontend. - - Proxies /api/* requests to the API server so the frontend can use - relative URLs (/api/v1/...) with no CORS configuration required. - """ - import http.client - import http.server - import socketserver - from pathlib import Path - - dist_dir = Path(__file__).parent.parent / "decnet_web" / "dist" - - if not dist_dir.exists(): - console.print(f"[red]Frontend build not found at {dist_dir}. Make sure you run 'npm run build' inside 'decnet_web'.[/]") - raise typer.Exit(1) - - if daemon: - log.info("web daemonizing host=%s port=%d api_port=%d", host, web_port, api_port) - _daemonize() - - _api_port = api_port - - class SPAHTTPRequestHandler(http.server.SimpleHTTPRequestHandler): - def do_GET(self): - if self.path.startswith("/api/"): - self._proxy("GET") - return - path = self.translate_path(self.path) - if not Path(path).exists() or Path(path).is_dir(): - self.path = "/index.html" - return super().do_GET() - - def do_POST(self): - if self.path.startswith("/api/"): - self._proxy("POST") - return - self.send_error(405) - - def do_PUT(self): - if self.path.startswith("/api/"): - self._proxy("PUT") - return - self.send_error(405) - - def do_DELETE(self): - if self.path.startswith("/api/"): - self._proxy("DELETE") - return - self.send_error(405) - - def _proxy(self, method: str) -> None: - content_length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(content_length) if content_length else None - - forward = {k: v for k, v in self.headers.items() - if k.lower() not in ("host", "connection")} - - try: - conn = http.client.HTTPConnection("127.0.0.1", _api_port, timeout=120) - conn.request(method, self.path, body=body, headers=forward) - resp = conn.getresponse() - - self.send_response(resp.status) - for key, val in resp.getheaders(): - if key.lower() not in ("connection", "transfer-encoding"): - self.send_header(key, val) - self.end_headers() - - # Disable socket timeout for SSE streams — they are - # long-lived by design and the 120s timeout would kill them. - content_type = resp.getheader("Content-Type", "") - if "text/event-stream" in content_type: - conn.sock.settimeout(None) - - # read1() returns bytes immediately available in the buffer - # without blocking for more. Plain read(4096) waits until - # 4096 bytes accumulate — fatal for SSE where each event - # is only ~100-500 bytes. - _read = getattr(resp, "read1", resp.read) - while True: - chunk = _read(4096) - if not chunk: - break - self.wfile.write(chunk) - self.wfile.flush() - except Exception as exc: - log.warning("web proxy error %s %s: %s", method, self.path, exc) - self.send_error(502, f"API proxy error: {exc}") - finally: - try: - conn.close() - except Exception: # nosec B110 — best-effort conn cleanup - pass - - def log_message(self, fmt: str, *args: object) -> None: - log.debug("web %s", fmt % args) - - import os - os.chdir(dist_dir) - - socketserver.TCPServer.allow_reuse_address = True - with socketserver.ThreadingTCPServer((host, web_port), SPAHTTPRequestHandler) as httpd: - console.print(f"[green]Serving DECNET Web Dashboard on http://{host}:{web_port}[/]") - console.print(f"[dim]Proxying /api/* → http://127.0.0.1:{_api_port}[/]") - try: - httpd.serve_forever() - except KeyboardInterrupt: - console.print("\n[dim]Shutting down dashboard server.[/]") - -@app.command(name="profiler") -def profiler_cmd( - interval: int = typer.Option(30, "--interval", "-i", help="Seconds between profile rebuild cycles"), - daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), -) -> None: - """Run the attacker profiler as a standalone microservice.""" - import asyncio - from decnet.profiler import attacker_profile_worker - from decnet.web.dependencies import repo - - if daemon: - log.info("profiler daemonizing interval=%d", interval) - _daemonize() - - log.info("profiler starting interval=%d", interval) - console.print(f"[bold cyan]Profiler starting[/] (interval: {interval}s)") - - async def _run() -> None: - await repo.initialize() - await attacker_profile_worker(repo, interval=interval) - - try: - asyncio.run(_run()) - except KeyboardInterrupt: - console.print("\n[yellow]Profiler stopped.[/]") - - -@app.command(name="sniffer") -def sniffer_cmd( - log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write captured syslog + JSON records"), - daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), -) -> None: - """Run the network sniffer as a standalone microservice.""" - import asyncio - from decnet.sniffer import sniffer_worker - - if daemon: - log.info("sniffer daemonizing log_file=%s", log_file) - _daemonize() - - log.info("sniffer starting log_file=%s", log_file) - console.print(f"[bold cyan]Sniffer starting[/] → {log_file}") - - try: - asyncio.run(sniffer_worker(log_file)) - except KeyboardInterrupt: - console.print("\n[yellow]Sniffer stopped.[/]") - - -_DB_RESET_TABLES: tuple[str, ...] = ( - # Order matters for DROP TABLE: child FKs first. - # - attacker_behavior FK-references attackers. - # - decky_shards FK-references swarm_hosts. - "attacker_behavior", - "attackers", - "logs", - "bounty", - "state", - "users", - "decky_shards", - "swarm_hosts", -) - - -async def _db_reset_mysql_async(dsn: str, mode: str, confirm: bool) -> None: - """Inspect + (optionally) wipe a MySQL database. Pulled out of the CLI - wrapper so tests can drive it without spawning a Typer runner.""" - from urllib.parse import urlparse - from sqlalchemy import text - from sqlalchemy.ext.asyncio import create_async_engine - - db_name = urlparse(dsn).path.lstrip("/") or "(default)" - engine = create_async_engine(dsn) - try: - # Collect current row counts per table. Missing tables yield -1. - rows: dict[str, int] = {} - async with engine.connect() as conn: - for tbl in _DB_RESET_TABLES: - try: - result = await conn.execute(text(f"SELECT COUNT(*) FROM `{tbl}`")) # nosec B608 - rows[tbl] = result.scalar() or 0 - except Exception: # noqa: BLE001 — ProgrammingError for missing table varies by driver - rows[tbl] = -1 - - summary = Table(title=f"DECNET MySQL reset — database `{db_name}` (mode={mode})") - summary.add_column("Table", style="cyan") - summary.add_column("Rows", justify="right") - for tbl, count in rows.items(): - summary.add_row(tbl, "[dim]missing[/]" if count < 0 else f"{count:,}") - console.print(summary) - - if not confirm: - console.print( - "[yellow]Dry-run only. Re-run with [bold]--i-know-what-im-doing[/] " - "to actually execute.[/]" - ) - return - - # Destructive phase. FK checks off so TRUNCATE/DROP works in any order. - async with engine.begin() as conn: - await conn.execute(text("SET FOREIGN_KEY_CHECKS = 0")) - for tbl in _DB_RESET_TABLES: - if rows.get(tbl, -1) < 0: - continue # skip absent tables silently - if mode == "truncate": - await conn.execute(text(f"TRUNCATE TABLE `{tbl}`")) - console.print(f"[green]✓ TRUNCATE {tbl}[/]") - else: # drop-tables - await conn.execute(text(f"DROP TABLE `{tbl}`")) - console.print(f"[green]✓ DROP TABLE {tbl}[/]") - await conn.execute(text("SET FOREIGN_KEY_CHECKS = 1")) - - console.print(f"[bold green]Done. Database `{db_name}` reset ({mode}).[/]") - finally: - await engine.dispose() - - -@app.command(name="db-reset") -def db_reset( - i_know: bool = typer.Option( - False, - "--i-know-what-im-doing", - help="Required to actually execute. Without it, the command runs in dry-run mode.", - ), - mode: str = typer.Option( - "truncate", - "--mode", - help="truncate (wipe rows, keep schema) | drop-tables (DROP TABLE for each DECNET table)", - ), - url: Optional[str] = typer.Option( - None, - "--url", - help="Override DECNET_DB_URL for this invocation (e.g. when cleanup needs admin creds).", - ), -) -> None: - """Wipe the MySQL database used by the DECNET dashboard. - - Destructive. Runs dry by default — pass --i-know-what-im-doing to commit. - Only supported against MySQL; refuses to operate on SQLite. - """ - import asyncio - import os - - if mode not in ("truncate", "drop-tables"): - console.print(f"[red]Invalid --mode '{mode}'. Expected: truncate | drop-tables.[/]") - raise typer.Exit(2) - - db_type = os.environ.get("DECNET_DB_TYPE", "sqlite").lower() - if db_type != "mysql": - console.print( - f"[red]db-reset is MySQL-only (DECNET_DB_TYPE='{db_type}'). " - f"For SQLite, just delete the decnet.db file.[/]" - ) - raise typer.Exit(2) - - dsn = url or os.environ.get("DECNET_DB_URL") - if not dsn: - # Fall back to component env vars (DECNET_DB_HOST/PORT/NAME/USER/PASSWORD). - from decnet.web.db.mysql.database import build_mysql_url - try: - dsn = build_mysql_url() - except ValueError as e: - console.print(f"[red]{e}[/]") - raise typer.Exit(2) from e - - log.info("db-reset invoked mode=%s confirm=%s", mode, i_know) - try: - asyncio.run(_db_reset_mysql_async(dsn, mode=mode, confirm=i_know)) - except Exception as e: # noqa: BLE001 - console.print(f"[red]db-reset failed: {e}[/]") - raise typer.Exit(1) from e - - -# ─────────────────────────────────────────────────────────────────────────── -# Role-based CLI gating. -# -# MAINTAINERS: when you add a new Typer command (or add_typer group) that is -# master-only, register its name in MASTER_ONLY_COMMANDS / MASTER_ONLY_GROUPS -# below. The gate is the only thing that: -# (a) hides the command from `decnet --help` on worker hosts, and -# (b) prevents a misconfigured worker from invoking master-side logic. -# Forgetting to register a new command is a role-boundary bug. Grep for -# MASTER_ONLY when touching command registration. -# -# Worker-legitimate commands (NOT in these sets): agent, updater, forwarder, -# status, collect, probe, sniffer. Agents run deckies locally and should be -# able to inspect them + run the per-host microservices (collector streams -# container logs, prober characterizes attackers hitting this host, sniffer -# captures traffic). Mutator and Profiler stay master-only: the mutator -# orchestrates respawns across the swarm; the profiler rebuilds attacker -# profiles against the master DB (no per-host DB exists). -# ─────────────────────────────────────────────────────────────────────────── -MASTER_ONLY_COMMANDS: frozenset[str] = frozenset({ - "api", "swarmctl", "deploy", "redeploy", "teardown", - "mutate", "listener", "profiler", - "services", "distros", "correlate", "archetypes", "web", - "db-reset", -}) -MASTER_ONLY_GROUPS: frozenset[str] = frozenset({"swarm"}) - - -def _agent_mode_active() -> bool: - """True when the host is configured as an agent AND master commands are - disallowed (the default for agents). Workers overriding this explicitly - set DECNET_DISALLOW_MASTER=false to opt into hybrid use.""" - import os - mode = os.environ.get("DECNET_MODE", "master").lower() - disallow = os.environ.get("DECNET_DISALLOW_MASTER", "true").lower() == "true" - return mode == "agent" and disallow - - -def _require_master_mode(command_name: str) -> None: - """Defence-in-depth: called at the top of every master-only command body. - - The registration-time gate in _gate_commands_by_mode() already hides - these commands from Typer's dispatch table, but this check protects - against direct function imports (e.g. from tests or third-party tools) - that would bypass Typer entirely.""" - if _agent_mode_active(): - console.print( - f"[red]`decnet {command_name}` is a master-only command; this host " - f"is configured as an agent (DECNET_MODE=agent).[/]" - ) - raise typer.Exit(1) - - -def _gate_commands_by_mode(_app: typer.Typer) -> None: - if not _agent_mode_active(): - return - _app.registered_commands = [ - c for c in _app.registered_commands - if (c.name or c.callback.__name__) not in MASTER_ONLY_COMMANDS - ] - _app.registered_groups = [ - g for g in _app.registered_groups - if g.name not in MASTER_ONLY_GROUPS - ] - - -_gate_commands_by_mode(app) - - -if __name__ == '__main__': # pragma: no cover - app() diff --git a/decnet/cli/__init__.py b/decnet/cli/__init__.py new file mode 100644 index 0000000..e2976c8 --- /dev/null +++ b/decnet/cli/__init__.py @@ -0,0 +1,80 @@ +""" +DECNET CLI — entry point for all commands. + +Usage: + decnet deploy --mode unihost --deckies 5 --randomize-services + decnet status + decnet teardown [--all | --id decky-01] + decnet services + +Layout: each command module exports ``register(app)`` which attaches its +commands to the passed Typer app. ``__init__.py`` builds the root app, +calls every module's ``register`` in order, then runs the master-only +gate. The gate must fire LAST so it sees the fully-populated dispatch +table before filtering. +""" + +from __future__ import annotations + +import typer + +from . import ( + agent, + api, + db, + deploy, + forwarder, + inventory, + lifecycle, + listener, + profiler, + sniffer, + swarm, + swarmctl, + updater, + web, + workers, +) +from .gating import _gate_commands_by_mode +from .utils import console as console, log as log + +app = typer.Typer( + name="decnet", + help="Deploy a deception network of honeypot deckies on your LAN.", + no_args_is_help=True, +) + +# Order matches the old flat layout so `decnet --help` reads the same. +for _mod in ( + api, swarmctl, agent, updater, listener, forwarder, + swarm, + deploy, lifecycle, workers, inventory, + web, profiler, sniffer, db, +): + _mod.register(app) + +_gate_commands_by_mode(app) + +# Backwards-compat re-exports. Tests and third-party tooling import these +# directly from ``decnet.cli``; the refactor must keep them resolvable. +from .db import _db_reset_mysql_async # noqa: E402,F401 +from .gating import ( # noqa: E402,F401 + MASTER_ONLY_COMMANDS, + MASTER_ONLY_GROUPS, + _agent_mode_active, + _require_master_mode, +) +from .utils import ( # noqa: E402,F401 + _daemonize, + _http_request, + _is_running, + _kill_all_services, + _pid_dir, + _service_registry, + _spawn_detached, + _swarmctl_base_url, +) + + +if __name__ == "__main__": # pragma: no cover + app() diff --git a/decnet/cli/agent.py b/decnet/cli/agent.py new file mode 100644 index 0000000..ae89a46 --- /dev/null +++ b/decnet/cli/agent.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +import os +import pathlib as _pathlib +import sys as _sys +from typing import Optional + +import typer + +from . import utils as _utils +from .utils import console, log + + +def register(app: typer.Typer) -> None: + @app.command() + def agent( + port: int = typer.Option(8765, "--port", help="Port for the worker agent"), + host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the worker agent"), # nosec B104 + agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker cert bundle dir (default: ~/.decnet/agent, expanded under the running user's HOME — set this when running as sudo/root)"), + daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), + no_forwarder: bool = typer.Option(False, "--no-forwarder", help="Do not auto-spawn the log forwarder alongside the agent"), + ) -> None: + """Run the DECNET SWARM worker agent (requires a cert bundle in ~/.decnet/agent/). + + By default, `decnet agent` auto-spawns `decnet forwarder` as a fully- + detached sibling process so worker logs start flowing to the master + without a second manual invocation. The forwarder survives agent + restarts and crashes — if it dies on its own, restart it manually + with `decnet forwarder --daemon …`. Pass --no-forwarder to skip. + """ + from decnet.agent import server as _agent_server + from decnet.env import DECNET_SWARM_MASTER_HOST, DECNET_INGEST_LOG_FILE + from decnet.swarm import pki as _pki + + resolved_dir = _pathlib.Path(agent_dir) if agent_dir else _pki.DEFAULT_AGENT_DIR + + if daemon: + log.info("agent daemonizing host=%s port=%d", host, port) + _utils._daemonize() + + if not no_forwarder and DECNET_SWARM_MASTER_HOST: + fw_argv = [ + _sys.executable, "-m", "decnet", "forwarder", + "--master-host", DECNET_SWARM_MASTER_HOST, + "--master-port", str(int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514"))), + "--agent-dir", str(resolved_dir), + "--log-file", str(DECNET_INGEST_LOG_FILE), + "--daemon", + ] + try: + pid = _utils._spawn_detached(fw_argv, _utils._pid_dir() / "forwarder.pid") + log.info("agent auto-spawned forwarder pid=%d master=%s", pid, DECNET_SWARM_MASTER_HOST) + console.print(f"[dim]Auto-spawned forwarder (pid {pid}) → {DECNET_SWARM_MASTER_HOST}.[/]") + except Exception as e: # noqa: BLE001 + log.warning("agent could not auto-spawn forwarder: %s", e) + console.print(f"[yellow]forwarder auto-spawn skipped: {e}[/]") + elif not no_forwarder: + log.info("agent skipping forwarder auto-spawn (DECNET_SWARM_MASTER_HOST unset)") + + log.info("agent command invoked host=%s port=%d dir=%s", host, port, resolved_dir) + console.print(f"[green]Starting DECNET worker agent on {host}:{port} (mTLS)...[/]") + rc = _agent_server.run(host, port, agent_dir=resolved_dir) + if rc != 0: + raise typer.Exit(rc) diff --git a/decnet/cli/api.py b/decnet/cli/api.py new file mode 100644 index 0000000..80f88cd --- /dev/null +++ b/decnet/cli/api.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +import os +import signal +import subprocess # nosec B404 +import sys + +import typer + +from decnet.env import DECNET_API_HOST, DECNET_API_PORT, DECNET_INGEST_LOG_FILE + +from . import utils as _utils +from .gating import _require_master_mode +from .utils import console, log + + +def register(app: typer.Typer) -> None: + @app.command() + def api( + port: int = typer.Option(DECNET_API_PORT, "--port", help="Port for the backend API"), + host: str = typer.Option(DECNET_API_HOST, "--host", help="Host IP for the backend API"), + log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Path to the DECNET log file to monitor"), + daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), + workers: int = typer.Option(1, "--workers", "-w", min=1, help="Number of uvicorn worker processes"), + ) -> None: + """Run the DECNET API and Web Dashboard in standalone mode.""" + _require_master_mode("api") + if daemon: + log.info("API daemonizing host=%s port=%d workers=%d", host, port, workers) + _utils._daemonize() + + log.info("API command invoked host=%s port=%d workers=%d", host, port, workers) + console.print(f"[green]Starting DECNET API on {host}:{port} (workers={workers})...[/]") + _env: dict[str, str] = os.environ.copy() + _env["DECNET_INGEST_LOG_FILE"] = str(log_file) + _cmd = [sys.executable, "-m", "uvicorn", "decnet.web.api:app", + "--host", host, "--port", str(port), "--workers", str(workers)] + try: + proc = subprocess.Popen(_cmd, env=_env, start_new_session=True) # nosec B603 B404 + try: + proc.wait() + except KeyboardInterrupt: + try: + os.killpg(proc.pid, signal.SIGTERM) + try: + proc.wait(timeout=10) + except subprocess.TimeoutExpired: + os.killpg(proc.pid, signal.SIGKILL) + proc.wait() + except ProcessLookupError: + pass + except (FileNotFoundError, subprocess.SubprocessError): + console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]") diff --git a/decnet/cli/db.py b/decnet/cli/db.py new file mode 100644 index 0000000..73c9430 --- /dev/null +++ b/decnet/cli/db.py @@ -0,0 +1,130 @@ +from __future__ import annotations + +from typing import Optional + +import typer +from rich.table import Table + +from .utils import console, log + + +_DB_RESET_TABLES: tuple[str, ...] = ( + # Order matters for DROP TABLE: child FKs first. + # - attacker_behavior FK-references attackers. + # - decky_shards FK-references swarm_hosts. + "attacker_behavior", + "attackers", + "logs", + "bounty", + "state", + "users", + "decky_shards", + "swarm_hosts", +) + + +async def _db_reset_mysql_async(dsn: str, mode: str, confirm: bool) -> None: + """Inspect + (optionally) wipe a MySQL database. Pulled out of the CLI + wrapper so tests can drive it without spawning a Typer runner.""" + from urllib.parse import urlparse + from sqlalchemy import text + from sqlalchemy.ext.asyncio import create_async_engine + + db_name = urlparse(dsn).path.lstrip("/") or "(default)" + engine = create_async_engine(dsn) + try: + rows: dict[str, int] = {} + async with engine.connect() as conn: + for tbl in _DB_RESET_TABLES: + try: + result = await conn.execute(text(f"SELECT COUNT(*) FROM `{tbl}`")) # nosec B608 + rows[tbl] = result.scalar() or 0 + except Exception: # noqa: BLE001 — ProgrammingError for missing table varies by driver + rows[tbl] = -1 + + summary = Table(title=f"DECNET MySQL reset — database `{db_name}` (mode={mode})") + summary.add_column("Table", style="cyan") + summary.add_column("Rows", justify="right") + for tbl, count in rows.items(): + summary.add_row(tbl, "[dim]missing[/]" if count < 0 else f"{count:,}") + console.print(summary) + + if not confirm: + console.print( + "[yellow]Dry-run only. Re-run with [bold]--i-know-what-im-doing[/] " + "to actually execute.[/]" + ) + return + + async with engine.begin() as conn: + await conn.execute(text("SET FOREIGN_KEY_CHECKS = 0")) + for tbl in _DB_RESET_TABLES: + if rows.get(tbl, -1) < 0: + continue + if mode == "truncate": + await conn.execute(text(f"TRUNCATE TABLE `{tbl}`")) + console.print(f"[green]✓ TRUNCATE {tbl}[/]") + else: + await conn.execute(text(f"DROP TABLE `{tbl}`")) + console.print(f"[green]✓ DROP TABLE {tbl}[/]") + await conn.execute(text("SET FOREIGN_KEY_CHECKS = 1")) + + console.print(f"[bold green]Done. Database `{db_name}` reset ({mode}).[/]") + finally: + await engine.dispose() + + +def register(app: typer.Typer) -> None: + @app.command(name="db-reset") + def db_reset( + i_know: bool = typer.Option( + False, + "--i-know-what-im-doing", + help="Required to actually execute. Without it, the command runs in dry-run mode.", + ), + mode: str = typer.Option( + "truncate", + "--mode", + help="truncate (wipe rows, keep schema) | drop-tables (DROP TABLE for each DECNET table)", + ), + url: Optional[str] = typer.Option( + None, + "--url", + help="Override DECNET_DB_URL for this invocation (e.g. when cleanup needs admin creds).", + ), + ) -> None: + """Wipe the MySQL database used by the DECNET dashboard. + + Destructive. Runs dry by default — pass --i-know-what-im-doing to commit. + Only supported against MySQL; refuses to operate on SQLite. + """ + import asyncio + import os + + if mode not in ("truncate", "drop-tables"): + console.print(f"[red]Invalid --mode '{mode}'. Expected: truncate | drop-tables.[/]") + raise typer.Exit(2) + + db_type = os.environ.get("DECNET_DB_TYPE", "sqlite").lower() + if db_type != "mysql": + console.print( + f"[red]db-reset is MySQL-only (DECNET_DB_TYPE='{db_type}'). " + f"For SQLite, just delete the decnet.db file.[/]" + ) + raise typer.Exit(2) + + dsn = url or os.environ.get("DECNET_DB_URL") + if not dsn: + from decnet.web.db.mysql.database import build_mysql_url + try: + dsn = build_mysql_url() + except ValueError as e: + console.print(f"[red]{e}[/]") + raise typer.Exit(2) from e + + log.info("db-reset invoked mode=%s confirm=%s", mode, i_know) + try: + asyncio.run(_db_reset_mysql_async(dsn, mode=mode, confirm=i_know)) + except Exception as e: # noqa: BLE001 + console.print(f"[red]db-reset failed: {e}[/]") + raise typer.Exit(1) from e diff --git a/decnet/cli/deploy.py b/decnet/cli/deploy.py new file mode 100644 index 0000000..1ec48b0 --- /dev/null +++ b/decnet/cli/deploy.py @@ -0,0 +1,307 @@ +from __future__ import annotations + +from typing import Optional + +import typer +from rich.table import Table + +from decnet.archetypes import Archetype, get_archetype +from decnet.config import DecnetConfig +from decnet.distros import get_distro +from decnet.env import DECNET_API_HOST, DECNET_INGEST_LOG_FILE +from decnet.fleet import all_service_names, build_deckies, build_deckies_from_ini +from decnet.ini_loader import load_ini +from decnet.network import detect_interface, detect_subnet, allocate_ips, get_host_ip + +from . import utils as _utils +from .gating import _require_master_mode +from .utils import console, log + + +def _deploy_swarm(config: "DecnetConfig", *, dry_run: bool, no_cache: bool) -> None: + """Shard deckies round-robin across enrolled workers and POST to swarmctl.""" + base = _utils._swarmctl_base_url(None) + resp = _utils._http_request("GET", base + "/swarm/hosts?host_status=enrolled") + enrolled = resp.json() + resp2 = _utils._http_request("GET", base + "/swarm/hosts?host_status=active") + active = resp2.json() + workers = [*enrolled, *active] + if not workers: + console.print("[red]No enrolled workers — run `decnet swarm enroll ...` first.[/]") + raise typer.Exit(1) + + assigned: list = [] + for idx, d in enumerate(config.deckies): + target = workers[idx % len(workers)] + assigned.append(d.model_copy(update={"host_uuid": target["uuid"]})) + config = config.model_copy(update={"deckies": assigned}) + + body = {"config": config.model_dump(mode="json"), "dry_run": dry_run, "no_cache": no_cache} + console.print(f"[cyan]Dispatching {len(config.deckies)} deckies across {len(workers)} worker(s)...[/]") + resp3 = _utils._http_request("POST", base + "/swarm/deploy", json_body=body, timeout=900.0) + results = resp3.json().get("results", []) + + table = Table(title="SWARM deploy results") + for col in ("worker", "host_uuid", "ok", "detail"): + table.add_column(col) + any_failed = False + for r in results: + ok = bool(r.get("ok")) + if not ok: + any_failed = True + detail = r.get("detail") + if isinstance(detail, dict): + detail = detail.get("status") or "ok" + table.add_row( + str(r.get("host_name") or ""), + str(r.get("host_uuid") or ""), + "[green]yes[/]" if ok else "[red]no[/]", + str(detail)[:80], + ) + console.print(table) + if any_failed: + raise typer.Exit(1) + + +def register(app: typer.Typer) -> None: + @app.command() + def deploy( + mode: str = typer.Option("unihost", "--mode", "-m", help="Deployment mode: unihost | swarm"), + deckies: Optional[int] = typer.Option(None, "--deckies", "-n", help="Number of deckies to deploy (required without --config)", min=1), + interface: Optional[str] = typer.Option(None, "--interface", "-i", help="Host NIC (auto-detected if omitted)"), + subnet: Optional[str] = typer.Option(None, "--subnet", help="LAN subnet CIDR (auto-detected if omitted)"), + ip_start: Optional[str] = typer.Option(None, "--ip-start", help="First decky IP (auto if omitted)"), + services: Optional[str] = typer.Option(None, "--services", help="Comma-separated services, e.g. ssh,smb,rdp"), + randomize_services: bool = typer.Option(False, "--randomize-services", help="Assign random services to each decky"), + distro: Optional[str] = typer.Option(None, "--distro", help="Comma-separated distro slugs, e.g. debian,ubuntu22,rocky9"), + randomize_distros: bool = typer.Option(False, "--randomize-distros", help="Assign a random distro to each decky"), + log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Host path for the collector to write RFC 5424 logs (e.g. /var/log/decnet/decnet.log)"), + archetype_name: Optional[str] = typer.Option(None, "--archetype", "-a", help="Machine archetype slug (e.g. linux-server, windows-workstation)"), + mutate_interval: Optional[int] = typer.Option(30, "--mutate-interval", help="Automatically rotate services every N minutes"), + dry_run: bool = typer.Option(False, "--dry-run", help="Generate compose file without starting containers"), + no_cache: bool = typer.Option(False, "--no-cache", help="Force rebuild all images, ignoring Docker layer cache"), + parallel: bool = typer.Option(False, "--parallel", help="Build all images concurrently (enables BuildKit, separates build from up)"), + ipvlan: bool = typer.Option(False, "--ipvlan", help="Use IPvlan L2 instead of MACVLAN (required on WiFi interfaces)"), + config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to INI config file"), + api: bool = typer.Option(False, "--api", help="Start the FastAPI backend to ingest and serve logs"), + api_port: int = typer.Option(8000, "--api-port", help="Port for the backend API"), + daemon: bool = typer.Option(False, "--daemon", help="Detach to background as a daemon process"), + ) -> None: + """Deploy deckies to the LAN.""" + import os + import subprocess # nosec B404 + import sys + from pathlib import Path as _Path + + _require_master_mode("deploy") + if daemon: + log.info("deploy daemonizing mode=%s deckies=%s", mode, deckies) + _utils._daemonize() + + log.info("deploy command invoked mode=%s deckies=%s dry_run=%s", mode, deckies, dry_run) + if mode not in ("unihost", "swarm"): + console.print("[red]--mode must be 'unihost' or 'swarm'[/]") + raise typer.Exit(1) + + if config_file: + try: + ini = load_ini(config_file) + except FileNotFoundError as e: + console.print(f"[red]{e}[/]") + raise typer.Exit(1) + + iface = interface or ini.interface or detect_interface() + subnet_cidr = subnet or ini.subnet + effective_gateway = ini.gateway + if subnet_cidr is None: + subnet_cidr, effective_gateway = detect_subnet(iface) + elif effective_gateway is None: + _, effective_gateway = detect_subnet(iface) + + host_ip = get_host_ip(iface) + console.print(f"[dim]Config:[/] {config_file} [dim]Interface:[/] {iface} " + f"[dim]Subnet:[/] {subnet_cidr} [dim]Gateway:[/] {effective_gateway} " + f"[dim]Host IP:[/] {host_ip}") + + if ini.custom_services: + from decnet.custom_service import CustomService + from decnet.services.registry import register_custom_service + for cs in ini.custom_services: + register_custom_service( + CustomService( + name=cs.name, + image=cs.image, + exec_cmd=cs.exec_cmd, + ports=cs.ports, + ) + ) + + effective_log_file = log_file + try: + decky_configs = build_deckies_from_ini( + ini, subnet_cidr, effective_gateway, host_ip, randomize_services, cli_mutate_interval=mutate_interval + ) + except ValueError as e: + console.print(f"[red]{e}[/]") + raise typer.Exit(1) + else: + if deckies is None: + console.print("[red]--deckies is required when --config is not used.[/]") + raise typer.Exit(1) + + services_list = [s.strip() for s in services.split(",")] if services else None + if services_list: + known = set(all_service_names()) + unknown = [s for s in services_list if s not in known] + if unknown: + console.print(f"[red]Unknown service(s): {unknown}. Available: {all_service_names()}[/]") + raise typer.Exit(1) + + arch: Archetype | None = None + if archetype_name: + try: + arch = get_archetype(archetype_name) + except ValueError as e: + console.print(f"[red]{e}[/]") + raise typer.Exit(1) + + if not services_list and not randomize_services and not arch: + console.print("[red]Specify --services, --archetype, or --randomize-services.[/]") + raise typer.Exit(1) + + iface = interface or detect_interface() + if subnet is None: + subnet_cidr, effective_gateway = detect_subnet(iface) + else: + subnet_cidr = subnet + _, effective_gateway = detect_subnet(iface) + + host_ip = get_host_ip(iface) + console.print(f"[dim]Interface:[/] {iface} [dim]Subnet:[/] {subnet_cidr} " + f"[dim]Gateway:[/] {effective_gateway} [dim]Host IP:[/] {host_ip}") + + distros_list = [d.strip() for d in distro.split(",")] if distro else None + if distros_list: + try: + for slug in distros_list: + get_distro(slug) + except ValueError as e: + console.print(f"[red]{e}[/]") + raise typer.Exit(1) + + ips = allocate_ips(subnet_cidr, effective_gateway, host_ip, deckies, ip_start) + decky_configs = build_deckies( + deckies, ips, services_list, randomize_services, + distros_explicit=distros_list, randomize_distros=randomize_distros, + archetype=arch, mutate_interval=mutate_interval, + ) + effective_log_file = log_file + + if api and not effective_log_file: + effective_log_file = os.path.join(os.getcwd(), "decnet.log") + console.print(f"[cyan]API mode enabled: defaulting log-file to {effective_log_file}[/]") + + config = DecnetConfig( + mode=mode, + interface=iface, + subnet=subnet_cidr, + gateway=effective_gateway, + deckies=decky_configs, + log_file=effective_log_file, + ipvlan=ipvlan, + mutate_interval=mutate_interval, + ) + + log.debug("deploy: config built deckies=%d interface=%s subnet=%s", len(config.deckies), config.interface, config.subnet) + + if mode == "swarm": + _deploy_swarm(config, dry_run=dry_run, no_cache=no_cache) + if dry_run: + log.info("deploy: swarm dry-run complete, no workers dispatched") + else: + log.info("deploy: swarm deployment complete deckies=%d", len(config.deckies)) + return + + from decnet.engine import deploy as _deploy + _deploy(config, dry_run=dry_run, no_cache=no_cache, parallel=parallel) + if dry_run: + log.info("deploy: dry-run complete, no containers started") + else: + log.info("deploy: deployment complete deckies=%d", len(config.deckies)) + + if mutate_interval is not None and not dry_run: + console.print(f"[green]Starting DECNET Mutator watcher in the background (interval: {mutate_interval}m)...[/]") + try: + subprocess.Popen( # nosec B603 + [sys.executable, "-m", "decnet.cli", "mutate", "--watch"], + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT, + start_new_session=True, + ) + except (FileNotFoundError, subprocess.SubprocessError): + console.print("[red]Failed to start mutator watcher.[/]") + + if effective_log_file and not dry_run and not api: + _collector_err = _Path(effective_log_file).with_suffix(".collector.log") + console.print(f"[bold cyan]Starting log collector[/] → {effective_log_file}") + subprocess.Popen( # nosec B603 + [sys.executable, "-m", "decnet.cli", "collect", "--log-file", str(effective_log_file)], + stdin=subprocess.DEVNULL, + stdout=open(_collector_err, "a"), + stderr=subprocess.STDOUT, + start_new_session=True, + ) + + if api and not dry_run: + console.print(f"[green]Starting DECNET API on port {api_port}...[/]") + _env: dict[str, str] = os.environ.copy() + _env["DECNET_INGEST_LOG_FILE"] = str(effective_log_file or "") + try: + subprocess.Popen( # nosec B603 + [sys.executable, "-m", "uvicorn", "decnet.web.api:app", "--host", DECNET_API_HOST, "--port", str(api_port)], + env=_env, + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT + ) + console.print(f"[dim]API running at http://{DECNET_API_HOST}:{api_port}[/]") + except (FileNotFoundError, subprocess.SubprocessError): + console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]") + + if effective_log_file and not dry_run: + console.print("[bold cyan]Starting DECNET-PROBER[/] (auto-discovers attackers from log stream)") + try: + subprocess.Popen( # nosec B603 + [sys.executable, "-m", "decnet.cli", "probe", "--daemon", "--log-file", str(effective_log_file)], + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT, + start_new_session=True, + ) + except (FileNotFoundError, subprocess.SubprocessError): + console.print("[red]Failed to start DECNET-PROBER.[/]") + + if effective_log_file and not dry_run: + console.print("[bold cyan]Starting DECNET-PROFILER[/] (builds attacker profiles from log stream)") + try: + subprocess.Popen( # nosec B603 + [sys.executable, "-m", "decnet.cli", "profiler", "--daemon"], + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT, + start_new_session=True, + ) + except (FileNotFoundError, subprocess.SubprocessError): + console.print("[red]Failed to start DECNET-PROFILER.[/]") + + if effective_log_file and not dry_run: + console.print("[bold cyan]Starting DECNET-SNIFFER[/] (passive network capture)") + try: + subprocess.Popen( # nosec B603 + [sys.executable, "-m", "decnet.cli", "sniffer", "--daemon", "--log-file", str(effective_log_file)], + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT, + start_new_session=True, + ) + except (FileNotFoundError, subprocess.SubprocessError): + console.print("[red]Failed to start DECNET-SNIFFER.[/]") diff --git a/decnet/cli/forwarder.py b/decnet/cli/forwarder.py new file mode 100644 index 0000000..b736fd8 --- /dev/null +++ b/decnet/cli/forwarder.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +import asyncio +import pathlib +import signal +from typing import Optional + +import typer + +from decnet.env import DECNET_INGEST_LOG_FILE + +from . import utils as _utils +from .utils import console, log + + +def register(app: typer.Typer) -> None: + @app.command() + def forwarder( + master_host: Optional[str] = typer.Option(None, "--master-host", help="Master listener hostname/IP (default: $DECNET_SWARM_MASTER_HOST)"), + master_port: int = typer.Option(6514, "--master-port", help="Master listener TCP port (RFC 5425 default 6514)"), + log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Local RFC 5424 file to tail and forward"), + agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker cert bundle dir (default: ~/.decnet/agent)"), + state_db: Optional[str] = typer.Option(None, "--state-db", help="Forwarder offset SQLite path (default: /forwarder.db)"), + poll_interval: float = typer.Option(0.5, "--poll-interval", help="Seconds between log file stat checks"), + daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), + ) -> None: + """Run the worker-side syslog-over-TLS forwarder (RFC 5425, mTLS to master:6514).""" + from decnet.env import DECNET_SWARM_MASTER_HOST + from decnet.swarm import pki + from decnet.swarm.log_forwarder import ForwarderConfig, run_forwarder + + resolved_host = master_host or DECNET_SWARM_MASTER_HOST + if not resolved_host: + console.print("[red]--master-host is required (or set DECNET_SWARM_MASTER_HOST).[/]") + raise typer.Exit(2) + + resolved_agent_dir = pathlib.Path(agent_dir) if agent_dir else pki.DEFAULT_AGENT_DIR + if not (resolved_agent_dir / "worker.crt").exists(): + console.print(f"[red]No worker cert bundle at {resolved_agent_dir} — enroll from the master first.[/]") + raise typer.Exit(2) + + if not log_file: + console.print("[red]--log-file is required.[/]") + raise typer.Exit(2) + + cfg = ForwarderConfig( + log_path=pathlib.Path(log_file), + master_host=resolved_host, + master_port=master_port, + agent_dir=resolved_agent_dir, + state_db=pathlib.Path(state_db) if state_db else None, + ) + + if daemon: + log.info("forwarder daemonizing master=%s:%d log=%s", resolved_host, master_port, log_file) + _utils._daemonize() + + log.info("forwarder command invoked master=%s:%d log=%s", resolved_host, master_port, log_file) + console.print(f"[green]Starting DECNET forwarder → {resolved_host}:{master_port} (mTLS)...[/]") + + async def _main() -> None: + stop = asyncio.Event() + loop = asyncio.get_running_loop() + for sig in (signal.SIGTERM, signal.SIGINT): + try: + loop.add_signal_handler(sig, stop.set) + except (NotImplementedError, RuntimeError): # pragma: no cover + pass + await run_forwarder(cfg, poll_interval=poll_interval, stop_event=stop) + + try: + asyncio.run(_main()) + except KeyboardInterrupt: + pass diff --git a/decnet/cli/gating.py b/decnet/cli/gating.py new file mode 100644 index 0000000..5c15352 --- /dev/null +++ b/decnet/cli/gating.py @@ -0,0 +1,71 @@ +"""Role-based CLI gating. + +MAINTAINERS: when you add a new Typer command (or add_typer group) that is +master-only, register its name in MASTER_ONLY_COMMANDS / MASTER_ONLY_GROUPS +below. The gate is the only thing that: + (a) hides the command from `decnet --help` on worker hosts, and + (b) prevents a misconfigured worker from invoking master-side logic. +Forgetting to register a new command is a role-boundary bug. Grep for +MASTER_ONLY when touching command registration. + +Worker-legitimate commands (NOT in these sets): agent, updater, forwarder, +status, collect, probe, sniffer. Agents run deckies locally and should be +able to inspect them + run the per-host microservices (collector streams +container logs, prober characterizes attackers hitting this host, sniffer +captures traffic). Mutator and Profiler stay master-only: the mutator +orchestrates respawns across the swarm; the profiler rebuilds attacker +profiles against the master DB (no per-host DB exists). +""" + +from __future__ import annotations + +import os + +import typer + +from .utils import console + +MASTER_ONLY_COMMANDS: frozenset[str] = frozenset({ + "api", "swarmctl", "deploy", "redeploy", "teardown", + "mutate", "listener", "profiler", + "services", "distros", "correlate", "archetypes", "web", + "db-reset", +}) +MASTER_ONLY_GROUPS: frozenset[str] = frozenset({"swarm"}) + + +def _agent_mode_active() -> bool: + """True when the host is configured as an agent AND master commands are + disallowed (the default for agents). Workers overriding this explicitly + set DECNET_DISALLOW_MASTER=false to opt into hybrid use.""" + mode = os.environ.get("DECNET_MODE", "master").lower() + disallow = os.environ.get("DECNET_DISALLOW_MASTER", "true").lower() == "true" + return mode == "agent" and disallow + + +def _require_master_mode(command_name: str) -> None: + """Defence-in-depth: called at the top of every master-only command body. + + The registration-time gate in _gate_commands_by_mode() already hides + these commands from Typer's dispatch table, but this check protects + against direct function imports (e.g. from tests or third-party tools) + that would bypass Typer entirely.""" + if _agent_mode_active(): + console.print( + f"[red]`decnet {command_name}` is a master-only command; this host " + f"is configured as an agent (DECNET_MODE=agent).[/]" + ) + raise typer.Exit(1) + + +def _gate_commands_by_mode(_app: typer.Typer) -> None: + if not _agent_mode_active(): + return + _app.registered_commands = [ + c for c in _app.registered_commands + if (c.name or c.callback.__name__) not in MASTER_ONLY_COMMANDS + ] + _app.registered_groups = [ + g for g in _app.registered_groups + if g.name not in MASTER_ONLY_GROUPS + ] diff --git a/decnet/cli/inventory.py b/decnet/cli/inventory.py new file mode 100644 index 0000000..fb75c36 --- /dev/null +++ b/decnet/cli/inventory.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +import typer +from rich.table import Table + +from decnet.archetypes import all_archetypes +from decnet.distros import all_distros +from decnet.services.registry import all_services + +from .utils import console + + +def register(app: typer.Typer) -> None: + @app.command(name="services") + def list_services() -> None: + """List all registered honeypot service plugins.""" + svcs = all_services() + table = Table(title="Available Services", show_lines=True) + table.add_column("Name", style="bold cyan") + table.add_column("Ports") + table.add_column("Image") + for name, svc in sorted(svcs.items()): + table.add_row(name, ", ".join(str(p) for p in svc.ports), svc.default_image) + console.print(table) + + @app.command(name="distros") + def list_distros() -> None: + """List all available OS distro profiles for deckies.""" + table = Table(title="Available Distro Profiles", show_lines=True) + table.add_column("Slug", style="bold cyan") + table.add_column("Display Name") + table.add_column("Docker Image", style="dim") + for slug, profile in sorted(all_distros().items()): + table.add_row(slug, profile.display_name, profile.image) + console.print(table) + + @app.command(name="archetypes") + def list_archetypes() -> None: + """List all machine archetype profiles.""" + table = Table(title="Machine Archetypes", show_lines=True) + table.add_column("Slug", style="bold cyan") + table.add_column("Display Name") + table.add_column("Default Services", style="green") + table.add_column("Description", style="dim") + for slug, arch in sorted(all_archetypes().items()): + table.add_row( + slug, + arch.display_name, + ", ".join(arch.services), + arch.description, + ) + console.print(table) diff --git a/decnet/cli/lifecycle.py b/decnet/cli/lifecycle.py new file mode 100644 index 0000000..0661729 --- /dev/null +++ b/decnet/cli/lifecycle.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +import subprocess # nosec B404 +from typing import Optional + +import typer +from rich.table import Table + +from decnet.env import DECNET_INGEST_LOG_FILE + +from . import utils as _utils +from .gating import _agent_mode_active, _require_master_mode +from .utils import console, log + + +def register(app: typer.Typer) -> None: + @app.command() + def redeploy( + log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to the DECNET log file"), + ) -> None: + """Check running DECNET services and relaunch any that are down.""" + log.info("redeploy: checking services") + registry = _utils._service_registry(str(log_file)) + + table = Table(title="DECNET Services", show_lines=True) + table.add_column("Service", style="bold cyan") + table.add_column("Status") + table.add_column("PID", style="dim") + table.add_column("Action") + + relaunched = 0 + for name, match_fn, launch_args in registry: + pid = _utils._is_running(match_fn) + if pid is not None: + table.add_row(name, "[green]UP[/]", str(pid), "—") + else: + try: + subprocess.Popen( # nosec B603 + launch_args, + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT, + start_new_session=True, + ) + table.add_row(name, "[red]DOWN[/]", "—", "[green]relaunched[/]") + relaunched += 1 + except (FileNotFoundError, subprocess.SubprocessError) as exc: + table.add_row(name, "[red]DOWN[/]", "—", f"[red]failed: {exc}[/]") + + console.print(table) + if relaunched: + console.print(f"[green]{relaunched} service(s) relaunched.[/]") + else: + console.print("[green]All services running.[/]") + + @app.command() + def status() -> None: + """Show running deckies and their status.""" + log.info("status command invoked") + from decnet.engine import status as _status + _status() + + registry = _utils._service_registry(str(DECNET_INGEST_LOG_FILE)) + if _agent_mode_active(): + registry = [r for r in registry if r[0] not in {"Mutator", "Profiler", "API"}] + svc_table = Table(title="DECNET Services", show_lines=True) + svc_table.add_column("Service", style="bold cyan") + svc_table.add_column("Status") + svc_table.add_column("PID", style="dim") + + for name, match_fn, _launch_args in registry: + pid = _utils._is_running(match_fn) + if pid is not None: + svc_table.add_row(name, "[green]UP[/]", str(pid)) + else: + svc_table.add_row(name, "[red]DOWN[/]", "—") + + console.print(svc_table) + + @app.command() + def teardown( + all_: bool = typer.Option(False, "--all", help="Tear down all deckies and remove network"), + id_: Optional[str] = typer.Option(None, "--id", help="Tear down a specific decky by name"), + ) -> None: + """Stop and remove deckies.""" + _require_master_mode("teardown") + if not all_ and not id_: + console.print("[red]Specify --all or --id .[/]") + raise typer.Exit(1) + + log.info("teardown command invoked all=%s id=%s", all_, id_) + from decnet.engine import teardown as _teardown + _teardown(decky_id=id_) + log.info("teardown complete all=%s id=%s", all_, id_) + + if all_: + _utils._kill_all_services() diff --git a/decnet/cli/listener.py b/decnet/cli/listener.py new file mode 100644 index 0000000..d95a362 --- /dev/null +++ b/decnet/cli/listener.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +import asyncio +import pathlib +import signal +from typing import Optional + +import typer + +from . import utils as _utils +from .utils import console, log + + +def register(app: typer.Typer) -> None: + @app.command() + def listener( + bind_host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the master syslog-TLS listener"), # nosec B104 + bind_port: int = typer.Option(6514, "--port", help="Listener TCP port (RFC 5425 default 6514)"), + log_path: Optional[str] = typer.Option(None, "--log-path", help="RFC 5424 forensic sink (default: ./master.log)"), + json_path: Optional[str] = typer.Option(None, "--json-path", help="Parsed-JSON ingest sink (default: ./master.json)"), + ca_dir: Optional[str] = typer.Option(None, "--ca-dir", help="DECNET CA dir (default: ~/.decnet/ca)"), + daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), + ) -> None: + """Run the master-side syslog-over-TLS listener (RFC 5425, mTLS).""" + from decnet.swarm import pki + from decnet.swarm.log_listener import ListenerConfig, run_listener + + resolved_ca_dir = pathlib.Path(ca_dir) if ca_dir else pki.DEFAULT_CA_DIR + resolved_log = pathlib.Path(log_path) if log_path else pathlib.Path("master.log") + resolved_json = pathlib.Path(json_path) if json_path else pathlib.Path("master.json") + + cfg = ListenerConfig( + log_path=resolved_log, json_path=resolved_json, + bind_host=bind_host, bind_port=bind_port, ca_dir=resolved_ca_dir, + ) + + if daemon: + log.info("listener daemonizing host=%s port=%d", bind_host, bind_port) + _utils._daemonize() + + log.info("listener command invoked host=%s port=%d", bind_host, bind_port) + console.print(f"[green]Starting DECNET log listener on {bind_host}:{bind_port} (mTLS)...[/]") + + async def _main() -> None: + stop = asyncio.Event() + loop = asyncio.get_running_loop() + for sig in (signal.SIGTERM, signal.SIGINT): + try: + loop.add_signal_handler(sig, stop.set) + except (NotImplementedError, RuntimeError): # pragma: no cover + pass + await run_listener(cfg, stop_event=stop) + + try: + asyncio.run(_main()) + except KeyboardInterrupt: + pass diff --git a/decnet/cli/profiler.py b/decnet/cli/profiler.py new file mode 100644 index 0000000..c3c6269 --- /dev/null +++ b/decnet/cli/profiler.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +import typer + +from . import utils as _utils +from .utils import console, log + + +def register(app: typer.Typer) -> None: + @app.command(name="profiler") + def profiler_cmd( + interval: int = typer.Option(30, "--interval", "-i", help="Seconds between profile rebuild cycles"), + daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), + ) -> None: + """Run the attacker profiler as a standalone microservice.""" + import asyncio + from decnet.profiler import attacker_profile_worker + from decnet.web.dependencies import repo + + if daemon: + log.info("profiler daemonizing interval=%d", interval) + _utils._daemonize() + + log.info("profiler starting interval=%d", interval) + console.print(f"[bold cyan]Profiler starting[/] (interval: {interval}s)") + + async def _run() -> None: + await repo.initialize() + await attacker_profile_worker(repo, interval=interval) + + try: + asyncio.run(_run()) + except KeyboardInterrupt: + console.print("\n[yellow]Profiler stopped.[/]") diff --git a/decnet/cli/sniffer.py b/decnet/cli/sniffer.py new file mode 100644 index 0000000..febd000 --- /dev/null +++ b/decnet/cli/sniffer.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +import typer + +from decnet.env import DECNET_INGEST_LOG_FILE + +from . import utils as _utils +from .utils import console, log + + +def register(app: typer.Typer) -> None: + @app.command(name="sniffer") + def sniffer_cmd( + log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write captured syslog + JSON records"), + daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), + ) -> None: + """Run the network sniffer as a standalone microservice.""" + import asyncio + from decnet.sniffer import sniffer_worker + + if daemon: + log.info("sniffer daemonizing log_file=%s", log_file) + _utils._daemonize() + + log.info("sniffer starting log_file=%s", log_file) + console.print(f"[bold cyan]Sniffer starting[/] → {log_file}") + + try: + asyncio.run(sniffer_worker(log_file)) + except KeyboardInterrupt: + console.print("\n[yellow]Sniffer stopped.[/]") diff --git a/decnet/cli/swarm.py b/decnet/cli/swarm.py new file mode 100644 index 0000000..8a0157e --- /dev/null +++ b/decnet/cli/swarm.py @@ -0,0 +1,346 @@ +"""`decnet swarm ...` — master-side operator commands (HTTP to local swarmctl).""" + +from __future__ import annotations + +from typing import Optional + +import typer +from rich.table import Table + +from . import utils as _utils +from .utils import console + + +def register(app: typer.Typer) -> None: + swarm_app = typer.Typer( + name="swarm", + help="Manage swarm workers (enroll, list, decommission). Requires `decnet swarmctl` running.", + no_args_is_help=True, + ) + app.add_typer(swarm_app, name="swarm") + + @swarm_app.command("enroll") + def swarm_enroll( + name: str = typer.Option(..., "--name", help="Short hostname for the worker (also the cert CN)"), + address: str = typer.Option(..., "--address", help="IP or DNS the master uses to reach the worker"), + agent_port: int = typer.Option(8765, "--agent-port", help="Worker agent TCP port"), + sans: Optional[str] = typer.Option(None, "--sans", help="Comma-separated extra SANs for the worker cert"), + notes: Optional[str] = typer.Option(None, "--notes", help="Free-form operator notes"), + out_dir: Optional[str] = typer.Option(None, "--out-dir", help="Write the bundle (ca.crt/worker.crt/worker.key) to this dir for scp"), + updater: bool = typer.Option(False, "--updater", help="Also issue an updater-identity cert (CN=updater@) for the remote self-updater"), + url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL (default: 127.0.0.1:8770)"), + ) -> None: + """Issue a mTLS bundle for a new worker and register it in the swarm.""" + import pathlib as _pathlib + + body: dict = {"name": name, "address": address, "agent_port": agent_port} + if sans: + body["sans"] = [s.strip() for s in sans.split(",") if s.strip()] + if notes: + body["notes"] = notes + if updater: + body["issue_updater_bundle"] = True + + resp = _utils._http_request("POST", _utils._swarmctl_base_url(url) + "/swarm/enroll", json_body=body) + data = resp.json() + + console.print(f"[green]Enrolled worker:[/] {data['name']} " + f"[dim]uuid=[/]{data['host_uuid']} " + f"[dim]fingerprint=[/]{data['fingerprint']}") + if data.get("updater"): + console.print(f"[green] + updater identity[/] " + f"[dim]fingerprint=[/]{data['updater']['fingerprint']}") + + if out_dir: + target = _pathlib.Path(out_dir).expanduser() + target.mkdir(parents=True, exist_ok=True) + (target / "ca.crt").write_text(data["ca_cert_pem"]) + (target / "worker.crt").write_text(data["worker_cert_pem"]) + (target / "worker.key").write_text(data["worker_key_pem"]) + for leaf in ("worker.key",): + try: + (target / leaf).chmod(0o600) + except OSError: + pass + console.print(f"[cyan]Agent bundle written to[/] {target}") + + if data.get("updater"): + upd_target = target.parent / f"{target.name}-updater" + upd_target.mkdir(parents=True, exist_ok=True) + (upd_target / "ca.crt").write_text(data["ca_cert_pem"]) + (upd_target / "updater.crt").write_text(data["updater"]["updater_cert_pem"]) + (upd_target / "updater.key").write_text(data["updater"]["updater_key_pem"]) + try: + (upd_target / "updater.key").chmod(0o600) + except OSError: + pass + console.print(f"[cyan]Updater bundle written to[/] {upd_target}") + console.print("[dim]Ship the agent dir to ~/.decnet/agent/ and the updater dir to ~/.decnet/updater/ on the worker.[/]") + else: + console.print("[dim]Ship this directory to the worker at ~/.decnet/agent/ (or wherever `decnet agent --agent-dir` points).[/]") + else: + console.print("[yellow]No --out-dir given — bundle PEMs are in the JSON response; persist them before leaving this shell.[/]") + + @swarm_app.command("list") + def swarm_list( + host_status: Optional[str] = typer.Option(None, "--status", help="Filter by status (enrolled|active|unreachable|decommissioned)"), + url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"), + ) -> None: + """List enrolled workers.""" + q = f"?host_status={host_status}" if host_status else "" + resp = _utils._http_request("GET", _utils._swarmctl_base_url(url) + "/swarm/hosts" + q) + rows = resp.json() + if not rows: + console.print("[dim]No workers enrolled.[/]") + return + table = Table(title="DECNET swarm workers") + for col in ("name", "address", "port", "status", "last heartbeat", "enrolled"): + table.add_column(col) + for r in rows: + table.add_row( + r.get("name") or "", + r.get("address") or "", + str(r.get("agent_port") or ""), + r.get("status") or "", + str(r.get("last_heartbeat") or "—"), + str(r.get("enrolled_at") or "—"), + ) + console.print(table) + + @swarm_app.command("check") + def swarm_check( + url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"), + json_out: bool = typer.Option(False, "--json", help="Emit JSON instead of a table"), + ) -> None: + """Actively probe every enrolled worker and refresh status + last_heartbeat.""" + resp = _utils._http_request("POST", _utils._swarmctl_base_url(url) + "/swarm/check", timeout=60.0) + payload = resp.json() + results = payload.get("results", []) + + if json_out: + console.print_json(data=payload) + return + + if not results: + console.print("[dim]No workers enrolled.[/]") + return + + table = Table(title="DECNET swarm check") + for col in ("name", "address", "reachable", "detail"): + table.add_column(col) + for r in results: + reachable = r.get("reachable") + mark = "[green]yes[/]" if reachable else "[red]no[/]" + detail = r.get("detail") + detail_str = "—" + if isinstance(detail, dict): + detail_str = detail.get("status") or ", ".join(f"{k}={v}" for k, v in detail.items()) + elif detail is not None: + detail_str = str(detail) + table.add_row( + r.get("name") or "", + r.get("address") or "", + mark, + detail_str, + ) + console.print(table) + + @swarm_app.command("update") + def swarm_update( + host: Optional[str] = typer.Option(None, "--host", help="Target worker (name or UUID). Omit with --all."), + all_hosts: bool = typer.Option(False, "--all", help="Push to every enrolled worker."), + include_self: bool = typer.Option(False, "--include-self", help="Also push to each updater's /update-self after a successful agent update."), + root: Optional[str] = typer.Option(None, "--root", help="Source tree to tar (default: CWD)."), + exclude: list[str] = typer.Option([], "--exclude", help="Additional exclude glob. Repeatable."), + updater_port: int = typer.Option(8766, "--updater-port", help="Port the workers' updater listens on."), + dry_run: bool = typer.Option(False, "--dry-run", help="Build the tarball and print stats; no network."), + url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL."), + ) -> None: + """Push the current working tree to workers' self-updaters (with auto-rollback on failure).""" + import asyncio + import pathlib as _pathlib + + from decnet.swarm.tar_tree import tar_working_tree, detect_git_sha + from decnet.swarm.updater_client import UpdaterClient + + if not (host or all_hosts): + console.print("[red]Supply --host or --all.[/]") + raise typer.Exit(2) + if host and all_hosts: + console.print("[red]--host and --all are mutually exclusive.[/]") + raise typer.Exit(2) + + base = _utils._swarmctl_base_url(url) + resp = _utils._http_request("GET", base + "/swarm/hosts") + rows = resp.json() + if host: + targets = [r for r in rows if r.get("name") == host or r.get("uuid") == host] + if not targets: + console.print(f"[red]No enrolled worker matching '{host}'.[/]") + raise typer.Exit(1) + else: + targets = [r for r in rows if r.get("status") != "decommissioned"] + if not targets: + console.print("[dim]No targets.[/]") + return + + tree_root = _pathlib.Path(root) if root else _pathlib.Path.cwd() + sha = detect_git_sha(tree_root) + console.print(f"[dim]Tarring[/] {tree_root} [dim]sha={sha or '(not a git repo)'}[/]") + tarball = tar_working_tree(tree_root, extra_excludes=exclude) + console.print(f"[dim]Tarball size:[/] {len(tarball):,} bytes") + + if dry_run: + console.print("[yellow]--dry-run: not pushing.[/]") + for t in targets: + console.print(f" would push to [cyan]{t.get('name')}[/] at {t.get('address')}:{updater_port}") + return + + async def _push_one(h: dict) -> dict: + name = h.get("name") or h.get("uuid") + out: dict = {"name": name, "address": h.get("address"), "agent": None, "self": None} + try: + async with UpdaterClient(h, updater_port=updater_port) as u: + r = await u.update(tarball, sha=sha) + out["agent"] = {"status": r.status_code, "body": r.json() if r.content else {}} + if r.status_code == 200 and include_self: + rs = await u.update_self(tarball, sha=sha) + out["self"] = {"status": rs.status_code, "body": rs.json() if rs.content else {}} + except Exception as exc: # noqa: BLE001 + out["error"] = f"{type(exc).__name__}: {exc}" + return out + + async def _push_all() -> list[dict]: + return await asyncio.gather(*(_push_one(t) for t in targets)) + + results = asyncio.run(_push_all()) + + table = Table(title="DECNET swarm update") + for col in ("host", "address", "agent", "self", "detail"): + table.add_column(col) + any_failure = False + for r in results: + agent = r.get("agent") or {} + selff = r.get("self") or {} + err = r.get("error") + if err: + any_failure = True + table.add_row(r["name"], r.get("address") or "", "[red]error[/]", "—", err) + continue + a_status = agent.get("status") + if a_status == 200: + agent_cell = "[green]updated[/]" + elif a_status == 409: + agent_cell = "[yellow]rolled-back[/]" + any_failure = True + else: + agent_cell = f"[red]{a_status}[/]" + any_failure = True + if not include_self: + self_cell = "—" + elif selff.get("status") == 200 or selff.get("status") is None: + self_cell = "[green]ok[/]" if selff else "[dim]skipped[/]" + else: + self_cell = f"[red]{selff.get('status')}[/]" + detail = "" + body = agent.get("body") or {} + if isinstance(body, dict): + detail = body.get("release", {}).get("sha") or body.get("detail", {}).get("error") or "" + table.add_row(r["name"], r.get("address") or "", agent_cell, self_cell, str(detail)[:80]) + console.print(table) + + if any_failure: + raise typer.Exit(1) + + @swarm_app.command("deckies") + def swarm_deckies( + host: Optional[str] = typer.Option(None, "--host", help="Filter by worker name or UUID"), + state: Optional[str] = typer.Option(None, "--state", help="Filter by shard state (pending|running|failed|torn_down)"), + url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"), + json_out: bool = typer.Option(False, "--json", help="Emit JSON instead of a table"), + ) -> None: + """List deployed deckies across the swarm with their owning worker host.""" + base = _utils._swarmctl_base_url(url) + + host_uuid: Optional[str] = None + if host: + resp = _utils._http_request("GET", base + "/swarm/hosts") + rows = resp.json() + match = next((r for r in rows if r.get("uuid") == host or r.get("name") == host), None) + if match is None: + console.print(f"[red]No enrolled worker matching '{host}'.[/]") + raise typer.Exit(1) + host_uuid = match["uuid"] + + query = [] + if host_uuid: + query.append(f"host_uuid={host_uuid}") + if state: + query.append(f"state={state}") + path = "/swarm/deckies" + ("?" + "&".join(query) if query else "") + + resp = _utils._http_request("GET", base + path) + rows = resp.json() + + if json_out: + console.print_json(data=rows) + return + + if not rows: + console.print("[dim]No deckies deployed.[/]") + return + + table = Table(title="DECNET swarm deckies") + for col in ("decky", "host", "address", "state", "services"): + table.add_column(col) + for r in rows: + services = ",".join(r.get("services") or []) or "—" + state_val = r.get("state") or "pending" + colored = { + "running": f"[green]{state_val}[/]", + "failed": f"[red]{state_val}[/]", + "pending": f"[yellow]{state_val}[/]", + "torn_down": f"[dim]{state_val}[/]", + }.get(state_val, state_val) + table.add_row( + r.get("decky_name") or "", + r.get("host_name") or "", + r.get("host_address") or "", + colored, + services, + ) + console.print(table) + + @swarm_app.command("decommission") + def swarm_decommission( + name: Optional[str] = typer.Option(None, "--name", help="Worker hostname"), + uuid: Optional[str] = typer.Option(None, "--uuid", help="Worker UUID (skip lookup)"), + url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"), + yes: bool = typer.Option(False, "--yes", "-y", help="Skip interactive confirmation"), + ) -> None: + """Remove a worker from the swarm (cascades decky shard rows).""" + if not (name or uuid): + console.print("[red]Supply --name or --uuid.[/]") + raise typer.Exit(2) + + base = _utils._swarmctl_base_url(url) + target_uuid = uuid + target_name = name + if target_uuid is None: + resp = _utils._http_request("GET", base + "/swarm/hosts") + rows = resp.json() + match = next((r for r in rows if r.get("name") == name), None) + if match is None: + console.print(f"[red]No enrolled worker named '{name}'.[/]") + raise typer.Exit(1) + target_uuid = match["uuid"] + target_name = match.get("name") or target_name + + if not yes: + confirm = typer.confirm(f"Decommission worker {target_name!r} ({target_uuid})?", default=False) + if not confirm: + console.print("[dim]Aborted.[/]") + raise typer.Exit(0) + + _utils._http_request("DELETE", f"{base}/swarm/hosts/{target_uuid}") + console.print(f"[green]Decommissioned {target_name or target_uuid}.[/]") diff --git a/decnet/cli/swarmctl.py b/decnet/cli/swarmctl.py new file mode 100644 index 0000000..687823c --- /dev/null +++ b/decnet/cli/swarmctl.py @@ -0,0 +1,104 @@ +from __future__ import annotations + +import os +import signal +import subprocess # nosec B404 +import sys +from typing import Optional + +import typer + +from . import utils as _utils +from .gating import _require_master_mode +from .utils import console, log + + +def register(app: typer.Typer) -> None: + @app.command() + def swarmctl( + port: int = typer.Option(8770, "--port", help="Port for the swarm controller"), + host: str = typer.Option("127.0.0.1", "--host", help="Bind address for the swarm controller"), + daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), + no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"), + tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"), + cert: Optional[str] = typer.Option(None, "--cert", help="BYOC: path to TLS server cert (PEM). Auto-issues from the DECNET CA if omitted."), + key: Optional[str] = typer.Option(None, "--key", help="BYOC: path to TLS server private key (PEM)."), + client_ca: Optional[str] = typer.Option(None, "--client-ca", help="CA bundle used to verify worker client certs. Defaults to the DECNET CA."), + ) -> None: + """Run the DECNET SWARM controller (master-side, separate process from `decnet api`). + + By default, `decnet swarmctl` auto-spawns `decnet listener` as a fully- + detached sibling process so the master starts accepting forwarder + connections on 6514 without a second manual invocation. The listener + survives swarmctl restarts and crashes — if it dies on its own, + restart it manually with `decnet listener --daemon …`. Pass + --no-listener to skip. + + Pass ``--tls`` to serve over HTTPS with mutual-TLS enforcement. By + default the server cert is auto-issued from the DECNET CA under + ``~/.decnet/swarmctl/`` so enrolled workers (which already ship that + CA's ``ca.crt``) trust it out of the box. BYOC via ``--cert``/``--key`` + if you need a publicly-trusted or externally-managed cert. + """ + _require_master_mode("swarmctl") + if daemon: + log.info("swarmctl daemonizing host=%s port=%d", host, port) + _utils._daemonize() + + if not no_listener: + listener_host = os.environ.get("DECNET_LISTENER_HOST", "0.0.0.0") # nosec B104 + listener_port = int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514")) + lst_argv = [ + sys.executable, "-m", "decnet", "listener", + "--host", listener_host, + "--port", str(listener_port), + "--daemon", + ] + try: + pid = _utils._spawn_detached(lst_argv, _utils._pid_dir() / "listener.pid") + log.info("swarmctl auto-spawned listener pid=%d bind=%s:%d", + pid, listener_host, listener_port) + console.print(f"[dim]Auto-spawned listener (pid {pid}) on {listener_host}:{listener_port}.[/]") + except Exception as e: # noqa: BLE001 + log.warning("swarmctl could not auto-spawn listener: %s", e) + console.print(f"[yellow]listener auto-spawn skipped: {e}[/]") + + log.info("swarmctl command invoked host=%s port=%d tls=%s", host, port, tls) + scheme = "https" if tls else "http" + console.print(f"[green]Starting DECNET SWARM controller on {scheme}://{host}:{port}...[/]") + _cmd = [sys.executable, "-m", "uvicorn", "decnet.web.swarm_api:app", + "--host", host, "--port", str(port)] + if tls: + from decnet.swarm import pki as _pki + if cert and key: + cert_path, key_path = cert, key + elif cert or key: + console.print("[red]--cert and --key must be provided together.[/]") + raise typer.Exit(code=2) + else: + auto_cert, auto_key, _auto_ca = _pki.ensure_swarmctl_cert(host) + cert_path, key_path = str(auto_cert), str(auto_key) + console.print(f"[dim]Auto-issued swarmctl server cert → {cert_path}[/]") + ca_path = client_ca or str(_pki.DEFAULT_CA_DIR / "ca.crt") + _cmd += [ + "--ssl-keyfile", key_path, + "--ssl-certfile", cert_path, + "--ssl-ca-certs", ca_path, + "--ssl-cert-reqs", "2", + ] + try: + proc = subprocess.Popen(_cmd, start_new_session=True) # nosec B603 B404 + try: + proc.wait() + except KeyboardInterrupt: + try: + os.killpg(proc.pid, signal.SIGTERM) + try: + proc.wait(timeout=10) + except subprocess.TimeoutExpired: + os.killpg(proc.pid, signal.SIGKILL) + proc.wait() + except ProcessLookupError: + pass + except (FileNotFoundError, subprocess.SubprocessError): + console.print("[red]Failed to start swarmctl. Ensure 'uvicorn' is installed in the current environment.[/]") diff --git a/decnet/cli/updater.py b/decnet/cli/updater.py new file mode 100644 index 0000000..20c9ac4 --- /dev/null +++ b/decnet/cli/updater.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +import pathlib as _pathlib +from typing import Optional + +import typer + +from . import utils as _utils +from .utils import console, log + + +def register(app: typer.Typer) -> None: + @app.command() + def updater( + port: int = typer.Option(8766, "--port", help="Port for the self-updater daemon"), + host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the updater"), # nosec B104 + updater_dir: Optional[str] = typer.Option(None, "--updater-dir", help="Updater cert bundle dir (default: ~/.decnet/updater)"), + install_dir: Optional[str] = typer.Option(None, "--install-dir", help="Release install root (default: /opt/decnet)"), + agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker agent cert bundle (for local /health probes; default: ~/.decnet/agent)"), + daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), + ) -> None: + """Run the DECNET self-updater (requires a bundle in ~/.decnet/updater/).""" + from decnet.swarm import pki as _pki + from decnet.updater import server as _upd_server + + resolved_updater = _pathlib.Path(updater_dir) if updater_dir else _upd_server.DEFAULT_UPDATER_DIR + resolved_install = _pathlib.Path(install_dir) if install_dir else _pathlib.Path("/opt/decnet") + resolved_agent = _pathlib.Path(agent_dir) if agent_dir else _pki.DEFAULT_AGENT_DIR + + if daemon: + log.info("updater daemonizing host=%s port=%d", host, port) + _utils._daemonize() + + log.info( + "updater command invoked host=%s port=%d updater_dir=%s install_dir=%s", + host, port, resolved_updater, resolved_install, + ) + console.print(f"[green]Starting DECNET self-updater on {host}:{port} (mTLS)...[/]") + rc = _upd_server.run( + host, port, + updater_dir=resolved_updater, + install_dir=resolved_install, + agent_dir=resolved_agent, + ) + if rc != 0: + raise typer.Exit(rc) diff --git a/decnet/cli/utils.py b/decnet/cli/utils.py new file mode 100644 index 0000000..1954173 --- /dev/null +++ b/decnet/cli/utils.py @@ -0,0 +1,177 @@ +"""Shared CLI helpers: console, logger, process management, swarm HTTP client. + +Submodules reference these as ``from . import utils`` then ``utils.foo(...)`` +so tests can patch ``decnet.cli.utils.`` and have every caller see it. +""" + +from __future__ import annotations + +import os +import signal +import subprocess # nosec B404 +import sys +from pathlib import Path +from typing import Optional + +import typer +from rich.console import Console + +from decnet.logging import get_logger +from decnet.env import DECNET_API_HOST, DECNET_API_PORT, DECNET_INGEST_LOG_FILE + +log = get_logger("cli") +console = Console() + + +def _daemonize() -> None: + """Fork the current process into a background daemon (Unix double-fork).""" + if os.fork() > 0: + raise SystemExit(0) + os.setsid() + if os.fork() > 0: + raise SystemExit(0) + sys.stdout = open(os.devnull, "w") # noqa: SIM115 + sys.stderr = open(os.devnull, "w") # noqa: SIM115 + sys.stdin = open(os.devnull, "r") # noqa: SIM115 + + +def _pid_dir() -> Path: + """Return the writable PID directory. + + /opt/decnet when it exists and is writable (production), else + ~/.decnet (dev). The directory is created if needed.""" + candidates = [Path("/opt/decnet"), Path.home() / ".decnet"] + for path in candidates: + try: + path.mkdir(parents=True, exist_ok=True) + if os.access(path, os.W_OK): + return path + except (PermissionError, OSError): + continue + return Path("/tmp") # nosec B108 + + +def _spawn_detached(argv: list[str], pid_file: Path) -> int: + """Spawn a DECNET subcommand as a fully-independent sibling process. + + The parent does NOT wait() on this child. start_new_session=True puts + the child in its own session so SIGHUP on parent exit doesn't kill it; + stdin/stdout/stderr go to /dev/null so the launching shell can close + without EIO on the child. close_fds=True prevents inherited sockets + from pinning ports we're trying to rebind. + + This is deliberately NOT a supervisor — we fire-and-forget. If the + child dies, the operator restarts it manually via its own subcommand. + """ + if pid_file.exists(): + try: + existing = int(pid_file.read_text().strip()) + os.kill(existing, 0) + return existing + except (ValueError, ProcessLookupError, PermissionError, OSError): + pass # stale pid_file — fall through and spawn + + with open(os.devnull, "rb") as dn_in, open(os.devnull, "ab") as dn_out: + proc = subprocess.Popen( # nosec B603 + argv, + stdin=dn_in, stdout=dn_out, stderr=dn_out, + start_new_session=True, close_fds=True, + ) + pid_file.parent.mkdir(parents=True, exist_ok=True) + pid_file.write_text(f"{proc.pid}\n") + return proc.pid + + +def _is_running(match_fn) -> int | None: + """Return PID of a running DECNET process matching ``match_fn(cmdline)``, or None.""" + import psutil + + for proc in psutil.process_iter(["pid", "cmdline"]): + try: + cmd = proc.info["cmdline"] + if cmd and match_fn(cmd): + return proc.info["pid"] + except (psutil.NoSuchProcess, psutil.AccessDenied): + continue + return None + + +def _service_registry(log_file: str) -> list[tuple[str, callable, list[str]]]: + """Return the microservice registry for health-check and relaunch. + + On agents these run as systemd units invoking /usr/local/bin/decnet, + which doesn't include "decnet.cli" in its cmdline. On master dev boxes + they're launched via `python -m decnet.cli`. Match either form — cmd + is a list of argv tokens, so substring-check the joined string. + """ + _py = sys.executable + + def _matches(sub: str, extras: tuple[str, ...] = ()): + def _check(cmd) -> bool: + joined = " ".join(cmd) if not isinstance(cmd, str) else cmd + if "decnet" not in joined: + return False + if sub not in joined: + return False + return all(e in joined for e in extras) + return _check + + return [ + ("Collector", _matches("collect"), + [_py, "-m", "decnet.cli", "collect", "--daemon", "--log-file", log_file]), + ("Mutator", _matches("mutate", ("--watch",)), + [_py, "-m", "decnet.cli", "mutate", "--daemon", "--watch"]), + ("Prober", _matches("probe"), + [_py, "-m", "decnet.cli", "probe", "--daemon", "--log-file", log_file]), + ("Profiler", _matches("profiler"), + [_py, "-m", "decnet.cli", "profiler", "--daemon"]), + ("Sniffer", _matches("sniffer"), + [_py, "-m", "decnet.cli", "sniffer", "--daemon", "--log-file", log_file]), + ("API", + lambda cmd: "uvicorn" in cmd and "decnet.web.api:app" in cmd, + [_py, "-m", "uvicorn", "decnet.web.api:app", + "--host", DECNET_API_HOST, "--port", str(DECNET_API_PORT)]), + ] + + +def _kill_all_services() -> None: + """Find and kill all running DECNET microservice processes.""" + registry = _service_registry(str(DECNET_INGEST_LOG_FILE)) + killed = 0 + for name, match_fn, _launch_args in registry: + pid = _is_running(match_fn) + if pid is not None: + console.print(f"[yellow]Stopping {name} (PID {pid})...[/]") + os.kill(pid, signal.SIGTERM) + killed += 1 + + if killed: + console.print(f"[green]{killed} background process(es) stopped.[/]") + else: + console.print("[dim]No DECNET services were running.[/]") + + +_DEFAULT_SWARMCTL_URL = "http://127.0.0.1:8770" + + +def _swarmctl_base_url(url: Optional[str]) -> str: + return url or os.environ.get("DECNET_SWARMCTL_URL", _DEFAULT_SWARMCTL_URL) + + +def _http_request(method: str, url: str, *, json_body: Optional[dict] = None, timeout: float = 30.0): + """Tiny sync wrapper around httpx; avoids leaking async into the CLI.""" + import httpx + try: + resp = httpx.request(method, url, json=json_body, timeout=timeout) + except httpx.HTTPError as exc: + console.print(f"[red]Could not reach swarm controller at {url}: {exc}[/]") + console.print("[dim]Is `decnet swarmctl` running?[/]") + raise typer.Exit(2) + if resp.status_code >= 400: + try: + detail = resp.json().get("detail", resp.text) + except Exception: # nosec B110 + detail = resp.text + console.print(f"[red]{method} {url} failed: {resp.status_code} — {detail}[/]") + raise typer.Exit(1) + return resp diff --git a/decnet/cli/web.py b/decnet/cli/web.py new file mode 100644 index 0000000..a7abb7b --- /dev/null +++ b/decnet/cli/web.py @@ -0,0 +1,120 @@ +from __future__ import annotations + +import typer + +from decnet.env import DECNET_API_PORT, DECNET_WEB_HOST, DECNET_WEB_PORT + +from . import utils as _utils +from .utils import console, log + + +def register(app: typer.Typer) -> None: + @app.command(name="web") + def serve_web( + web_port: int = typer.Option(DECNET_WEB_PORT, "--web-port", help="Port to serve the DECNET Web Dashboard"), + host: str = typer.Option(DECNET_WEB_HOST, "--host", help="Host IP to serve the Web Dashboard"), + api_port: int = typer.Option(DECNET_API_PORT, "--api-port", help="Port the DECNET API is listening on"), + daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), + ) -> None: + """Serve the DECNET Web Dashboard frontend. + + Proxies /api/* requests to the API server so the frontend can use + relative URLs (/api/v1/...) with no CORS configuration required. + """ + import http.client + import http.server + import os + import socketserver + from pathlib import Path + + dist_dir = Path(__file__).resolve().parent.parent.parent / "decnet_web" / "dist" + + if not dist_dir.exists(): + console.print(f"[red]Frontend build not found at {dist_dir}. Make sure you run 'npm run build' inside 'decnet_web'.[/]") + raise typer.Exit(1) + + if daemon: + log.info("web daemonizing host=%s port=%d api_port=%d", host, web_port, api_port) + _utils._daemonize() + + _api_port = api_port + + class SPAHTTPRequestHandler(http.server.SimpleHTTPRequestHandler): + def do_GET(self): + if self.path.startswith("/api/"): + self._proxy("GET") + return + path = self.translate_path(self.path) + if not Path(path).exists() or Path(path).is_dir(): + self.path = "/index.html" + return super().do_GET() + + def do_POST(self): + if self.path.startswith("/api/"): + self._proxy("POST") + return + self.send_error(405) + + def do_PUT(self): + if self.path.startswith("/api/"): + self._proxy("PUT") + return + self.send_error(405) + + def do_DELETE(self): + if self.path.startswith("/api/"): + self._proxy("DELETE") + return + self.send_error(405) + + def _proxy(self, method: str) -> None: + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length) if content_length else None + + forward = {k: v for k, v in self.headers.items() + if k.lower() not in ("host", "connection")} + + try: + conn = http.client.HTTPConnection("127.0.0.1", _api_port, timeout=120) + conn.request(method, self.path, body=body, headers=forward) + resp = conn.getresponse() + + self.send_response(resp.status) + for key, val in resp.getheaders(): + if key.lower() not in ("connection", "transfer-encoding"): + self.send_header(key, val) + self.end_headers() + + content_type = resp.getheader("Content-Type", "") + if "text/event-stream" in content_type: + conn.sock.settimeout(None) + + _read = getattr(resp, "read1", resp.read) + while True: + chunk = _read(4096) + if not chunk: + break + self.wfile.write(chunk) + self.wfile.flush() + except Exception as exc: + log.warning("web proxy error %s %s: %s", method, self.path, exc) + self.send_error(502, f"API proxy error: {exc}") + finally: + try: + conn.close() + except Exception: # nosec B110 — best-effort conn cleanup + pass + + def log_message(self, fmt: str, *args: object) -> None: + log.debug("web %s", fmt % args) + + os.chdir(dist_dir) + + socketserver.TCPServer.allow_reuse_address = True + with socketserver.ThreadingTCPServer((host, web_port), SPAHTTPRequestHandler) as httpd: + console.print(f"[green]Serving DECNET Web Dashboard on http://{host}:{web_port}[/]") + console.print(f"[dim]Proxying /api/* → http://127.0.0.1:{_api_port}[/]") + try: + httpd.serve_forever() + except KeyboardInterrupt: + console.print("\n[dim]Shutting down dashboard server.[/]") diff --git a/decnet/cli/workers.py b/decnet/cli/workers.py new file mode 100644 index 0000000..cc0ba52 --- /dev/null +++ b/decnet/cli/workers.py @@ -0,0 +1,142 @@ +from __future__ import annotations + +from typing import Optional + +import typer + +from decnet.env import DECNET_INGEST_LOG_FILE + +from . import utils as _utils +from .utils import console, log + + +def register(app: typer.Typer) -> None: + @app.command() + def probe( + log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path for RFC 5424 syslog + .json output (reads attackers from .json, writes results to both)"), + interval: int = typer.Option(300, "--interval", "-i", help="Seconds between probe cycles (default: 300)"), + timeout: float = typer.Option(5.0, "--timeout", help="Per-probe TCP timeout in seconds"), + daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background (used by deploy, no console output)"), + ) -> None: + """Fingerprint attackers (JARM + HASSH + TCP/IP stack) discovered in the log stream.""" + import asyncio + from decnet.prober import prober_worker + + if daemon: + log.info("probe daemonizing log_file=%s interval=%d", log_file, interval) + _utils._daemonize() + asyncio.run(prober_worker(log_file, interval=interval, timeout=timeout)) + return + + log.info("probe command invoked log_file=%s interval=%d", log_file, interval) + console.print(f"[bold cyan]DECNET-PROBER[/] watching {log_file} for attackers (interval: {interval}s)") + console.print("[dim]Press Ctrl+C to stop[/]") + try: + asyncio.run(prober_worker(log_file, interval=interval, timeout=timeout)) + except KeyboardInterrupt: + console.print("\n[yellow]DECNET-PROBER stopped.[/]") + + @app.command() + def collect( + log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write RFC 5424 syslog lines and .json records"), + daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), + ) -> None: + """Stream Docker logs from all running decky service containers to a log file.""" + import asyncio + from decnet.collector import log_collector_worker + + if daemon: + log.info("collect daemonizing log_file=%s", log_file) + _utils._daemonize() + + log.info("collect command invoked log_file=%s", log_file) + console.print(f"[bold cyan]Collector starting[/] → {log_file}") + asyncio.run(log_collector_worker(log_file)) + + @app.command() + def mutate( + watch: bool = typer.Option(False, "--watch", "-w", help="Run continuously and mutate deckies according to their interval"), + decky_name: Optional[str] = typer.Option(None, "--decky", help="Force mutate a specific decky immediately"), + force_all: bool = typer.Option(False, "--all", help="Force mutate all deckies immediately"), + daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), + ) -> None: + """Manually trigger or continuously watch for decky mutation.""" + import asyncio + from decnet.mutator import mutate_decky, mutate_all, run_watch_loop + from decnet.web.dependencies import repo + + if daemon: + log.info("mutate daemonizing watch=%s", watch) + _utils._daemonize() + + async def _run() -> None: + await repo.initialize() + if watch: + await run_watch_loop(repo) + elif decky_name: + await mutate_decky(decky_name, repo) + elif force_all: + await mutate_all(force=True, repo=repo) + else: + await mutate_all(force=False, repo=repo) + + asyncio.run(_run()) + + @app.command(name="correlate") + def correlate( + log_file: Optional[str] = typer.Option(None, "--log-file", "-f", help="Path to DECNET syslog file to analyse"), + min_deckies: int = typer.Option(2, "--min-deckies", "-m", help="Minimum number of distinct deckies an IP must touch to be reported"), + output: str = typer.Option("table", "--output", "-o", help="Output format: table | json | syslog"), + emit_syslog: bool = typer.Option(False, "--emit-syslog", help="Also print traversal events as RFC 5424 lines (for SIEM piping)"), + daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), + ) -> None: + """Analyse logs for cross-decky traversals and print the attacker movement graph.""" + import sys + import json as _json + from pathlib import Path + from decnet.correlation.engine import CorrelationEngine + + if daemon: + log.info("correlate daemonizing log_file=%s", log_file) + _utils._daemonize() + + engine = CorrelationEngine() + + if log_file: + path = Path(log_file) + if not path.exists(): + console.print(f"[red]Log file not found: {log_file}[/]") + raise typer.Exit(1) + engine.ingest_file(path) + elif not sys.stdin.isatty(): + for line in sys.stdin: + engine.ingest(line) + else: + console.print("[red]Provide --log-file or pipe log data via stdin.[/]") + raise typer.Exit(1) + + traversals = engine.traversals(min_deckies) + + if output == "json": + console.print_json(_json.dumps(engine.report_json(min_deckies), indent=2)) + elif output == "syslog": + for line in engine.traversal_syslog_lines(min_deckies): + typer.echo(line) + else: + if not traversals: + console.print( + f"[yellow]No traversals detected " + f"(min_deckies={min_deckies}, events_indexed={engine.events_indexed}).[/]" + ) + else: + console.print(engine.report_table(min_deckies)) + console.print( + f"[dim]Parsed {engine.lines_parsed} lines · " + f"indexed {engine.events_indexed} events · " + f"{len(engine.all_attackers())} unique IPs · " + f"[bold]{len(traversals)}[/] traversal(s)[/]" + ) + + if emit_syslog: + for line in engine.traversal_syslog_lines(min_deckies): + typer.echo(line) diff --git a/tests/api/swarm_mgmt/test_enroll_bundle.py b/tests/api/swarm_mgmt/test_enroll_bundle.py index e494763..fa6874b 100644 --- a/tests/api/swarm_mgmt/test_enroll_bundle.py +++ b/tests/api/swarm_mgmt/test_enroll_bundle.py @@ -293,7 +293,7 @@ async def test_get_tgz_contents(client, auth_token, tmp_path): assert "home/.decnet/agent/worker.crt" in names assert "home/.decnet/agent/worker.key" in names assert "services.ini" in names - assert "decnet/cli.py" in names # source shipped + assert "decnet/cli/__init__.py" in names # source shipped assert "pyproject.toml" in names # Excluded paths must NOT be shipped diff --git a/tests/swarm/test_cli_swarm.py b/tests/swarm/test_cli_swarm.py index 764d93c..81d4ba8 100644 --- a/tests/swarm/test_cli_swarm.py +++ b/tests/swarm/test_cli_swarm.py @@ -15,7 +15,7 @@ import pytest from typer.testing import CliRunner from decnet import cli as cli_mod -from decnet.cli import app +from decnet.cli import app, deploy as cli_deploy, utils as cli_utils runner = CliRunner() @@ -49,7 +49,7 @@ def http_stub(monkeypatch: pytest.MonkeyPatch) -> _HttpStub: return resp raise AssertionError(f"Unscripted HTTP call: {method} {url}") - monkeypatch.setattr(cli_mod, "_http_request", _fake) + monkeypatch.setattr(cli_utils, "_http_request", _fake) return calls @@ -259,9 +259,9 @@ def test_deploy_swarm_round_robins_and_posts(http_stub, monkeypatch: pytest.Monk }) # Stub network detection so we don't need root / real NICs. - monkeypatch.setattr(cli_mod, "detect_interface", lambda: "eth0") - monkeypatch.setattr(cli_mod, "detect_subnet", lambda _iface: ("10.0.0.0/24", "10.0.0.254")) - monkeypatch.setattr(cli_mod, "get_host_ip", lambda _iface: "10.0.0.100") + monkeypatch.setattr(cli_deploy, "detect_interface", lambda: "eth0") + monkeypatch.setattr(cli_deploy, "detect_subnet", lambda _iface: ("10.0.0.0/24", "10.0.0.254")) + monkeypatch.setattr(cli_deploy, "get_host_ip", lambda _iface: "10.0.0.100") result = runner.invoke(app, [ "deploy", "--mode", "swarm", "--deckies", "3", @@ -280,9 +280,9 @@ def test_deploy_swarm_round_robins_and_posts(http_stub, monkeypatch: pytest.Monk def test_deploy_swarm_fails_if_no_workers(http_stub, monkeypatch: pytest.MonkeyPatch) -> None: http_stub.script[("GET", "/swarm/hosts?host_status=enrolled")] = _FakeResp([]) http_stub.script[("GET", "/swarm/hosts?host_status=active")] = _FakeResp([]) - monkeypatch.setattr(cli_mod, "detect_interface", lambda: "eth0") - monkeypatch.setattr(cli_mod, "detect_subnet", lambda _iface: ("10.0.0.0/24", "10.0.0.254")) - monkeypatch.setattr(cli_mod, "get_host_ip", lambda _iface: "10.0.0.100") + monkeypatch.setattr(cli_deploy, "detect_interface", lambda: "eth0") + monkeypatch.setattr(cli_deploy, "detect_subnet", lambda _iface: ("10.0.0.0/24", "10.0.0.254")) + monkeypatch.setattr(cli_deploy, "get_host_ip", lambda _iface: "10.0.0.100") result = runner.invoke(app, [ "deploy", "--mode", "swarm", "--deckies", "2", diff --git a/tests/swarm/test_cli_swarm_update.py b/tests/swarm/test_cli_swarm_update.py index a36a9ec..9bbf70d 100644 --- a/tests/swarm/test_cli_swarm_update.py +++ b/tests/swarm/test_cli_swarm_update.py @@ -14,7 +14,7 @@ import pytest from typer.testing import CliRunner from decnet import cli as cli_mod -from decnet.cli import app +from decnet.cli import app, utils as cli_utils runner = CliRunner() @@ -40,7 +40,7 @@ def http_stub(monkeypatch: pytest.MonkeyPatch) -> dict: return _FakeResp(state["hosts"]) raise AssertionError(f"Unscripted HTTP call: {method} {url}") - monkeypatch.setattr(cli_mod, "_http_request", _fake) + monkeypatch.setattr(cli_utils, "_http_request", _fake) return state diff --git a/tests/test_cli.py b/tests/test_cli.py index ed4e180..9b4bfe6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -60,10 +60,10 @@ class TestArchetypesCommand: class TestDeployCommand: @patch("decnet.engine.deploy") - @patch("decnet.cli.allocate_ips", return_value=["192.168.1.10"]) - @patch("decnet.cli.get_host_ip", return_value="192.168.1.2") - @patch("decnet.cli.detect_subnet", return_value=("192.168.1.0/24", "192.168.1.1")) - @patch("decnet.cli.detect_interface", return_value="eth0") + @patch("decnet.cli.deploy.allocate_ips", return_value=["192.168.1.10"]) + @patch("decnet.cli.deploy.get_host_ip", return_value="192.168.1.2") + @patch("decnet.cli.deploy.detect_subnet", return_value=("192.168.1.0/24", "192.168.1.1")) + @patch("decnet.cli.deploy.detect_interface", return_value="eth0") def test_deploy_dry_run(self, mock_iface, mock_subnet, mock_hip, mock_ips, mock_deploy): result = runner.invoke(app, [ @@ -73,13 +73,13 @@ class TestDeployCommand: mock_deploy.assert_called_once() def test_deploy_no_interface_found(self): - with patch("decnet.cli.detect_interface", side_effect=ValueError("No interface")): + with patch("decnet.cli.deploy.detect_interface", side_effect=ValueError("No interface")): result = runner.invoke(app, ["deploy", "--deckies", "1"]) assert result.exit_code == 1 def test_deploy_no_subnet_found(self): - with patch("decnet.cli.detect_interface", return_value="eth0"), \ - patch("decnet.cli.detect_subnet", side_effect=ValueError("No subnet")): + with patch("decnet.cli.deploy.detect_interface", return_value="eth0"), \ + patch("decnet.cli.deploy.detect_subnet", side_effect=ValueError("No subnet")): result = runner.invoke(app, ["deploy", "--deckies", "1", "--services", "ssh"]) assert result.exit_code == 1 @@ -87,21 +87,21 @@ class TestDeployCommand: result = runner.invoke(app, ["deploy", "--mode", "invalid", "--deckies", "1"]) assert result.exit_code == 1 - @patch("decnet.cli.detect_interface", return_value="eth0") + @patch("decnet.cli.deploy.detect_interface", return_value="eth0") def test_deploy_no_deckies_no_config(self, mock_iface): result = runner.invoke(app, ["deploy", "--services", "ssh"]) assert result.exit_code == 1 - @patch("decnet.cli.detect_interface", return_value="eth0") + @patch("decnet.cli.deploy.detect_interface", return_value="eth0") def test_deploy_no_services_no_randomize(self, mock_iface): result = runner.invoke(app, ["deploy", "--deckies", "1"]) assert result.exit_code == 1 @patch("decnet.engine.deploy") - @patch("decnet.cli.allocate_ips", return_value=["192.168.1.10"]) - @patch("decnet.cli.get_host_ip", return_value="192.168.1.2") - @patch("decnet.cli.detect_subnet", return_value=("192.168.1.0/24", "192.168.1.1")) - @patch("decnet.cli.detect_interface", return_value="eth0") + @patch("decnet.cli.deploy.allocate_ips", return_value=["192.168.1.10"]) + @patch("decnet.cli.deploy.get_host_ip", return_value="192.168.1.2") + @patch("decnet.cli.deploy.detect_subnet", return_value=("192.168.1.0/24", "192.168.1.1")) + @patch("decnet.cli.deploy.detect_interface", return_value="eth0") def test_deploy_with_archetype(self, mock_iface, mock_subnet, mock_hip, mock_ips, mock_deploy): result = runner.invoke(app, [ @@ -117,10 +117,10 @@ class TestDeployCommand: @patch("decnet.engine.deploy") @patch("subprocess.Popen") - @patch("decnet.cli.allocate_ips", return_value=["192.168.1.10"]) - @patch("decnet.cli.get_host_ip", return_value="192.168.1.2") - @patch("decnet.cli.detect_subnet", return_value=("192.168.1.0/24", "192.168.1.1")) - @patch("decnet.cli.detect_interface", return_value="eth0") + @patch("decnet.cli.deploy.allocate_ips", return_value=["192.168.1.10"]) + @patch("decnet.cli.deploy.get_host_ip", return_value="192.168.1.2") + @patch("decnet.cli.deploy.detect_subnet", return_value=("192.168.1.0/24", "192.168.1.1")) + @patch("decnet.cli.deploy.detect_interface", return_value="eth0") def test_deploy_full_with_api(self, mock_iface, mock_subnet, mock_hip, mock_ips, mock_popen, mock_deploy): # Test non-dry-run with API and collector starts @@ -131,10 +131,10 @@ class TestDeployCommand: assert mock_popen.call_count >= 1 # API @patch("decnet.engine.deploy") - @patch("decnet.cli.allocate_ips", return_value=["192.168.1.10"]) - @patch("decnet.cli.get_host_ip", return_value="192.168.1.2") - @patch("decnet.cli.detect_subnet", return_value=("192.168.1.0/24", "192.168.1.1")) - @patch("decnet.cli.detect_interface", return_value="eth0") + @patch("decnet.cli.deploy.allocate_ips", return_value=["192.168.1.10"]) + @patch("decnet.cli.deploy.get_host_ip", return_value="192.168.1.2") + @patch("decnet.cli.deploy.detect_subnet", return_value=("192.168.1.0/24", "192.168.1.1")) + @patch("decnet.cli.deploy.detect_interface", return_value="eth0") def test_deploy_with_distro(self, mock_iface, mock_subnet, mock_hip, mock_ips, mock_deploy): result = runner.invoke(app, [ @@ -149,10 +149,10 @@ class TestDeployCommand: assert result.exit_code == 1 @patch("decnet.engine.deploy") - @patch("decnet.cli.load_ini") - @patch("decnet.cli.get_host_ip", return_value="192.168.1.2") - @patch("decnet.cli.detect_subnet", return_value=("192.168.1.0/24", "192.168.1.1")) - @patch("decnet.cli.detect_interface", return_value="eth0") + @patch("decnet.cli.deploy.load_ini") + @patch("decnet.cli.deploy.get_host_ip", return_value="192.168.1.2") + @patch("decnet.cli.deploy.detect_subnet", return_value=("192.168.1.0/24", "192.168.1.1")) + @patch("decnet.cli.deploy.detect_interface", return_value="eth0") def test_deploy_with_config_file(self, mock_iface, mock_subnet, mock_hip, mock_load_ini, mock_deploy, tmp_path): from decnet.ini_loader import IniConfig, DeckySpec @@ -181,7 +181,7 @@ class TestTeardownCommand: result = runner.invoke(app, ["teardown"]) assert result.exit_code == 1 - @patch("decnet.cli._kill_all_services") + @patch("decnet.cli.utils._kill_all_services") @patch("decnet.engine.teardown") def test_teardown_all(self, mock_teardown, mock_kill): result = runner.invoke(app, ["teardown", "--all"]) diff --git a/tests/test_cli_db_reset.py b/tests/test_cli_db_reset.py index f85efb9..9795169 100644 --- a/tests/test_cli_db_reset.py +++ b/tests/test_cli_db_reset.py @@ -56,7 +56,7 @@ class TestDbResetDispatch: monkeypatch.setenv("DECNET_DB_URL", "mysql+aiomysql://u:p@h/d") mock = AsyncMock() - with patch("decnet.cli._db_reset_mysql_async", new=mock): + with patch("decnet.cli.db._db_reset_mysql_async", new=mock): result = runner.invoke(app, ["db-reset"]) assert result.exit_code == 0, result.stdout @@ -70,7 +70,7 @@ class TestDbResetDispatch: monkeypatch.setenv("DECNET_DB_URL", "mysql+aiomysql://u:p@h/d") mock = AsyncMock() - with patch("decnet.cli._db_reset_mysql_async", new=mock): + with patch("decnet.cli.db._db_reset_mysql_async", new=mock): result = runner.invoke(app, ["db-reset", "--i-know-what-im-doing"]) assert result.exit_code == 0, result.stdout @@ -81,7 +81,7 @@ class TestDbResetDispatch: monkeypatch.setenv("DECNET_DB_URL", "mysql+aiomysql://u:p@h/d") mock = AsyncMock() - with patch("decnet.cli._db_reset_mysql_async", new=mock): + with patch("decnet.cli.db._db_reset_mysql_async", new=mock): result = runner.invoke( app, ["db-reset", "--mode", "drop-tables", "--i-know-what-im-doing"] ) @@ -94,7 +94,7 @@ class TestDbResetDispatch: monkeypatch.setenv("DECNET_DB_URL", "mysql+aiomysql://from-env/db") mock = AsyncMock() - with patch("decnet.cli._db_reset_mysql_async", new=mock): + with patch("decnet.cli.db._db_reset_mysql_async", new=mock): result = runner.invoke(app, [ "db-reset", "--url", "mysql+aiomysql://override/db2", ])