feat(swarm): master-side SWARM controller (swarmctl) + agent CLI

Adds decnet/web/swarm_api.py as an independent FastAPI app with routers
for host enrollment, deployment dispatch (sharding DecnetConfig across
enrolled workers via AgentClient), and active health probing. Runs as
its own uvicorn subprocess via 'decnet swarmctl', mirroring the isolation
pattern used by 'decnet api'. Also wires up 'decnet agent' CLI entry for
the worker side.

29 tests added under tests/swarm/test_swarm_api.py cover enrollment
(including bundle generation + duplicate rejection), host CRUD, sharding
correctness, non-swarm-mode rejection, teardown, and health probes with
a stubbed AgentClient.
This commit is contained in:
2026-04-18 19:18:33 -04:00
parent cd0057c129
commit 63b0a58527
7 changed files with 838 additions and 0 deletions

View File

@@ -124,6 +124,64 @@ def api(
console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
@app.command()
def swarmctl(
port: int = typer.Option(8770, "--port", help="Port for the swarm controller"),
host: str = typer.Option("127.0.0.1", "--host", help="Bind address for the swarm controller"),
daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
) -> None:
"""Run the DECNET SWARM controller (master-side, separate process from `decnet api`)."""
import subprocess # nosec B404
import sys
import os
import signal
if daemon:
log.info("swarmctl daemonizing host=%s port=%d", host, port)
_daemonize()
log.info("swarmctl command invoked host=%s port=%d", host, port)
console.print(f"[green]Starting DECNET SWARM controller on {host}:{port}...[/]")
_cmd = [sys.executable, "-m", "uvicorn", "decnet.web.swarm_api:app",
"--host", host, "--port", str(port)]
try:
proc = subprocess.Popen(_cmd, start_new_session=True) # nosec B603 B404
try:
proc.wait()
except KeyboardInterrupt:
try:
os.killpg(proc.pid, signal.SIGTERM)
try:
proc.wait(timeout=10)
except subprocess.TimeoutExpired:
os.killpg(proc.pid, signal.SIGKILL)
proc.wait()
except ProcessLookupError:
pass
except (FileNotFoundError, subprocess.SubprocessError):
console.print("[red]Failed to start swarmctl. Ensure 'uvicorn' is installed in the current environment.[/]")
@app.command()
def agent(
port: int = typer.Option(8765, "--port", help="Port for the worker agent"),
host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the worker agent"), # nosec B104
daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
) -> None:
"""Run the DECNET SWARM worker agent (requires a cert bundle in ~/.decnet/agent/)."""
from decnet.agent import server as _agent_server
if daemon:
log.info("agent daemonizing host=%s port=%d", host, port)
_daemonize()
log.info("agent command invoked host=%s port=%d", host, port)
console.print(f"[green]Starting DECNET worker agent on {host}:{port} (mTLS)...[/]")
rc = _agent_server.run(host, port)
if rc != 0:
raise typer.Exit(rc)
@app.command()
def deploy(
mode: str = typer.Option("unihost", "--mode", "-m", help="Deployment mode: unihost | swarm"),