feat(swarm): master-side SWARM controller (swarmctl) + agent CLI
Adds decnet/web/swarm_api.py as an independent FastAPI app with routers for host enrollment, deployment dispatch (sharding DecnetConfig across enrolled workers via AgentClient), and active health probing. Runs as its own uvicorn subprocess via 'decnet swarmctl', mirroring the isolation pattern used by 'decnet api'. Also wires up 'decnet agent' CLI entry for the worker side. 29 tests added under tests/swarm/test_swarm_api.py cover enrollment (including bundle generation + duplicate rejection), host CRUD, sharding correctness, non-swarm-mode rejection, teardown, and health probes with a stubbed AgentClient.
This commit is contained in:
@@ -124,6 +124,64 @@ def api(
|
||||
console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
|
||||
|
||||
|
||||
@app.command()
|
||||
def swarmctl(
|
||||
port: int = typer.Option(8770, "--port", help="Port for the swarm controller"),
|
||||
host: str = typer.Option("127.0.0.1", "--host", help="Bind address for the swarm controller"),
|
||||
daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
|
||||
) -> None:
|
||||
"""Run the DECNET SWARM controller (master-side, separate process from `decnet api`)."""
|
||||
import subprocess # nosec B404
|
||||
import sys
|
||||
import os
|
||||
import signal
|
||||
|
||||
if daemon:
|
||||
log.info("swarmctl daemonizing host=%s port=%d", host, port)
|
||||
_daemonize()
|
||||
|
||||
log.info("swarmctl command invoked host=%s port=%d", host, port)
|
||||
console.print(f"[green]Starting DECNET SWARM controller on {host}:{port}...[/]")
|
||||
_cmd = [sys.executable, "-m", "uvicorn", "decnet.web.swarm_api:app",
|
||||
"--host", host, "--port", str(port)]
|
||||
try:
|
||||
proc = subprocess.Popen(_cmd, start_new_session=True) # nosec B603 B404
|
||||
try:
|
||||
proc.wait()
|
||||
except KeyboardInterrupt:
|
||||
try:
|
||||
os.killpg(proc.pid, signal.SIGTERM)
|
||||
try:
|
||||
proc.wait(timeout=10)
|
||||
except subprocess.TimeoutExpired:
|
||||
os.killpg(proc.pid, signal.SIGKILL)
|
||||
proc.wait()
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
except (FileNotFoundError, subprocess.SubprocessError):
|
||||
console.print("[red]Failed to start swarmctl. Ensure 'uvicorn' is installed in the current environment.[/]")
|
||||
|
||||
|
||||
@app.command()
|
||||
def agent(
|
||||
port: int = typer.Option(8765, "--port", help="Port for the worker agent"),
|
||||
host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the worker agent"), # nosec B104
|
||||
daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
|
||||
) -> None:
|
||||
"""Run the DECNET SWARM worker agent (requires a cert bundle in ~/.decnet/agent/)."""
|
||||
from decnet.agent import server as _agent_server
|
||||
|
||||
if daemon:
|
||||
log.info("agent daemonizing host=%s port=%d", host, port)
|
||||
_daemonize()
|
||||
|
||||
log.info("agent command invoked host=%s port=%d", host, port)
|
||||
console.print(f"[green]Starting DECNET worker agent on {host}:{port} (mTLS)...[/]")
|
||||
rc = _agent_server.run(host, port)
|
||||
if rc != 0:
|
||||
raise typer.Exit(rc)
|
||||
|
||||
|
||||
@app.command()
|
||||
def deploy(
|
||||
mode: str = typer.Option("unihost", "--mode", "-m", help="Deployment mode: unihost | swarm"),
|
||||
|
||||
Reference in New Issue
Block a user