feat(swarm-mgmt): agent enrollment bundle flow + admin swarm endpoints
This commit is contained in:
@@ -22,6 +22,7 @@ from .config.api_reinit import router as config_reinit_router
|
|||||||
from .health.api_get_health import router as health_router
|
from .health.api_get_health import router as health_router
|
||||||
from .artifacts.api_get_artifact import router as artifacts_router
|
from .artifacts.api_get_artifact import router as artifacts_router
|
||||||
from .swarm_updates import swarm_updates_router
|
from .swarm_updates import swarm_updates_router
|
||||||
|
from .swarm_mgmt import swarm_mgmt_router
|
||||||
|
|
||||||
api_router = APIRouter()
|
api_router = APIRouter()
|
||||||
|
|
||||||
@@ -64,3 +65,6 @@ api_router.include_router(artifacts_router)
|
|||||||
|
|
||||||
# Remote Updates (dashboard → worker updater daemons)
|
# Remote Updates (dashboard → worker updater daemons)
|
||||||
api_router.include_router(swarm_updates_router)
|
api_router.include_router(swarm_updates_router)
|
||||||
|
|
||||||
|
# Swarm Management (dashboard: hosts, deckies, agent enrollment bundles)
|
||||||
|
api_router.include_router(swarm_mgmt_router)
|
||||||
|
|||||||
24
decnet/web/router/swarm_mgmt/__init__.py
Normal file
24
decnet/web/router/swarm_mgmt/__init__.py
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
"""Swarm management endpoints for the React dashboard.
|
||||||
|
|
||||||
|
These are *not* the unauthenticated /swarm routes mounted on the separate
|
||||||
|
swarm-controller process (decnet/web/swarm_api.py on port 8770). These
|
||||||
|
live on the main web API, go through ``require_admin``, and are the
|
||||||
|
interface the dashboard uses to list hosts, decommission them, list
|
||||||
|
deckies across the fleet, and generate one-shot agent-enrollment
|
||||||
|
bundles.
|
||||||
|
|
||||||
|
Mounted under ``/api/v1/swarm`` by the main api router.
|
||||||
|
"""
|
||||||
|
from fastapi import APIRouter
|
||||||
|
|
||||||
|
from .api_list_hosts import router as list_hosts_router
|
||||||
|
from .api_decommission_host import router as decommission_host_router
|
||||||
|
from .api_list_deckies import router as list_deckies_router
|
||||||
|
from .api_enroll_bundle import router as enroll_bundle_router
|
||||||
|
|
||||||
|
swarm_mgmt_router = APIRouter(prefix="/swarm")
|
||||||
|
|
||||||
|
swarm_mgmt_router.include_router(list_hosts_router)
|
||||||
|
swarm_mgmt_router.include_router(decommission_host_router)
|
||||||
|
swarm_mgmt_router.include_router(list_deckies_router)
|
||||||
|
swarm_mgmt_router.include_router(enroll_bundle_router)
|
||||||
41
decnet/web/router/swarm_mgmt/api_decommission_host.py
Normal file
41
decnet/web/router/swarm_mgmt/api_decommission_host.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
"""DELETE /swarm/hosts/{uuid} — decommission a worker from the dashboard."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, status
|
||||||
|
|
||||||
|
from decnet.web.db.repository import BaseRepository
|
||||||
|
from decnet.web.dependencies import get_repo, require_admin
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete(
|
||||||
|
"/hosts/{uuid}",
|
||||||
|
status_code=status.HTTP_204_NO_CONTENT,
|
||||||
|
tags=["Swarm Management"],
|
||||||
|
)
|
||||||
|
async def decommission_host(
|
||||||
|
uuid: str,
|
||||||
|
admin: dict = Depends(require_admin),
|
||||||
|
repo: BaseRepository = Depends(get_repo),
|
||||||
|
) -> None:
|
||||||
|
row = await repo.get_swarm_host_by_uuid(uuid)
|
||||||
|
if row is None:
|
||||||
|
raise HTTPException(status_code=404, detail="host not found")
|
||||||
|
|
||||||
|
await repo.delete_decky_shards_for_host(uuid)
|
||||||
|
await repo.delete_swarm_host(uuid)
|
||||||
|
|
||||||
|
bundle_dir = pathlib.Path(row.get("cert_bundle_path") or "")
|
||||||
|
if bundle_dir.is_dir():
|
||||||
|
for child in bundle_dir.iterdir():
|
||||||
|
try:
|
||||||
|
child.unlink()
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
bundle_dir.rmdir()
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
354
decnet/web/router/swarm_mgmt/api_enroll_bundle.py
Normal file
354
decnet/web/router/swarm_mgmt/api_enroll_bundle.py
Normal file
@@ -0,0 +1,354 @@
|
|||||||
|
"""Agent-enrollment bundles — the Wazuh-style one-liner flow.
|
||||||
|
|
||||||
|
Three endpoints:
|
||||||
|
POST /swarm/enroll-bundle — admin issues certs + builds payload
|
||||||
|
GET /swarm/enroll-bundle/{t}.sh — bootstrap script (idempotent until .tgz)
|
||||||
|
GET /swarm/enroll-bundle/{t}.tgz — tarball payload (one-shot; trips served)
|
||||||
|
|
||||||
|
The operator's paste is a single pipe ``curl -fsSL <.sh> | sudo bash``.
|
||||||
|
Under the hood the bootstrap curls the ``.tgz`` from the same token.
|
||||||
|
Both files are rendered + persisted on POST; the ``.tgz`` GET atomically
|
||||||
|
marks the token served, reads the bytes under the lock, and unlinks both
|
||||||
|
files so a sweeper cannot race it. Unclaimed tokens expire after 5 min.
|
||||||
|
|
||||||
|
We avoid the single-self-extracting-script pattern because ``bash`` run
|
||||||
|
via pipe has ``$0 == "bash"`` — there is no file on disk to ``tail`` for
|
||||||
|
the embedded payload. Two URLs, one paste.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import fnmatch
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
import secrets
|
||||||
|
import tarfile
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, Request, Response, status
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.swarm import pki
|
||||||
|
from decnet.web.db.repository import BaseRepository
|
||||||
|
from decnet.web.dependencies import get_repo, require_admin
|
||||||
|
|
||||||
|
log = get_logger("swarm_mgmt.enroll_bundle")
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
BUNDLE_TTL = timedelta(minutes=5)
|
||||||
|
BUNDLE_DIR = pathlib.Path(os.environ.get("DECNET_ENROLL_BUNDLE_DIR", "/tmp/decnet-enroll")) # nosec B108 - short-lived 0600 bundle cache, env-overridable
|
||||||
|
SWEEP_INTERVAL_SECS = 30
|
||||||
|
|
||||||
|
# Paths excluded from the bundled tarball. Matches the intent of
|
||||||
|
# decnet.swarm.tar_tree.DEFAULT_EXCLUDES but narrower — we never want
|
||||||
|
# tests, dev scaffolding, the master's DB, or the frontend source tree
|
||||||
|
# shipped to an agent.
|
||||||
|
_EXCLUDES: tuple[str, ...] = (
|
||||||
|
".venv", ".venv/*", "**/.venv/*",
|
||||||
|
"__pycache__", "**/__pycache__", "**/__pycache__/*",
|
||||||
|
".git", ".git/*",
|
||||||
|
".pytest_cache", ".pytest_cache/*",
|
||||||
|
".mypy_cache", ".mypy_cache/*",
|
||||||
|
"*.egg-info", "*.egg-info/*",
|
||||||
|
"*.pyc", "*.pyo",
|
||||||
|
"*.db", "*.db-wal", "*.db-shm", "decnet.db*",
|
||||||
|
"*.log",
|
||||||
|
"tests", "tests/*",
|
||||||
|
"development", "development/*",
|
||||||
|
"wiki-checkout", "wiki-checkout/*",
|
||||||
|
"decnet_web/node_modules", "decnet_web/node_modules/*",
|
||||||
|
"decnet_web/src", "decnet_web/src/*",
|
||||||
|
"decnet-state.json",
|
||||||
|
"master.log", "master.json",
|
||||||
|
"decnet.tar",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# DTOs
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class EnrollBundleRequest(BaseModel):
|
||||||
|
master_host: str = Field(..., min_length=1, max_length=253,
|
||||||
|
description="IP/host the agent will reach back to")
|
||||||
|
agent_name: str = Field(..., pattern=r"^[a-z0-9][a-z0-9-]{0,62}$",
|
||||||
|
description="Worker name (DNS-label safe)")
|
||||||
|
services_ini: Optional[str] = Field(
|
||||||
|
default=None,
|
||||||
|
description="Optional INI text shipped to the agent as /etc/decnet/services.ini",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class EnrollBundleResponse(BaseModel):
|
||||||
|
token: str
|
||||||
|
command: str
|
||||||
|
expires_at: datetime
|
||||||
|
host_uuid: str
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# In-memory registry
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class _Bundle:
|
||||||
|
sh_path: pathlib.Path
|
||||||
|
tgz_path: pathlib.Path
|
||||||
|
expires_at: datetime
|
||||||
|
served: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
_BUNDLES: dict[str, _Bundle] = {}
|
||||||
|
_LOCK = asyncio.Lock()
|
||||||
|
_SWEEPER_TASK: Optional[asyncio.Task] = None
|
||||||
|
|
||||||
|
|
||||||
|
async def _sweep_loop() -> None:
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
await asyncio.sleep(SWEEP_INTERVAL_SECS)
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
async with _LOCK:
|
||||||
|
dead = [t for t, b in _BUNDLES.items() if b.served or b.expires_at <= now]
|
||||||
|
for t in dead:
|
||||||
|
b = _BUNDLES.pop(t)
|
||||||
|
for p in (b.sh_path, b.tgz_path):
|
||||||
|
try:
|
||||||
|
p.unlink()
|
||||||
|
except FileNotFoundError:
|
||||||
|
pass
|
||||||
|
except OSError as exc:
|
||||||
|
log.warning("enroll-bundle sweep unlink failed path=%s err=%s", p, exc)
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
raise
|
||||||
|
except Exception: # noqa: BLE001
|
||||||
|
log.exception("enroll-bundle sweeper iteration failed")
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_sweeper() -> None:
|
||||||
|
global _SWEEPER_TASK
|
||||||
|
if _SWEEPER_TASK is None or _SWEEPER_TASK.done():
|
||||||
|
_SWEEPER_TASK = asyncio.create_task(_sweep_loop())
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Tarball construction
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _repo_root() -> pathlib.Path:
|
||||||
|
# decnet/web/router/swarm_mgmt/api_enroll_bundle.py -> 4 parents = repo root.
|
||||||
|
return pathlib.Path(__file__).resolve().parents[4]
|
||||||
|
|
||||||
|
|
||||||
|
def _is_excluded(rel: str) -> bool:
|
||||||
|
parts = pathlib.PurePosixPath(rel).parts
|
||||||
|
for pat in _EXCLUDES:
|
||||||
|
if fnmatch.fnmatch(rel, pat):
|
||||||
|
return True
|
||||||
|
for i in range(1, len(parts) + 1):
|
||||||
|
if fnmatch.fnmatch("/".join(parts[:i]), pat):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _render_decnet_ini(master_host: str) -> bytes:
|
||||||
|
return (
|
||||||
|
"; Generated by DECNET agent-enrollment bundle.\n"
|
||||||
|
"[decnet]\n"
|
||||||
|
"mode = agent\n"
|
||||||
|
"disallow-master = true\n"
|
||||||
|
"log-file-path = /var/log/decnet/decnet.log\n"
|
||||||
|
"\n"
|
||||||
|
"[agent]\n"
|
||||||
|
f"master-host = {master_host}\n"
|
||||||
|
"swarm-syslog-port = 6514\n"
|
||||||
|
"agent-port = 8765\n"
|
||||||
|
"agent-dir = /root/.decnet/agent\n"
|
||||||
|
"updater-dir = /root/.decnet/updater\n"
|
||||||
|
).encode()
|
||||||
|
|
||||||
|
|
||||||
|
def _add_bytes(tar: tarfile.TarFile, name: str, data: bytes, mode: int = 0o644) -> None:
|
||||||
|
info = tarfile.TarInfo(name)
|
||||||
|
info.size = len(data)
|
||||||
|
info.mode = mode
|
||||||
|
info.mtime = int(datetime.now(timezone.utc).timestamp())
|
||||||
|
tar.addfile(info, io.BytesIO(data))
|
||||||
|
|
||||||
|
|
||||||
|
def _build_tarball(
|
||||||
|
master_host: str,
|
||||||
|
issued: pki.IssuedCert,
|
||||||
|
services_ini: Optional[str],
|
||||||
|
) -> bytes:
|
||||||
|
"""Gzipped tarball with:
|
||||||
|
- full repo source (minus excludes)
|
||||||
|
- etc/decnet/decnet.ini (pre-baked for mode=agent)
|
||||||
|
- home/.decnet/agent/{ca.crt,worker.crt,worker.key}
|
||||||
|
- services.ini at root if provided
|
||||||
|
"""
|
||||||
|
root = _repo_root()
|
||||||
|
buf = io.BytesIO()
|
||||||
|
with tarfile.open(fileobj=buf, mode="w:gz") as tar:
|
||||||
|
for path in sorted(root.rglob("*")):
|
||||||
|
rel = path.relative_to(root).as_posix()
|
||||||
|
if _is_excluded(rel):
|
||||||
|
continue
|
||||||
|
if path.is_symlink() or path.is_dir():
|
||||||
|
continue
|
||||||
|
tar.add(path, arcname=rel, recursive=False)
|
||||||
|
|
||||||
|
_add_bytes(tar, "etc/decnet/decnet.ini", _render_decnet_ini(master_host))
|
||||||
|
_add_bytes(tar, "home/.decnet/agent/ca.crt", issued.ca_cert_pem)
|
||||||
|
_add_bytes(tar, "home/.decnet/agent/worker.crt", issued.cert_pem)
|
||||||
|
_add_bytes(tar, "home/.decnet/agent/worker.key", issued.key_pem, mode=0o600)
|
||||||
|
|
||||||
|
if services_ini:
|
||||||
|
_add_bytes(tar, "services.ini", services_ini.encode())
|
||||||
|
|
||||||
|
return buf.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
def _render_bootstrap(
|
||||||
|
agent_name: str,
|
||||||
|
master_host: str,
|
||||||
|
tarball_url: str,
|
||||||
|
expires_at: datetime,
|
||||||
|
) -> bytes:
|
||||||
|
tpl_path = pathlib.Path(__file__).resolve().parents[1].parent / "templates" / "enroll_bootstrap.sh.j2"
|
||||||
|
tpl = tpl_path.read_text()
|
||||||
|
now = datetime.now(timezone.utc).replace(microsecond=0).isoformat()
|
||||||
|
rendered = (
|
||||||
|
tpl.replace("{{ agent_name }}", agent_name)
|
||||||
|
.replace("{{ master_host }}", master_host)
|
||||||
|
.replace("{{ tarball_url }}", tarball_url)
|
||||||
|
.replace("{{ generated_at }}", now)
|
||||||
|
.replace("{{ expires_at }}", expires_at.replace(microsecond=0).isoformat())
|
||||||
|
)
|
||||||
|
return rendered.encode()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Endpoints
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.post(
|
||||||
|
"/enroll-bundle",
|
||||||
|
response_model=EnrollBundleResponse,
|
||||||
|
status_code=status.HTTP_201_CREATED,
|
||||||
|
tags=["Swarm Management"],
|
||||||
|
)
|
||||||
|
async def create_enroll_bundle(
|
||||||
|
req: EnrollBundleRequest,
|
||||||
|
request: Request,
|
||||||
|
admin: dict = Depends(require_admin),
|
||||||
|
repo: BaseRepository = Depends(get_repo),
|
||||||
|
) -> EnrollBundleResponse:
|
||||||
|
import uuid as _uuid
|
||||||
|
|
||||||
|
existing = await repo.get_swarm_host_by_name(req.agent_name)
|
||||||
|
if existing is not None:
|
||||||
|
raise HTTPException(status_code=409, detail=f"Worker '{req.agent_name}' is already enrolled")
|
||||||
|
|
||||||
|
# 1. Issue certs (reuses the same code as /swarm/enroll).
|
||||||
|
ca = pki.ensure_ca()
|
||||||
|
sans = list({req.agent_name, req.master_host})
|
||||||
|
issued = pki.issue_worker_cert(ca, req.agent_name, sans)
|
||||||
|
bundle_dir = pki.DEFAULT_CA_DIR / "workers" / req.agent_name
|
||||||
|
pki.write_worker_bundle(issued, bundle_dir)
|
||||||
|
|
||||||
|
# 2. Register the host row so it shows up in SwarmHosts immediately.
|
||||||
|
host_uuid = str(_uuid.uuid4())
|
||||||
|
await repo.add_swarm_host(
|
||||||
|
{
|
||||||
|
"uuid": host_uuid,
|
||||||
|
"name": req.agent_name,
|
||||||
|
"address": req.master_host, # placeholder; agent overwrites on first heartbeat
|
||||||
|
"agent_port": 8765,
|
||||||
|
"status": "enrolled",
|
||||||
|
"client_cert_fingerprint": issued.fingerprint_sha256,
|
||||||
|
"updater_cert_fingerprint": None,
|
||||||
|
"cert_bundle_path": str(bundle_dir),
|
||||||
|
"enrolled_at": datetime.now(timezone.utc),
|
||||||
|
"notes": "enrolled via UI bundle",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# 3. Render payload + bootstrap.
|
||||||
|
tarball = _build_tarball(req.master_host, issued, req.services_ini)
|
||||||
|
token = secrets.token_urlsafe(24)
|
||||||
|
expires_at = datetime.now(timezone.utc) + BUNDLE_TTL
|
||||||
|
|
||||||
|
BUNDLE_DIR.mkdir(parents=True, exist_ok=True, mode=0o700)
|
||||||
|
sh_path = BUNDLE_DIR / f"{token}.sh"
|
||||||
|
tgz_path = BUNDLE_DIR / f"{token}.tgz"
|
||||||
|
|
||||||
|
base = str(request.base_url).rstrip("/")
|
||||||
|
tarball_url = f"{base}/api/v1/swarm/enroll-bundle/{token}.tgz"
|
||||||
|
bootstrap_url = f"{base}/api/v1/swarm/enroll-bundle/{token}.sh"
|
||||||
|
script = _render_bootstrap(req.agent_name, req.master_host, tarball_url, expires_at)
|
||||||
|
|
||||||
|
tgz_path.write_bytes(tarball)
|
||||||
|
sh_path.write_bytes(script)
|
||||||
|
os.chmod(tgz_path, 0o600)
|
||||||
|
os.chmod(sh_path, 0o600)
|
||||||
|
|
||||||
|
async with _LOCK:
|
||||||
|
_BUNDLES[token] = _Bundle(sh_path=sh_path, tgz_path=tgz_path, expires_at=expires_at)
|
||||||
|
_ensure_sweeper()
|
||||||
|
|
||||||
|
log.info("enroll-bundle created agent=%s master=%s token=%s...", req.agent_name, req.master_host, token[:8])
|
||||||
|
|
||||||
|
return EnrollBundleResponse(
|
||||||
|
token=token,
|
||||||
|
command=f"curl -fsSL {bootstrap_url} | sudo bash",
|
||||||
|
expires_at=expires_at,
|
||||||
|
host_uuid=host_uuid,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _now() -> datetime:
|
||||||
|
# Indirection so tests can monkeypatch.
|
||||||
|
return datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
|
async def _lookup_live(token: str) -> _Bundle:
|
||||||
|
b = _BUNDLES.get(token)
|
||||||
|
if b is None or b.served or b.expires_at <= _now():
|
||||||
|
raise HTTPException(status_code=404, detail="bundle not found or expired")
|
||||||
|
return b
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
|
||||||
|
"/enroll-bundle/{token}.sh",
|
||||||
|
tags=["Swarm Management"],
|
||||||
|
include_in_schema=False,
|
||||||
|
)
|
||||||
|
async def get_bootstrap(token: str) -> Response:
|
||||||
|
async with _LOCK:
|
||||||
|
b = await _lookup_live(token)
|
||||||
|
data = b.sh_path.read_bytes()
|
||||||
|
return Response(content=data, media_type="text/x-shellscript")
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
|
||||||
|
"/enroll-bundle/{token}.tgz",
|
||||||
|
tags=["Swarm Management"],
|
||||||
|
include_in_schema=False,
|
||||||
|
)
|
||||||
|
async def get_payload(token: str) -> Response:
|
||||||
|
async with _LOCK:
|
||||||
|
b = await _lookup_live(token)
|
||||||
|
b.served = True
|
||||||
|
data = b.tgz_path.read_bytes()
|
||||||
|
for p in (b.sh_path, b.tgz_path):
|
||||||
|
try:
|
||||||
|
p.unlink()
|
||||||
|
except FileNotFoundError:
|
||||||
|
pass
|
||||||
|
return Response(content=data, media_type="application/gzip")
|
||||||
42
decnet/web/router/swarm_mgmt/api_list_deckies.py
Normal file
42
decnet/web/router/swarm_mgmt/api_list_deckies.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
"""GET /swarm/deckies — admin-gated list of decky shards across the fleet."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends
|
||||||
|
|
||||||
|
from decnet.web.db.models import DeckyShardView
|
||||||
|
from decnet.web.db.repository import BaseRepository
|
||||||
|
from decnet.web.dependencies import get_repo, require_admin
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/deckies", response_model=list[DeckyShardView], tags=["Swarm Management"])
|
||||||
|
async def list_deckies(
|
||||||
|
host_uuid: Optional[str] = None,
|
||||||
|
state: Optional[str] = None,
|
||||||
|
admin: dict = Depends(require_admin),
|
||||||
|
repo: BaseRepository = Depends(get_repo),
|
||||||
|
) -> list[DeckyShardView]:
|
||||||
|
shards = await repo.list_decky_shards(host_uuid)
|
||||||
|
hosts = {h["uuid"]: h for h in await repo.list_swarm_hosts()}
|
||||||
|
|
||||||
|
out: list[DeckyShardView] = []
|
||||||
|
for s in shards:
|
||||||
|
if state and s.get("state") != state:
|
||||||
|
continue
|
||||||
|
host = hosts.get(s["host_uuid"], {})
|
||||||
|
out.append(DeckyShardView(
|
||||||
|
decky_name=s["decky_name"],
|
||||||
|
host_uuid=s["host_uuid"],
|
||||||
|
host_name=host.get("name") or "<unknown>",
|
||||||
|
host_address=host.get("address") or "",
|
||||||
|
host_status=host.get("status") or "unknown",
|
||||||
|
services=s.get("services") or [],
|
||||||
|
state=s.get("state") or "pending",
|
||||||
|
last_error=s.get("last_error"),
|
||||||
|
compose_hash=s.get("compose_hash"),
|
||||||
|
updated_at=s["updated_at"],
|
||||||
|
))
|
||||||
|
return out
|
||||||
26
decnet/web/router/swarm_mgmt/api_list_hosts.py
Normal file
26
decnet/web/router/swarm_mgmt/api_list_hosts.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
"""GET /swarm/hosts — admin-gated list of enrolled workers for the dashboard.
|
||||||
|
|
||||||
|
Thin wrapper over ``repo.list_swarm_hosts()`` — same shape as the
|
||||||
|
unauth'd controller route, but behind ``require_admin``.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends
|
||||||
|
|
||||||
|
from decnet.web.db.models import SwarmHostView
|
||||||
|
from decnet.web.db.repository import BaseRepository
|
||||||
|
from decnet.web.dependencies import get_repo, require_admin
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/hosts", response_model=list[SwarmHostView], tags=["Swarm Management"])
|
||||||
|
async def list_hosts(
|
||||||
|
host_status: Optional[str] = None,
|
||||||
|
admin: dict = Depends(require_admin),
|
||||||
|
repo: BaseRepository = Depends(get_repo),
|
||||||
|
) -> list[SwarmHostView]:
|
||||||
|
rows = await repo.list_swarm_hosts(host_status)
|
||||||
|
return [SwarmHostView(**r) for r in rows]
|
||||||
40
decnet/web/templates/enroll_bootstrap.sh.j2
Normal file
40
decnet/web/templates/enroll_bootstrap.sh.j2
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# DECNET bootstrap installer for agent {{ agent_name }} -> master {{ master_host }}.
|
||||||
|
# Fetches the code+certs payload, installs, and starts the agent daemon.
|
||||||
|
# Generated by the master at {{ generated_at }}. Expires {{ expires_at }}.
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
[[ $EUID -eq 0 ]] || { echo "decnet-install: must run as root (use sudo)"; exit 1; }
|
||||||
|
for bin in python3 curl tar; do
|
||||||
|
command -v "$bin" >/dev/null || { echo "decnet-install: $bin required"; exit 1; }
|
||||||
|
done
|
||||||
|
|
||||||
|
WORK="$(mktemp -d)"
|
||||||
|
trap 'rm -rf "$WORK"' EXIT
|
||||||
|
|
||||||
|
echo "[DECNET] fetching payload..."
|
||||||
|
curl -fsSL "{{ tarball_url }}" | tar -xz -C "$WORK"
|
||||||
|
|
||||||
|
INSTALL_DIR=/opt/decnet
|
||||||
|
mkdir -p "$INSTALL_DIR"
|
||||||
|
cp -a "$WORK/." "$INSTALL_DIR/"
|
||||||
|
cd "$INSTALL_DIR"
|
||||||
|
|
||||||
|
echo "[DECNET] building venv..."
|
||||||
|
python3 -m venv .venv
|
||||||
|
.venv/bin/pip install -q --upgrade pip
|
||||||
|
.venv/bin/pip install -q -e .
|
||||||
|
|
||||||
|
install -Dm0644 etc/decnet/decnet.ini /etc/decnet/decnet.ini
|
||||||
|
[[ -f services.ini ]] && install -Dm0644 services.ini /etc/decnet/services.ini
|
||||||
|
|
||||||
|
REAL_USER="${SUDO_USER:-root}"
|
||||||
|
REAL_HOME="$(getent passwd "$REAL_USER" | cut -d: -f6)"
|
||||||
|
for f in ca.crt worker.crt worker.key; do
|
||||||
|
install -Dm0600 -o "$REAL_USER" -g "$REAL_USER" \
|
||||||
|
"home/.decnet/agent/$f" "$REAL_HOME/.decnet/agent/$f"
|
||||||
|
done
|
||||||
|
|
||||||
|
ln -sf "$INSTALL_DIR/.venv/bin/decnet" /usr/local/bin/decnet
|
||||||
|
sudo -u "$REAL_USER" /usr/local/bin/decnet agent --daemon
|
||||||
|
echo "[DECNET] agent {{ agent_name }} enrolled -> {{ master_host }}. Forwarder auto-spawned."
|
||||||
0
tests/api/swarm_mgmt/__init__.py
Normal file
0
tests/api/swarm_mgmt/__init__.py
Normal file
205
tests/api/swarm_mgmt/test_enroll_bundle.py
Normal file
205
tests/api/swarm_mgmt/test_enroll_bundle.py
Normal file
@@ -0,0 +1,205 @@
|
|||||||
|
"""Agent-enrollment bundle flow: POST → .sh → .tgz (one-shot, TTL, races)."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import io
|
||||||
|
import pathlib
|
||||||
|
import tarfile
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from decnet.swarm import pki
|
||||||
|
from decnet.web.router.swarm_mgmt import api_enroll_bundle as mod
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def isolate_bundle_state(tmp_path: pathlib.Path, monkeypatch):
|
||||||
|
"""Point BUNDLE_DIR + CA into tmp, clear the in-memory registry."""
|
||||||
|
monkeypatch.setattr(mod, "BUNDLE_DIR", tmp_path / "bundles")
|
||||||
|
monkeypatch.setattr(pki, "DEFAULT_CA_DIR", tmp_path / "ca")
|
||||||
|
mod._BUNDLES.clear()
|
||||||
|
if mod._SWEEPER_TASK is not None and not mod._SWEEPER_TASK.done():
|
||||||
|
mod._SWEEPER_TASK.cancel()
|
||||||
|
mod._SWEEPER_TASK = None
|
||||||
|
yield
|
||||||
|
# Cleanup sweeper task between tests so they don't accumulate.
|
||||||
|
if mod._SWEEPER_TASK is not None and not mod._SWEEPER_TASK.done():
|
||||||
|
mod._SWEEPER_TASK.cancel()
|
||||||
|
mod._SWEEPER_TASK = None
|
||||||
|
|
||||||
|
|
||||||
|
async def _post(client, auth_token, **overrides):
|
||||||
|
body = {"master_host": "10.0.0.50", "agent_name": "worker-a"}
|
||||||
|
body.update(overrides)
|
||||||
|
return await client.post(
|
||||||
|
"/api/v1/swarm/enroll-bundle",
|
||||||
|
headers={"Authorization": f"Bearer {auth_token}"},
|
||||||
|
json=body,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
|
||||||
|
async def test_create_bundle_returns_one_liner(client, auth_token):
|
||||||
|
resp = await _post(client, auth_token)
|
||||||
|
assert resp.status_code == 201, resp.text
|
||||||
|
body = resp.json()
|
||||||
|
assert body["token"]
|
||||||
|
assert body["host_uuid"]
|
||||||
|
assert body["command"].startswith("curl -fsSL ")
|
||||||
|
assert body["command"].endswith(" | sudo bash")
|
||||||
|
assert "&&" not in body["command"] # single pipe, no chaining
|
||||||
|
assert body["token"] in body["command"]
|
||||||
|
expires = datetime.fromisoformat(body["expires_at"].replace("Z", "+00:00"))
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
assert timedelta(minutes=4) < expires - now <= timedelta(minutes=5)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
|
||||||
|
async def test_duplicate_agent_name_409(client, auth_token):
|
||||||
|
r1 = await _post(client, auth_token, agent_name="dup-node")
|
||||||
|
assert r1.status_code == 201
|
||||||
|
r2 = await _post(client, auth_token, agent_name="dup-node")
|
||||||
|
assert r2.status_code == 409
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
|
||||||
|
async def test_non_admin_forbidden(client, viewer_token):
|
||||||
|
resp = await _post(client, viewer_token)
|
||||||
|
assert resp.status_code == 403
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
|
||||||
|
async def test_no_auth_401(client):
|
||||||
|
resp = await client.post(
|
||||||
|
"/api/v1/swarm/enroll-bundle",
|
||||||
|
json={"master_host": "10.0.0.50", "agent_name": "worker-a"},
|
||||||
|
)
|
||||||
|
assert resp.status_code == 401
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
|
||||||
|
async def test_invalid_agent_name_422(client, auth_token):
|
||||||
|
# Uppercase / underscore not allowed by the regex.
|
||||||
|
resp = await _post(client, auth_token, agent_name="Bad_Name")
|
||||||
|
assert resp.status_code in (400, 422)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
|
||||||
|
async def test_get_bootstrap_contains_expected(client, auth_token):
|
||||||
|
post = await _post(client, auth_token, agent_name="alpha", master_host="master.example")
|
||||||
|
token = post.json()["token"]
|
||||||
|
|
||||||
|
resp = await client.get(f"/api/v1/swarm/enroll-bundle/{token}.sh")
|
||||||
|
assert resp.status_code == 200
|
||||||
|
text = resp.text
|
||||||
|
assert text.startswith("#!/usr/bin/env bash")
|
||||||
|
assert "alpha" in text
|
||||||
|
assert "master.example" in text
|
||||||
|
assert f"/api/v1/swarm/enroll-bundle/{token}.tgz" in text
|
||||||
|
# Script does NOT try to self-read with $0 (that would break under `curl | bash`).
|
||||||
|
assert 'tail -n +' not in text and 'awk' not in text
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
|
||||||
|
async def test_get_bootstrap_is_idempotent_until_tgz_served(client, auth_token):
|
||||||
|
token = (await _post(client, auth_token, agent_name="beta")).json()["token"]
|
||||||
|
for _ in range(3):
|
||||||
|
assert (await client.get(f"/api/v1/swarm/enroll-bundle/{token}.sh")).status_code == 200
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
|
||||||
|
async def test_get_tgz_contents(client, auth_token, tmp_path):
|
||||||
|
token = (await _post(
|
||||||
|
client, auth_token,
|
||||||
|
agent_name="gamma", master_host="10.1.2.3",
|
||||||
|
services_ini="[general]\nnet = 10.0.0.0/24\n",
|
||||||
|
)).json()["token"]
|
||||||
|
|
||||||
|
resp = await client.get(f"/api/v1/swarm/enroll-bundle/{token}.tgz")
|
||||||
|
assert resp.status_code == 200
|
||||||
|
assert resp.headers["content-type"].startswith("application/gzip")
|
||||||
|
|
||||||
|
tf = tarfile.open(fileobj=io.BytesIO(resp.content), mode="r:gz")
|
||||||
|
names = set(tf.getnames())
|
||||||
|
|
||||||
|
# Required files
|
||||||
|
assert "etc/decnet/decnet.ini" in names
|
||||||
|
assert "home/.decnet/agent/ca.crt" in names
|
||||||
|
assert "home/.decnet/agent/worker.crt" in names
|
||||||
|
assert "home/.decnet/agent/worker.key" in names
|
||||||
|
assert "services.ini" in names
|
||||||
|
assert "decnet/cli.py" in names # source shipped
|
||||||
|
assert "pyproject.toml" in names
|
||||||
|
|
||||||
|
# Excluded paths must NOT be shipped
|
||||||
|
for bad in names:
|
||||||
|
assert not bad.startswith("tests/"), f"leaked test file: {bad}"
|
||||||
|
assert not bad.startswith("development/"), f"leaked dev file: {bad}"
|
||||||
|
assert not bad.startswith("wiki-checkout/"), f"leaked wiki file: {bad}"
|
||||||
|
assert "__pycache__" not in bad
|
||||||
|
assert not bad.endswith(".pyc")
|
||||||
|
assert "node_modules" not in bad
|
||||||
|
|
||||||
|
# INI content is correct
|
||||||
|
ini = tf.extractfile("etc/decnet/decnet.ini").read().decode()
|
||||||
|
assert "mode = agent" in ini
|
||||||
|
assert "master-host = 10.1.2.3" in ini
|
||||||
|
|
||||||
|
# Key is mode 0600
|
||||||
|
key_info = tf.getmember("home/.decnet/agent/worker.key")
|
||||||
|
assert (key_info.mode & 0o777) == 0o600
|
||||||
|
|
||||||
|
# Services INI is there
|
||||||
|
assert tf.extractfile("services.ini").read().decode().startswith("[general]")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
|
||||||
|
async def test_tgz_is_one_shot(client, auth_token):
|
||||||
|
token = (await _post(client, auth_token, agent_name="delta")).json()["token"]
|
||||||
|
r1 = await client.get(f"/api/v1/swarm/enroll-bundle/{token}.tgz")
|
||||||
|
assert r1.status_code == 200
|
||||||
|
r2 = await client.get(f"/api/v1/swarm/enroll-bundle/{token}.tgz")
|
||||||
|
assert r2.status_code == 404
|
||||||
|
# .sh also invalidated after .tgz served (the host is up; replay is pointless)
|
||||||
|
r3 = await client.get(f"/api/v1/swarm/enroll-bundle/{token}.sh")
|
||||||
|
assert r3.status_code == 404
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
|
||||||
|
async def test_unknown_token_404(client):
|
||||||
|
assert (await client.get("/api/v1/swarm/enroll-bundle/not-a-real-token.sh")).status_code == 404
|
||||||
|
assert (await client.get("/api/v1/swarm/enroll-bundle/not-a-real-token.tgz")).status_code == 404
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
|
||||||
|
async def test_ttl_expiry_returns_404(client, auth_token, monkeypatch):
|
||||||
|
token = (await _post(client, auth_token, agent_name="epsilon")).json()["token"]
|
||||||
|
|
||||||
|
# Jump the clock 6 minutes into the future.
|
||||||
|
future = datetime.now(timezone.utc) + timedelta(minutes=6)
|
||||||
|
monkeypatch.setattr(mod, "_now", lambda: future)
|
||||||
|
|
||||||
|
assert (await client.get(f"/api/v1/swarm/enroll-bundle/{token}.sh")).status_code == 404
|
||||||
|
assert (await client.get(f"/api/v1/swarm/enroll-bundle/{token}.tgz")).status_code == 404
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
|
||||||
|
async def test_concurrent_tgz_exactly_one_wins(client, auth_token):
|
||||||
|
token = (await _post(client, auth_token, agent_name="zeta")).json()["token"]
|
||||||
|
url = f"/api/v1/swarm/enroll-bundle/{token}.tgz"
|
||||||
|
r1, r2 = await asyncio.gather(client.get(url), client.get(url))
|
||||||
|
statuses = sorted([r1.status_code, r2.status_code])
|
||||||
|
assert statuses == [200, 404]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
|
||||||
|
async def test_host_row_persisted_after_enroll(client, auth_token):
|
||||||
|
from decnet.web.dependencies import repo
|
||||||
|
resp = await _post(client, auth_token, agent_name="eta")
|
||||||
|
assert resp.status_code == 201
|
||||||
|
body = resp.json()
|
||||||
|
row = await repo.get_swarm_host_by_uuid(body["host_uuid"])
|
||||||
|
assert row is not None
|
||||||
|
assert row["name"] == "eta"
|
||||||
|
assert row["status"] == "enrolled"
|
||||||
Reference in New Issue
Block a user