From c6f7de30d22b5cd672fea57b73ded37bebc1f5bb Mon Sep 17 00:00:00 2001 From: anti Date: Sun, 19 Apr 2026 04:25:57 -0400 Subject: [PATCH] feat(swarm-mgmt): agent enrollment bundle flow + admin swarm endpoints --- decnet/web/router/__init__.py | 4 + decnet/web/router/swarm_mgmt/__init__.py | 24 ++ .../swarm_mgmt/api_decommission_host.py | 41 ++ .../router/swarm_mgmt/api_enroll_bundle.py | 354 ++++++++++++++++++ .../web/router/swarm_mgmt/api_list_deckies.py | 42 +++ .../web/router/swarm_mgmt/api_list_hosts.py | 26 ++ decnet/web/templates/enroll_bootstrap.sh.j2 | 40 ++ tests/api/swarm_mgmt/__init__.py | 0 tests/api/swarm_mgmt/test_enroll_bundle.py | 205 ++++++++++ 9 files changed, 736 insertions(+) create mode 100644 decnet/web/router/swarm_mgmt/__init__.py create mode 100644 decnet/web/router/swarm_mgmt/api_decommission_host.py create mode 100644 decnet/web/router/swarm_mgmt/api_enroll_bundle.py create mode 100644 decnet/web/router/swarm_mgmt/api_list_deckies.py create mode 100644 decnet/web/router/swarm_mgmt/api_list_hosts.py create mode 100644 decnet/web/templates/enroll_bootstrap.sh.j2 create mode 100644 tests/api/swarm_mgmt/__init__.py create mode 100644 tests/api/swarm_mgmt/test_enroll_bundle.py diff --git a/decnet/web/router/__init__.py b/decnet/web/router/__init__.py index be2f063..ac92e7c 100644 --- a/decnet/web/router/__init__.py +++ b/decnet/web/router/__init__.py @@ -22,6 +22,7 @@ from .config.api_reinit import router as config_reinit_router from .health.api_get_health import router as health_router from .artifacts.api_get_artifact import router as artifacts_router from .swarm_updates import swarm_updates_router +from .swarm_mgmt import swarm_mgmt_router api_router = APIRouter() @@ -64,3 +65,6 @@ api_router.include_router(artifacts_router) # Remote Updates (dashboard → worker updater daemons) api_router.include_router(swarm_updates_router) + +# Swarm Management (dashboard: hosts, deckies, agent enrollment bundles) +api_router.include_router(swarm_mgmt_router) diff --git a/decnet/web/router/swarm_mgmt/__init__.py b/decnet/web/router/swarm_mgmt/__init__.py new file mode 100644 index 0000000..8936f79 --- /dev/null +++ b/decnet/web/router/swarm_mgmt/__init__.py @@ -0,0 +1,24 @@ +"""Swarm management endpoints for the React dashboard. + +These are *not* the unauthenticated /swarm routes mounted on the separate +swarm-controller process (decnet/web/swarm_api.py on port 8770). These +live on the main web API, go through ``require_admin``, and are the +interface the dashboard uses to list hosts, decommission them, list +deckies across the fleet, and generate one-shot agent-enrollment +bundles. + +Mounted under ``/api/v1/swarm`` by the main api router. +""" +from fastapi import APIRouter + +from .api_list_hosts import router as list_hosts_router +from .api_decommission_host import router as decommission_host_router +from .api_list_deckies import router as list_deckies_router +from .api_enroll_bundle import router as enroll_bundle_router + +swarm_mgmt_router = APIRouter(prefix="/swarm") + +swarm_mgmt_router.include_router(list_hosts_router) +swarm_mgmt_router.include_router(decommission_host_router) +swarm_mgmt_router.include_router(list_deckies_router) +swarm_mgmt_router.include_router(enroll_bundle_router) diff --git a/decnet/web/router/swarm_mgmt/api_decommission_host.py b/decnet/web/router/swarm_mgmt/api_decommission_host.py new file mode 100644 index 0000000..e39f055 --- /dev/null +++ b/decnet/web/router/swarm_mgmt/api_decommission_host.py @@ -0,0 +1,41 @@ +"""DELETE /swarm/hosts/{uuid} — decommission a worker from the dashboard.""" +from __future__ import annotations + +import pathlib + +from fastapi import APIRouter, Depends, HTTPException, status + +from decnet.web.db.repository import BaseRepository +from decnet.web.dependencies import get_repo, require_admin + +router = APIRouter() + + +@router.delete( + "/hosts/{uuid}", + status_code=status.HTTP_204_NO_CONTENT, + tags=["Swarm Management"], +) +async def decommission_host( + uuid: str, + admin: dict = Depends(require_admin), + repo: BaseRepository = Depends(get_repo), +) -> None: + row = await repo.get_swarm_host_by_uuid(uuid) + if row is None: + raise HTTPException(status_code=404, detail="host not found") + + await repo.delete_decky_shards_for_host(uuid) + await repo.delete_swarm_host(uuid) + + bundle_dir = pathlib.Path(row.get("cert_bundle_path") or "") + if bundle_dir.is_dir(): + for child in bundle_dir.iterdir(): + try: + child.unlink() + except OSError: + pass + try: + bundle_dir.rmdir() + except OSError: + pass diff --git a/decnet/web/router/swarm_mgmt/api_enroll_bundle.py b/decnet/web/router/swarm_mgmt/api_enroll_bundle.py new file mode 100644 index 0000000..31683c2 --- /dev/null +++ b/decnet/web/router/swarm_mgmt/api_enroll_bundle.py @@ -0,0 +1,354 @@ +"""Agent-enrollment bundles — the Wazuh-style one-liner flow. + +Three endpoints: + POST /swarm/enroll-bundle — admin issues certs + builds payload + GET /swarm/enroll-bundle/{t}.sh — bootstrap script (idempotent until .tgz) + GET /swarm/enroll-bundle/{t}.tgz — tarball payload (one-shot; trips served) + +The operator's paste is a single pipe ``curl -fsSL <.sh> | sudo bash``. +Under the hood the bootstrap curls the ``.tgz`` from the same token. +Both files are rendered + persisted on POST; the ``.tgz`` GET atomically +marks the token served, reads the bytes under the lock, and unlinks both +files so a sweeper cannot race it. Unclaimed tokens expire after 5 min. + +We avoid the single-self-extracting-script pattern because ``bash`` run +via pipe has ``$0 == "bash"`` — there is no file on disk to ``tail`` for +the embedded payload. Two URLs, one paste. +""" +from __future__ import annotations + +import asyncio +import fnmatch +import io +import os +import pathlib +import secrets +import tarfile +from dataclasses import dataclass +from datetime import datetime, timedelta, timezone +from typing import Optional + +from fastapi import APIRouter, Depends, HTTPException, Request, Response, status +from pydantic import BaseModel, Field + +from decnet.logging import get_logger +from decnet.swarm import pki +from decnet.web.db.repository import BaseRepository +from decnet.web.dependencies import get_repo, require_admin + +log = get_logger("swarm_mgmt.enroll_bundle") + +router = APIRouter() + +BUNDLE_TTL = timedelta(minutes=5) +BUNDLE_DIR = pathlib.Path(os.environ.get("DECNET_ENROLL_BUNDLE_DIR", "/tmp/decnet-enroll")) # nosec B108 - short-lived 0600 bundle cache, env-overridable +SWEEP_INTERVAL_SECS = 30 + +# Paths excluded from the bundled tarball. Matches the intent of +# decnet.swarm.tar_tree.DEFAULT_EXCLUDES but narrower — we never want +# tests, dev scaffolding, the master's DB, or the frontend source tree +# shipped to an agent. +_EXCLUDES: tuple[str, ...] = ( + ".venv", ".venv/*", "**/.venv/*", + "__pycache__", "**/__pycache__", "**/__pycache__/*", + ".git", ".git/*", + ".pytest_cache", ".pytest_cache/*", + ".mypy_cache", ".mypy_cache/*", + "*.egg-info", "*.egg-info/*", + "*.pyc", "*.pyo", + "*.db", "*.db-wal", "*.db-shm", "decnet.db*", + "*.log", + "tests", "tests/*", + "development", "development/*", + "wiki-checkout", "wiki-checkout/*", + "decnet_web/node_modules", "decnet_web/node_modules/*", + "decnet_web/src", "decnet_web/src/*", + "decnet-state.json", + "master.log", "master.json", + "decnet.tar", +) + + +# --------------------------------------------------------------------------- +# DTOs +# --------------------------------------------------------------------------- + +class EnrollBundleRequest(BaseModel): + master_host: str = Field(..., min_length=1, max_length=253, + description="IP/host the agent will reach back to") + agent_name: str = Field(..., pattern=r"^[a-z0-9][a-z0-9-]{0,62}$", + description="Worker name (DNS-label safe)") + services_ini: Optional[str] = Field( + default=None, + description="Optional INI text shipped to the agent as /etc/decnet/services.ini", + ) + + +class EnrollBundleResponse(BaseModel): + token: str + command: str + expires_at: datetime + host_uuid: str + + +# --------------------------------------------------------------------------- +# In-memory registry +# --------------------------------------------------------------------------- + +@dataclass +class _Bundle: + sh_path: pathlib.Path + tgz_path: pathlib.Path + expires_at: datetime + served: bool = False + + +_BUNDLES: dict[str, _Bundle] = {} +_LOCK = asyncio.Lock() +_SWEEPER_TASK: Optional[asyncio.Task] = None + + +async def _sweep_loop() -> None: + while True: + try: + await asyncio.sleep(SWEEP_INTERVAL_SECS) + now = datetime.now(timezone.utc) + async with _LOCK: + dead = [t for t, b in _BUNDLES.items() if b.served or b.expires_at <= now] + for t in dead: + b = _BUNDLES.pop(t) + for p in (b.sh_path, b.tgz_path): + try: + p.unlink() + except FileNotFoundError: + pass + except OSError as exc: + log.warning("enroll-bundle sweep unlink failed path=%s err=%s", p, exc) + except asyncio.CancelledError: + raise + except Exception: # noqa: BLE001 + log.exception("enroll-bundle sweeper iteration failed") + + +def _ensure_sweeper() -> None: + global _SWEEPER_TASK + if _SWEEPER_TASK is None or _SWEEPER_TASK.done(): + _SWEEPER_TASK = asyncio.create_task(_sweep_loop()) + + +# --------------------------------------------------------------------------- +# Tarball construction +# --------------------------------------------------------------------------- + +def _repo_root() -> pathlib.Path: + # decnet/web/router/swarm_mgmt/api_enroll_bundle.py -> 4 parents = repo root. + return pathlib.Path(__file__).resolve().parents[4] + + +def _is_excluded(rel: str) -> bool: + parts = pathlib.PurePosixPath(rel).parts + for pat in _EXCLUDES: + if fnmatch.fnmatch(rel, pat): + return True + for i in range(1, len(parts) + 1): + if fnmatch.fnmatch("/".join(parts[:i]), pat): + return True + return False + + +def _render_decnet_ini(master_host: str) -> bytes: + return ( + "; Generated by DECNET agent-enrollment bundle.\n" + "[decnet]\n" + "mode = agent\n" + "disallow-master = true\n" + "log-file-path = /var/log/decnet/decnet.log\n" + "\n" + "[agent]\n" + f"master-host = {master_host}\n" + "swarm-syslog-port = 6514\n" + "agent-port = 8765\n" + "agent-dir = /root/.decnet/agent\n" + "updater-dir = /root/.decnet/updater\n" + ).encode() + + +def _add_bytes(tar: tarfile.TarFile, name: str, data: bytes, mode: int = 0o644) -> None: + info = tarfile.TarInfo(name) + info.size = len(data) + info.mode = mode + info.mtime = int(datetime.now(timezone.utc).timestamp()) + tar.addfile(info, io.BytesIO(data)) + + +def _build_tarball( + master_host: str, + issued: pki.IssuedCert, + services_ini: Optional[str], +) -> bytes: + """Gzipped tarball with: + - full repo source (minus excludes) + - etc/decnet/decnet.ini (pre-baked for mode=agent) + - home/.decnet/agent/{ca.crt,worker.crt,worker.key} + - services.ini at root if provided + """ + root = _repo_root() + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w:gz") as tar: + for path in sorted(root.rglob("*")): + rel = path.relative_to(root).as_posix() + if _is_excluded(rel): + continue + if path.is_symlink() or path.is_dir(): + continue + tar.add(path, arcname=rel, recursive=False) + + _add_bytes(tar, "etc/decnet/decnet.ini", _render_decnet_ini(master_host)) + _add_bytes(tar, "home/.decnet/agent/ca.crt", issued.ca_cert_pem) + _add_bytes(tar, "home/.decnet/agent/worker.crt", issued.cert_pem) + _add_bytes(tar, "home/.decnet/agent/worker.key", issued.key_pem, mode=0o600) + + if services_ini: + _add_bytes(tar, "services.ini", services_ini.encode()) + + return buf.getvalue() + + +def _render_bootstrap( + agent_name: str, + master_host: str, + tarball_url: str, + expires_at: datetime, +) -> bytes: + tpl_path = pathlib.Path(__file__).resolve().parents[1].parent / "templates" / "enroll_bootstrap.sh.j2" + tpl = tpl_path.read_text() + now = datetime.now(timezone.utc).replace(microsecond=0).isoformat() + rendered = ( + tpl.replace("{{ agent_name }}", agent_name) + .replace("{{ master_host }}", master_host) + .replace("{{ tarball_url }}", tarball_url) + .replace("{{ generated_at }}", now) + .replace("{{ expires_at }}", expires_at.replace(microsecond=0).isoformat()) + ) + return rendered.encode() + + +# --------------------------------------------------------------------------- +# Endpoints +# --------------------------------------------------------------------------- + +@router.post( + "/enroll-bundle", + response_model=EnrollBundleResponse, + status_code=status.HTTP_201_CREATED, + tags=["Swarm Management"], +) +async def create_enroll_bundle( + req: EnrollBundleRequest, + request: Request, + admin: dict = Depends(require_admin), + repo: BaseRepository = Depends(get_repo), +) -> EnrollBundleResponse: + import uuid as _uuid + + existing = await repo.get_swarm_host_by_name(req.agent_name) + if existing is not None: + raise HTTPException(status_code=409, detail=f"Worker '{req.agent_name}' is already enrolled") + + # 1. Issue certs (reuses the same code as /swarm/enroll). + ca = pki.ensure_ca() + sans = list({req.agent_name, req.master_host}) + issued = pki.issue_worker_cert(ca, req.agent_name, sans) + bundle_dir = pki.DEFAULT_CA_DIR / "workers" / req.agent_name + pki.write_worker_bundle(issued, bundle_dir) + + # 2. Register the host row so it shows up in SwarmHosts immediately. + host_uuid = str(_uuid.uuid4()) + await repo.add_swarm_host( + { + "uuid": host_uuid, + "name": req.agent_name, + "address": req.master_host, # placeholder; agent overwrites on first heartbeat + "agent_port": 8765, + "status": "enrolled", + "client_cert_fingerprint": issued.fingerprint_sha256, + "updater_cert_fingerprint": None, + "cert_bundle_path": str(bundle_dir), + "enrolled_at": datetime.now(timezone.utc), + "notes": "enrolled via UI bundle", + } + ) + + # 3. Render payload + bootstrap. + tarball = _build_tarball(req.master_host, issued, req.services_ini) + token = secrets.token_urlsafe(24) + expires_at = datetime.now(timezone.utc) + BUNDLE_TTL + + BUNDLE_DIR.mkdir(parents=True, exist_ok=True, mode=0o700) + sh_path = BUNDLE_DIR / f"{token}.sh" + tgz_path = BUNDLE_DIR / f"{token}.tgz" + + base = str(request.base_url).rstrip("/") + tarball_url = f"{base}/api/v1/swarm/enroll-bundle/{token}.tgz" + bootstrap_url = f"{base}/api/v1/swarm/enroll-bundle/{token}.sh" + script = _render_bootstrap(req.agent_name, req.master_host, tarball_url, expires_at) + + tgz_path.write_bytes(tarball) + sh_path.write_bytes(script) + os.chmod(tgz_path, 0o600) + os.chmod(sh_path, 0o600) + + async with _LOCK: + _BUNDLES[token] = _Bundle(sh_path=sh_path, tgz_path=tgz_path, expires_at=expires_at) + _ensure_sweeper() + + log.info("enroll-bundle created agent=%s master=%s token=%s...", req.agent_name, req.master_host, token[:8]) + + return EnrollBundleResponse( + token=token, + command=f"curl -fsSL {bootstrap_url} | sudo bash", + expires_at=expires_at, + host_uuid=host_uuid, + ) + + +def _now() -> datetime: + # Indirection so tests can monkeypatch. + return datetime.now(timezone.utc) + + +async def _lookup_live(token: str) -> _Bundle: + b = _BUNDLES.get(token) + if b is None or b.served or b.expires_at <= _now(): + raise HTTPException(status_code=404, detail="bundle not found or expired") + return b + + +@router.get( + "/enroll-bundle/{token}.sh", + tags=["Swarm Management"], + include_in_schema=False, +) +async def get_bootstrap(token: str) -> Response: + async with _LOCK: + b = await _lookup_live(token) + data = b.sh_path.read_bytes() + return Response(content=data, media_type="text/x-shellscript") + + +@router.get( + "/enroll-bundle/{token}.tgz", + tags=["Swarm Management"], + include_in_schema=False, +) +async def get_payload(token: str) -> Response: + async with _LOCK: + b = await _lookup_live(token) + b.served = True + data = b.tgz_path.read_bytes() + for p in (b.sh_path, b.tgz_path): + try: + p.unlink() + except FileNotFoundError: + pass + return Response(content=data, media_type="application/gzip") diff --git a/decnet/web/router/swarm_mgmt/api_list_deckies.py b/decnet/web/router/swarm_mgmt/api_list_deckies.py new file mode 100644 index 0000000..9ef9a48 --- /dev/null +++ b/decnet/web/router/swarm_mgmt/api_list_deckies.py @@ -0,0 +1,42 @@ +"""GET /swarm/deckies — admin-gated list of decky shards across the fleet.""" +from __future__ import annotations + +from typing import Optional + +from fastapi import APIRouter, Depends + +from decnet.web.db.models import DeckyShardView +from decnet.web.db.repository import BaseRepository +from decnet.web.dependencies import get_repo, require_admin + +router = APIRouter() + + +@router.get("/deckies", response_model=list[DeckyShardView], tags=["Swarm Management"]) +async def list_deckies( + host_uuid: Optional[str] = None, + state: Optional[str] = None, + admin: dict = Depends(require_admin), + repo: BaseRepository = Depends(get_repo), +) -> list[DeckyShardView]: + shards = await repo.list_decky_shards(host_uuid) + hosts = {h["uuid"]: h for h in await repo.list_swarm_hosts()} + + out: list[DeckyShardView] = [] + for s in shards: + if state and s.get("state") != state: + continue + host = hosts.get(s["host_uuid"], {}) + out.append(DeckyShardView( + decky_name=s["decky_name"], + host_uuid=s["host_uuid"], + host_name=host.get("name") or "", + host_address=host.get("address") or "", + host_status=host.get("status") or "unknown", + services=s.get("services") or [], + state=s.get("state") or "pending", + last_error=s.get("last_error"), + compose_hash=s.get("compose_hash"), + updated_at=s["updated_at"], + )) + return out diff --git a/decnet/web/router/swarm_mgmt/api_list_hosts.py b/decnet/web/router/swarm_mgmt/api_list_hosts.py new file mode 100644 index 0000000..81c3a9e --- /dev/null +++ b/decnet/web/router/swarm_mgmt/api_list_hosts.py @@ -0,0 +1,26 @@ +"""GET /swarm/hosts — admin-gated list of enrolled workers for the dashboard. + +Thin wrapper over ``repo.list_swarm_hosts()`` — same shape as the +unauth'd controller route, but behind ``require_admin``. +""" +from __future__ import annotations + +from typing import Optional + +from fastapi import APIRouter, Depends + +from decnet.web.db.models import SwarmHostView +from decnet.web.db.repository import BaseRepository +from decnet.web.dependencies import get_repo, require_admin + +router = APIRouter() + + +@router.get("/hosts", response_model=list[SwarmHostView], tags=["Swarm Management"]) +async def list_hosts( + host_status: Optional[str] = None, + admin: dict = Depends(require_admin), + repo: BaseRepository = Depends(get_repo), +) -> list[SwarmHostView]: + rows = await repo.list_swarm_hosts(host_status) + return [SwarmHostView(**r) for r in rows] diff --git a/decnet/web/templates/enroll_bootstrap.sh.j2 b/decnet/web/templates/enroll_bootstrap.sh.j2 new file mode 100644 index 0000000..ec3bf59 --- /dev/null +++ b/decnet/web/templates/enroll_bootstrap.sh.j2 @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# DECNET bootstrap installer for agent {{ agent_name }} -> master {{ master_host }}. +# Fetches the code+certs payload, installs, and starts the agent daemon. +# Generated by the master at {{ generated_at }}. Expires {{ expires_at }}. +set -euo pipefail + +[[ $EUID -eq 0 ]] || { echo "decnet-install: must run as root (use sudo)"; exit 1; } +for bin in python3 curl tar; do + command -v "$bin" >/dev/null || { echo "decnet-install: $bin required"; exit 1; } +done + +WORK="$(mktemp -d)" +trap 'rm -rf "$WORK"' EXIT + +echo "[DECNET] fetching payload..." +curl -fsSL "{{ tarball_url }}" | tar -xz -C "$WORK" + +INSTALL_DIR=/opt/decnet +mkdir -p "$INSTALL_DIR" +cp -a "$WORK/." "$INSTALL_DIR/" +cd "$INSTALL_DIR" + +echo "[DECNET] building venv..." +python3 -m venv .venv +.venv/bin/pip install -q --upgrade pip +.venv/bin/pip install -q -e . + +install -Dm0644 etc/decnet/decnet.ini /etc/decnet/decnet.ini +[[ -f services.ini ]] && install -Dm0644 services.ini /etc/decnet/services.ini + +REAL_USER="${SUDO_USER:-root}" +REAL_HOME="$(getent passwd "$REAL_USER" | cut -d: -f6)" +for f in ca.crt worker.crt worker.key; do + install -Dm0600 -o "$REAL_USER" -g "$REAL_USER" \ + "home/.decnet/agent/$f" "$REAL_HOME/.decnet/agent/$f" +done + +ln -sf "$INSTALL_DIR/.venv/bin/decnet" /usr/local/bin/decnet +sudo -u "$REAL_USER" /usr/local/bin/decnet agent --daemon +echo "[DECNET] agent {{ agent_name }} enrolled -> {{ master_host }}. Forwarder auto-spawned." diff --git a/tests/api/swarm_mgmt/__init__.py b/tests/api/swarm_mgmt/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/api/swarm_mgmt/test_enroll_bundle.py b/tests/api/swarm_mgmt/test_enroll_bundle.py new file mode 100644 index 0000000..6e9065d --- /dev/null +++ b/tests/api/swarm_mgmt/test_enroll_bundle.py @@ -0,0 +1,205 @@ +"""Agent-enrollment bundle flow: POST → .sh → .tgz (one-shot, TTL, races).""" +from __future__ import annotations + +import asyncio +import io +import pathlib +import tarfile +from datetime import datetime, timedelta, timezone + +import pytest + +from decnet.swarm import pki +from decnet.web.router.swarm_mgmt import api_enroll_bundle as mod + + +@pytest.fixture(autouse=True) +def isolate_bundle_state(tmp_path: pathlib.Path, monkeypatch): + """Point BUNDLE_DIR + CA into tmp, clear the in-memory registry.""" + monkeypatch.setattr(mod, "BUNDLE_DIR", tmp_path / "bundles") + monkeypatch.setattr(pki, "DEFAULT_CA_DIR", tmp_path / "ca") + mod._BUNDLES.clear() + if mod._SWEEPER_TASK is not None and not mod._SWEEPER_TASK.done(): + mod._SWEEPER_TASK.cancel() + mod._SWEEPER_TASK = None + yield + # Cleanup sweeper task between tests so they don't accumulate. + if mod._SWEEPER_TASK is not None and not mod._SWEEPER_TASK.done(): + mod._SWEEPER_TASK.cancel() + mod._SWEEPER_TASK = None + + +async def _post(client, auth_token, **overrides): + body = {"master_host": "10.0.0.50", "agent_name": "worker-a"} + body.update(overrides) + return await client.post( + "/api/v1/swarm/enroll-bundle", + headers={"Authorization": f"Bearer {auth_token}"}, + json=body, + ) + + +@pytest.mark.anyio +async def test_create_bundle_returns_one_liner(client, auth_token): + resp = await _post(client, auth_token) + assert resp.status_code == 201, resp.text + body = resp.json() + assert body["token"] + assert body["host_uuid"] + assert body["command"].startswith("curl -fsSL ") + assert body["command"].endswith(" | sudo bash") + assert "&&" not in body["command"] # single pipe, no chaining + assert body["token"] in body["command"] + expires = datetime.fromisoformat(body["expires_at"].replace("Z", "+00:00")) + now = datetime.now(timezone.utc) + assert timedelta(minutes=4) < expires - now <= timedelta(minutes=5) + + +@pytest.mark.anyio +async def test_duplicate_agent_name_409(client, auth_token): + r1 = await _post(client, auth_token, agent_name="dup-node") + assert r1.status_code == 201 + r2 = await _post(client, auth_token, agent_name="dup-node") + assert r2.status_code == 409 + + +@pytest.mark.anyio +async def test_non_admin_forbidden(client, viewer_token): + resp = await _post(client, viewer_token) + assert resp.status_code == 403 + + +@pytest.mark.anyio +async def test_no_auth_401(client): + resp = await client.post( + "/api/v1/swarm/enroll-bundle", + json={"master_host": "10.0.0.50", "agent_name": "worker-a"}, + ) + assert resp.status_code == 401 + + +@pytest.mark.anyio +async def test_invalid_agent_name_422(client, auth_token): + # Uppercase / underscore not allowed by the regex. + resp = await _post(client, auth_token, agent_name="Bad_Name") + assert resp.status_code in (400, 422) + + +@pytest.mark.anyio +async def test_get_bootstrap_contains_expected(client, auth_token): + post = await _post(client, auth_token, agent_name="alpha", master_host="master.example") + token = post.json()["token"] + + resp = await client.get(f"/api/v1/swarm/enroll-bundle/{token}.sh") + assert resp.status_code == 200 + text = resp.text + assert text.startswith("#!/usr/bin/env bash") + assert "alpha" in text + assert "master.example" in text + assert f"/api/v1/swarm/enroll-bundle/{token}.tgz" in text + # Script does NOT try to self-read with $0 (that would break under `curl | bash`). + assert 'tail -n +' not in text and 'awk' not in text + + +@pytest.mark.anyio +async def test_get_bootstrap_is_idempotent_until_tgz_served(client, auth_token): + token = (await _post(client, auth_token, agent_name="beta")).json()["token"] + for _ in range(3): + assert (await client.get(f"/api/v1/swarm/enroll-bundle/{token}.sh")).status_code == 200 + + +@pytest.mark.anyio +async def test_get_tgz_contents(client, auth_token, tmp_path): + token = (await _post( + client, auth_token, + agent_name="gamma", master_host="10.1.2.3", + services_ini="[general]\nnet = 10.0.0.0/24\n", + )).json()["token"] + + resp = await client.get(f"/api/v1/swarm/enroll-bundle/{token}.tgz") + assert resp.status_code == 200 + assert resp.headers["content-type"].startswith("application/gzip") + + tf = tarfile.open(fileobj=io.BytesIO(resp.content), mode="r:gz") + names = set(tf.getnames()) + + # Required files + assert "etc/decnet/decnet.ini" in names + assert "home/.decnet/agent/ca.crt" in names + assert "home/.decnet/agent/worker.crt" in names + assert "home/.decnet/agent/worker.key" in names + assert "services.ini" in names + assert "decnet/cli.py" in names # source shipped + assert "pyproject.toml" in names + + # Excluded paths must NOT be shipped + for bad in names: + assert not bad.startswith("tests/"), f"leaked test file: {bad}" + assert not bad.startswith("development/"), f"leaked dev file: {bad}" + assert not bad.startswith("wiki-checkout/"), f"leaked wiki file: {bad}" + assert "__pycache__" not in bad + assert not bad.endswith(".pyc") + assert "node_modules" not in bad + + # INI content is correct + ini = tf.extractfile("etc/decnet/decnet.ini").read().decode() + assert "mode = agent" in ini + assert "master-host = 10.1.2.3" in ini + + # Key is mode 0600 + key_info = tf.getmember("home/.decnet/agent/worker.key") + assert (key_info.mode & 0o777) == 0o600 + + # Services INI is there + assert tf.extractfile("services.ini").read().decode().startswith("[general]") + + +@pytest.mark.anyio +async def test_tgz_is_one_shot(client, auth_token): + token = (await _post(client, auth_token, agent_name="delta")).json()["token"] + r1 = await client.get(f"/api/v1/swarm/enroll-bundle/{token}.tgz") + assert r1.status_code == 200 + r2 = await client.get(f"/api/v1/swarm/enroll-bundle/{token}.tgz") + assert r2.status_code == 404 + # .sh also invalidated after .tgz served (the host is up; replay is pointless) + r3 = await client.get(f"/api/v1/swarm/enroll-bundle/{token}.sh") + assert r3.status_code == 404 + + +@pytest.mark.anyio +async def test_unknown_token_404(client): + assert (await client.get("/api/v1/swarm/enroll-bundle/not-a-real-token.sh")).status_code == 404 + assert (await client.get("/api/v1/swarm/enroll-bundle/not-a-real-token.tgz")).status_code == 404 + + +@pytest.mark.anyio +async def test_ttl_expiry_returns_404(client, auth_token, monkeypatch): + token = (await _post(client, auth_token, agent_name="epsilon")).json()["token"] + + # Jump the clock 6 minutes into the future. + future = datetime.now(timezone.utc) + timedelta(minutes=6) + monkeypatch.setattr(mod, "_now", lambda: future) + + assert (await client.get(f"/api/v1/swarm/enroll-bundle/{token}.sh")).status_code == 404 + assert (await client.get(f"/api/v1/swarm/enroll-bundle/{token}.tgz")).status_code == 404 + + +@pytest.mark.anyio +async def test_concurrent_tgz_exactly_one_wins(client, auth_token): + token = (await _post(client, auth_token, agent_name="zeta")).json()["token"] + url = f"/api/v1/swarm/enroll-bundle/{token}.tgz" + r1, r2 = await asyncio.gather(client.get(url), client.get(url)) + statuses = sorted([r1.status_code, r2.status_code]) + assert statuses == [200, 404] + + +@pytest.mark.anyio +async def test_host_row_persisted_after_enroll(client, auth_token): + from decnet.web.dependencies import repo + resp = await _post(client, auth_token, agent_name="eta") + assert resp.status_code == 201 + body = resp.json() + row = await repo.get_swarm_host_by_uuid(body["host_uuid"]) + assert row is not None + assert row["name"] == "eta" + assert row["status"] == "enrolled"