feat(web): Remote Updates API — dashboard endpoints for pushing code to workers
Adds /api/v1/swarm-updates/{hosts,push,push-self,rollback} behind
require_admin. Reuses the existing UpdaterClient + tar_working_tree + the
per-host asyncio.gather pattern from api_deploy_swarm.py; tarball is
built exactly once per /push request and fanned out to every selected
worker. /hosts filters out decommissioned hosts and agent-only
enrollments (no updater bundle = not a target).
Connection drops during /update-self are treated as success — the
updater re-execs itself mid-response, so httpx always raises.
Pydantic models live in decnet/web/db/models.py (single source of
truth). 24 tests cover happy paths, rollback, transport failures,
include_self ordering (skip on rolled-back agents), validation, and
RBAC gating.
This commit is contained in:
@@ -373,3 +373,72 @@ class SwarmHostHealth(BaseModel):
|
||||
|
||||
class SwarmCheckResponse(BaseModel):
    # Collected SwarmHostHealth entries returned by the check.
    results: list[SwarmHostHealth]
|
||||
|
||||
|
||||
# --- Remote Updates (master → worker /updater) DTOs ---
|
||||
# Powers the dashboard's Remote Updates page. The master dashboard calls
|
||||
# these (auth-gated) endpoints; internally they fan out to each worker's
|
||||
# updater daemon over mTLS via UpdaterClient.
|
||||
|
||||
class HostReleaseInfo(BaseModel):
    # One row of the Remote Updates host listing: the host's identity plus
    # whatever its updater reported when it was probed.
    host_uuid: str
    host_name: str
    address: str
    # False when the health probe raised; ``detail`` then carries the reason.
    reachable: bool
    # These fields mirror the updater's /health payload when reachable; they
    # are all Optional so an unreachable host still serializes cleanly.
    agent_status: Optional[str] = None
    # SHA of the release entry whose slot is "active".
    current_sha: Optional[str] = None
    # SHA of the release entry whose slot is "prev".
    previous_sha: Optional[str] = None
    # Release entries exactly as reported by the updater; empty by default.
    releases: list[dict[str, Any]] = PydanticField(default_factory=list)
    detail: Optional[str] = None  # populated when unreachable
|
||||
|
||||
|
||||
class HostReleasesResponse(BaseModel):
    # Response body of GET /swarm-updates/hosts: one entry per probed host.
    hosts: list[HostReleaseInfo]
|
||||
|
||||
|
||||
class PushUpdateRequest(BaseModel):
    # Request body shared by the /push and /push-self routes. Exactly one of
    # ``host_uuids`` (non-empty) or ``all=true`` must be supplied; the routes
    # reject any other combination with HTTP 400.
    host_uuids: Optional[list[str]] = PydanticField(
        default=None,
        description="Target specific hosts; mutually exclusive with 'all'.",
    )
    # NOTE: the field name shadows the built-in all() inside this class body
    # only; it is part of the request schema and must keep this name.
    all: bool = PydanticField(default=False, description="Target every non-decommissioned host with an updater bundle.")
    include_self: bool = PydanticField(
        default=False,
        description="After a successful /update, also push /update-self to upgrade the updater itself.",
    )
    # Forwarded to the tarball builder as its extra exclude list.
    exclude: list[str] = PydanticField(
        default_factory=list,
        description="Additional tarball exclude globs (on top of the built-in defaults).",
    )
|
||||
|
||||
|
||||
class PushUpdateResult(BaseModel):
    # Per-host outcome of a /push or /push-self request.
    host_uuid: str
    host_name: str
    # updated = /update 200. rolled-back = /update 409 (auto-recovered).
    # failed = transport error or non-200/409 response.
    # self-updated = /update-self succeeded (including the expected
    # connection drop while the updater re-execs).
    # self-failed = /update-self returned a non-200 response or raised an
    # unexpected error.
    status: Literal["updated", "rolled-back", "failed", "self-updated", "self-failed"]
    # HTTP status code of the worker response, when one was received.
    http_status: Optional[int] = None
    # SHA that was pushed to the host.
    sha: Optional[str] = None
    # Error / probe text from the worker, or the exception summary.
    detail: Optional[str] = None
    # "stderr" value from the worker's response body, when present.
    stderr: Optional[str] = None
|
||||
|
||||
|
||||
class PushUpdateResponse(BaseModel):
    # Aggregate response of /push and /push-self.
    # SHA detected for the master's tree at push time.
    sha: str
    # Size in bytes of the tarball that was fanned out.
    tarball_bytes: int
    # One result per targeted host.
    results: list[PushUpdateResult]
|
||||
|
||||
|
||||
class RollbackRequest(BaseModel):
    # Request body of POST /swarm-updates/rollback (single host only).
    host_uuid: str = PydanticField(..., description="Host to roll back to its previous release slot.")
|
||||
|
||||
|
||||
class RollbackResponse(BaseModel):
    # Outcome of a manual rollback on one host.
    host_uuid: str
    host_name: str
    # rolled-back = worker /rollback answered 200; failed = transport error
    # or any other non-404 status (a worker 404 is surfaced as HTTP 404).
    status: Literal["rolled-back", "failed"]
    # HTTP status code of the worker response, when one was received.
    http_status: Optional[int] = None
    # Worker-provided status/error text, or the exception summary.
    detail: Optional[str] = None
|
||||
|
||||
@@ -21,6 +21,7 @@ from .config.api_manage_users import router as config_users_router
|
||||
from .config.api_reinit import router as config_reinit_router
|
||||
from .health.api_get_health import router as health_router
|
||||
from .artifacts.api_get_artifact import router as artifacts_router
|
||||
from .swarm_updates import swarm_updates_router
|
||||
|
||||
api_router = APIRouter()
|
||||
|
||||
@@ -60,3 +61,6 @@ api_router.include_router(config_reinit_router)
|
||||
|
||||
# Artifacts (captured attacker file drops)
|
||||
api_router.include_router(artifacts_router)
|
||||
|
||||
# Remote Updates (dashboard → worker updater daemons)
|
||||
api_router.include_router(swarm_updates_router)
|
||||
|
||||
23
decnet/web/router/swarm_updates/__init__.py
Normal file
23
decnet/web/router/swarm_updates/__init__.py
Normal file
@@ -0,0 +1,23 @@
|
||||
"""Remote Updates — master dashboard's surface for pushing code to workers.
|
||||
|
||||
These are *not* the swarm-controller's /swarm routes (those run on a
|
||||
separate process, auth-free, internal-only). They live on the main web
|
||||
API, go through ``require_admin``, and are the interface the React
|
||||
dashboard calls to fan updates out to worker ``decnet updater`` daemons
|
||||
via ``UpdaterClient``.
|
||||
|
||||
Mounted under ``/api/v1/swarm-updates`` by the main api router.
|
||||
"""
|
||||
from fastapi import APIRouter
|
||||
|
||||
from .api_list_host_releases import router as list_host_releases_router
|
||||
from .api_push_update import router as push_update_router
|
||||
from .api_push_update_self import router as push_update_self_router
|
||||
from .api_rollback_host import router as rollback_host_router
|
||||
|
||||
# Parent router for the Remote Updates endpoints. Each route module
# contributes its own sub-router; they are attached in the order listed.
swarm_updates_router = APIRouter(prefix="/swarm-updates")

for _sub_router in (
    list_host_releases_router,
    push_update_router,
    push_update_self_router,
    rollback_host_router,
):
    swarm_updates_router.include_router(_sub_router)
del _sub_router  # keep the loop variable out of the module namespace
|
||||
82
decnet/web/router/swarm_updates/api_list_host_releases.py
Normal file
82
decnet/web/router/swarm_updates/api_list_host_releases.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""GET /swarm-updates/hosts — per-host updater health + release slots.
|
||||
|
||||
Fans out an ``UpdaterClient.health()`` probe to every enrolled host that
|
||||
has an updater bundle. Each probe is isolated: a single unreachable host
|
||||
never fails the whole list (that's normal partial-failure behaviour for
|
||||
a fleet view).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.swarm.updater_client import UpdaterClient
|
||||
from decnet.web.db.models import HostReleaseInfo, HostReleasesResponse
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
from decnet.web.dependencies import get_repo, require_admin
|
||||
|
||||
log = get_logger("swarm_updates.list")
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def _extract_shas(releases: list[dict[str, Any]]) -> tuple[str | None, str | None]:
    """Return the ``(current, previous)`` SHAs from an updater releases list.

    Entries look like ``{"slot": "active"|"prev", "sha": ..., ...}`` and may
    arrive in any order, so each SHA is looked up by slot name. For each
    slot the first matching entry wins; a missing slot (or a matching entry
    without a ``sha``) yields ``None``.
    """

    def _first_sha_for(slot_name: str) -> str | None:
        # Stop at the first entry tagged with this slot, even if it has no sha.
        for entry in releases:
            if entry.get("slot") == slot_name:
                return entry.get("sha")
        return None

    return _first_sha_for("active"), _first_sha_for("prev")
|
||||
|
||||
|
||||
async def _probe_host(host: dict[str, Any]) -> HostReleaseInfo:
    """Probe one host's updater and describe what it reported.

    Any exception raised while opening the client or calling ``health()``
    is turned into an unreachable entry whose ``detail`` holds the exception
    type and message. Otherwise the health payload is mapped onto the
    reachable fields.
    """
    try:
        async with UpdaterClient(host=host) as client:
            payload = await client.health()
    except Exception as err:  # noqa: BLE001
        failure = f"{type(err).__name__}: {err}"
        return HostReleaseInfo(
            host_uuid=host["uuid"],
            host_name=host["name"],
            address=host["address"],
            reachable=False,
            detail=failure,
        )

    # A missing or empty "releases" value is treated as no releases.
    slots = payload.get("releases") or []
    active_sha, prev_sha = _extract_shas(slots)
    # Prefer the explicit agent status; fall back to the payload's own status.
    reported_status = payload.get("agent_status") or payload.get("status")
    return HostReleaseInfo(
        host_uuid=host["uuid"],
        host_name=host["name"],
        address=host["address"],
        reachable=True,
        agent_status=reported_status,
        current_sha=active_sha,
        previous_sha=prev_sha,
        releases=slots,
    )
|
||||
|
||||
|
||||
@router.get(
    "/hosts",
    response_model=HostReleasesResponse,
    tags=["Swarm Updates"],
)
async def api_list_host_releases(
    admin: dict = Depends(require_admin),
    repo: BaseRepository = Depends(get_repo),
) -> HostReleasesResponse:
    """List updater health and release slots for every update-capable host.

    Hosts that are decommissioned or have no updater certificate
    fingerprint are skipped. The remaining hosts are probed concurrently.
    """
    eligible = []
    for row in await repo.list_swarm_hosts():
        if row.get("status") == "decommissioned":
            continue  # retired hosts are never update targets
        if not row.get("updater_cert_fingerprint"):
            continue  # agent-only enrollment: no updater to talk to
        eligible.append(row)

    if not eligible:
        return HostReleasesResponse(hosts=[])

    probes = (_probe_host(row) for row in eligible)
    infos = await asyncio.gather(*probes)
    return HostReleasesResponse(hosts=list(infos))
|
||||
152
decnet/web/router/swarm_updates/api_push_update.py
Normal file
152
decnet/web/router/swarm_updates/api_push_update.py
Normal file
@@ -0,0 +1,152 @@
|
||||
"""POST /swarm-updates/push — fan a tarball of the master's tree to workers.
|
||||
|
||||
Mirrors the ``decnet swarm update`` CLI flow: build the tarball once,
|
||||
dispatch concurrently, collect per-host statuses. Returns HTTP 200 even
|
||||
when individual hosts failed — the operator reads per-host ``status``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import pathlib
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.swarm.tar_tree import detect_git_sha, tar_working_tree
|
||||
from decnet.swarm.updater_client import UpdaterClient
|
||||
from decnet.web.db.models import PushUpdateRequest, PushUpdateResponse, PushUpdateResult
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
from decnet.web.dependencies import get_repo, require_admin
|
||||
|
||||
log = get_logger("swarm_updates.push")
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def _master_tree_root() -> pathlib.Path:
    """Return the directory that gets tarred and pushed to workers.

    Resolves this file's absolute path and takes ``parents[4]``. For a file
    at ``decnet/web/router/swarm_updates/<module>.py`` the parents are
    ``swarm_updates`` (0), ``router`` (1), ``web`` (2), ``decnet`` (3) and
    the repo root (4). Matches the layout shipped via ``pip install -e .``
    and the dev checkout at ``~/Tools/DECNET``.
    """
    this_file = pathlib.Path(__file__).resolve()
    return this_file.parents[4]
|
||||
|
||||
|
||||
def _classify_update(status_code: int) -> str:
    """Map an ``/update`` HTTP status code onto a per-host result status.

    200 means the update was applied, 409 means the worker rolled back,
    and every other code counts as a failure.
    """
    known_outcomes = {200: "updated", 409: "rolled-back"}
    return known_outcomes.get(status_code, "failed")
|
||||
|
||||
|
||||
async def _resolve_targets(
    repo: BaseRepository,
    req: PushUpdateRequest,
) -> list[dict[str, Any]]:
    """Turn a push request into the list of host rows to push to.

    Only hosts with an updater certificate fingerprint are considered.
    With ``all`` set, decommissioned hosts are dropped; with explicit
    ``host_uuids`` every requested UUID must resolve.

    Raises:
        HTTPException: 400 unless exactly one of ``host_uuids`` / ``all``
            is given; 404 if a requested host is unknown or has no updater,
            or if the selection ends up empty.
    """
    has_explicit_hosts = bool(req.host_uuids)
    # Both set or both unset are equally invalid.
    if req.all == has_explicit_hosts:
        raise HTTPException(
            status_code=400,
            detail="Specify exactly one of host_uuids or all=true.",
        )

    updatable = [
        row for row in await repo.list_swarm_hosts()
        if row.get("updater_cert_fingerprint")
    ]

    if req.all:
        selected = [row for row in updatable if row.get("status") != "decommissioned"]
    else:
        requested = set(req.host_uuids or [])
        selected = [row for row in updatable if row["uuid"] in requested]
        unresolved = requested.difference(row["uuid"] for row in selected)
        if unresolved:
            raise HTTPException(
                status_code=404,
                detail=f"Unknown or updater-less host(s): {sorted(unresolved)}",
            )

    if not selected:
        raise HTTPException(
            status_code=404,
            detail="No targets: no enrolled hosts have an updater bundle.",
        )
    return selected
|
||||
|
||||
|
||||
async def _push_one(
    host: dict[str, Any],
    tarball: bytes,
    sha: str,
    include_self: bool,
) -> PushUpdateResult:
    """Push the tarball to one worker and summarise the outcome.

    Calls the updater's ``update``; when ``include_self`` is set and that
    call answered 200, follows up with ``update_self`` on the same client.
    Exceptions raised by the update flow are logged and reported as a
    ``"failed"`` result instead of propagating.
    """
    try:
        async with UpdaterClient(host=host) as client:
            resp = await client.update(tarball, sha=sha)
            payload = resp.json() if resp.content else {}
            payload_is_dict = isinstance(payload, dict)
            outcome = _classify_update(resp.status_code)
            stderr = payload.get("stderr") if payload_is_dict else None

            if include_self and resp.status_code == 200:
                # Agent first, updater second — a broken updater push must never
                # strand the fleet on an old agent.
                try:
                    self_resp = await client.update_self(tarball, sha=sha)
                    self_ok = self_resp.status_code in (200, 0)  # 0 = connection dropped (expected)
                except Exception as exc:  # noqa: BLE001
                    # Connection drop on update-self is expected and not an error.
                    self_ok = _is_expected_connection_drop(exc)
                    if not self_ok:
                        return PushUpdateResult(
                            host_uuid=host["uuid"],
                            host_name=host["name"],
                            status="self-failed",
                            http_status=resp.status_code,
                            sha=sha,
                            detail=f"agent updated OK but self-update failed: {exc}",
                            stderr=stderr,
                        )
                outcome = "self-updated" if self_ok else "self-failed"

            if payload_is_dict:
                summary = payload.get("error") or payload.get("probe")
            else:
                summary = None
            return PushUpdateResult(
                host_uuid=host["uuid"],
                host_name=host["name"],
                status=outcome,
                http_status=resp.status_code,
                sha=sha,
                detail=summary,
                stderr=stderr,
            )
    except Exception as exc:  # noqa: BLE001
        log.exception("swarm_updates.push failed host=%s", host.get("name"))
        return PushUpdateResult(
            host_uuid=host["uuid"],
            host_name=host["name"],
            status="failed",
            detail=f"{type(exc).__name__}: {exc}",
        )
|
||||
|
||||
|
||||
def _is_expected_connection_drop(exc: BaseException) -> bool:
    """Return True if *exc* looks like the updater re-exec'ing mid-response.

    ``update-self`` re-execs the updater while the response is in flight,
    so httpx raises on the dropped connection. Only errors that can occur
    on an already-established connection count as that expected drop.

    ``httpx.ConnectError`` is deliberately not accepted: it means the
    connection was never established (host down, updater not listening),
    so the tarball was never delivered and the push must be reported as a
    failure rather than a successful self-update.
    """
    import httpx

    return isinstance(exc, (httpx.RemoteProtocolError, httpx.ReadError))
|
||||
|
||||
|
||||
@router.post(
    "/push",
    response_model=PushUpdateResponse,
    tags=["Swarm Updates"],
)
async def api_push_update(
    req: PushUpdateRequest,
    admin: dict = Depends(require_admin),
    repo: BaseRepository = Depends(get_repo),
) -> PushUpdateResponse:
    """Build one tarball of the master's tree and push it to the selected workers.

    Targets are resolved first, so an invalid request fails before any
    tarball is built. The tarball is built exactly once and handed to every
    per-host push; pushes run concurrently. Per-host failures are reported
    in ``results`` rather than failing the request.

    Raises:
        HTTPException: 400/404 from target resolution.
    """
    targets = await _resolve_targets(repo, req)
    tree_root = _master_tree_root()
    sha = detect_git_sha(tree_root)
    # tar_working_tree is a synchronous call that can take seconds on a real
    # tree; run it in a worker thread so the event loop keeps serving other
    # requests while the tarball is built.
    tarball = await asyncio.to_thread(
        tar_working_tree, tree_root, extra_excludes=req.exclude,
    )
    log.info(
        "swarm_updates.push sha=%s tarball=%d hosts=%d include_self=%s",
        sha or "(not a git repo)", len(tarball), len(targets), req.include_self,
    )
    results = await asyncio.gather(
        *(_push_one(h, tarball, sha, req.include_self) for h in targets)
    )
    return PushUpdateResponse(
        sha=sha,
        tarball_bytes=len(tarball),
        results=list(results),
    )
|
||||
92
decnet/web/router/swarm_updates/api_push_update_self.py
Normal file
92
decnet/web/router/swarm_updates/api_push_update_self.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""POST /swarm-updates/push-self — push only to workers' /update-self.
|
||||
|
||||
Use case: the agent is fine but the updater itself needs an upgrade (e.g.
|
||||
a fix to ``executor.py``). Uploading only ``/update-self`` avoids a
|
||||
redundant agent restart on healthy workers.
|
||||
|
||||
No auto-rollback: the updater re-execs itself on success, so a broken
|
||||
push leaves the worker on the old code — verified by polling ``/health``
|
||||
after the request returns.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.swarm.tar_tree import detect_git_sha, tar_working_tree
|
||||
from decnet.swarm.updater_client import UpdaterClient
|
||||
from decnet.web.db.models import PushUpdateRequest, PushUpdateResponse, PushUpdateResult
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
from decnet.web.dependencies import get_repo, require_admin
|
||||
|
||||
from .api_push_update import _is_expected_connection_drop, _master_tree_root, _resolve_targets
|
||||
|
||||
log = get_logger("swarm_updates.push_self")
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
async def _push_self_one(host: dict[str, Any], tarball: bytes, sha: str) -> PushUpdateResult:
    """Push the tarball to one worker's ``update_self`` and report the outcome.

    A 200 response is ``"self-updated"``; any other response is
    ``"self-failed"``. An exception recognised as the expected connection
    drop also counts as ``"self-updated"``. Every other exception is logged
    and reported as ``"self-failed"``.
    """
    try:
        async with UpdaterClient(host=host) as client:
            try:
                resp = await client.update_self(tarball, sha=sha)
                code = resp.status_code
                payload = resp.json() if resp.content else {}
                succeeded = code == 200
                if isinstance(payload, dict):
                    detail = payload.get("error") or payload.get("probe")
                    stderr = payload.get("stderr")
                else:
                    detail = None
                    stderr = None
            except Exception as err:  # noqa: BLE001
                # Connection drops during self-update are expected — the updater
                # re-execs itself mid-response. Anything else is a real failure.
                if not _is_expected_connection_drop(err):
                    raise
                return PushUpdateResult(
                    host_uuid=host["uuid"],
                    host_name=host["name"],
                    status="self-updated",
                    sha=sha,
                    detail="updater re-exec dropped connection (expected)",
                )
        outcome = "self-updated" if succeeded else "self-failed"
        return PushUpdateResult(
            host_uuid=host["uuid"],
            host_name=host["name"],
            status=outcome,
            http_status=code,
            sha=sha,
            detail=detail,
            stderr=stderr,
        )
    except Exception as err:  # noqa: BLE001
        log.exception("swarm_updates.push_self failed host=%s", host.get("name"))
        return PushUpdateResult(
            host_uuid=host["uuid"],
            host_name=host["name"],
            status="self-failed",
            detail=f"{type(err).__name__}: {err}",
        )
|
||||
|
||||
|
||||
@router.post(
    "/push-self",
    response_model=PushUpdateResponse,
    tags=["Swarm Updates"],
)
async def api_push_update_self(
    req: PushUpdateRequest,
    admin: dict = Depends(require_admin),
    repo: BaseRepository = Depends(get_repo),
) -> PushUpdateResponse:
    """Build one tarball and push it to the selected workers' ``update_self``.

    Targets are resolved first, so an invalid request fails before any
    tarball is built. The tarball is built exactly once and the per-host
    pushes run concurrently. Per-host failures are reported in ``results``
    rather than failing the request.

    Raises:
        HTTPException: 400/404 from target resolution.
    """
    targets = await _resolve_targets(repo, req)
    tree_root = _master_tree_root()
    sha = detect_git_sha(tree_root)
    # tar_working_tree is a synchronous call that can take seconds on a real
    # tree; run it in a worker thread so the event loop keeps serving other
    # requests while the tarball is built.
    tarball = await asyncio.to_thread(
        tar_working_tree, tree_root, extra_excludes=req.exclude,
    )
    log.info(
        "swarm_updates.push_self sha=%s tarball=%d hosts=%d",
        sha or "(not a git repo)", len(tarball), len(targets),
    )
    results = await asyncio.gather(
        *(_push_self_one(h, tarball, sha) for h in targets)
    )
    return PushUpdateResponse(
        sha=sha,
        tarball_bytes=len(tarball),
        results=list(results),
    )
|
||||
70
decnet/web/router/swarm_updates/api_rollback_host.py
Normal file
70
decnet/web/router/swarm_updates/api_rollback_host.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""POST /swarm-updates/rollback — manual rollback on a single host.
|
||||
|
||||
Calls the worker updater's ``/rollback`` which swaps the ``current``
|
||||
symlink back to ``releases/prev``. Fails with 404 if the target has no
|
||||
previous release slot.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.swarm.updater_client import UpdaterClient
|
||||
from decnet.web.db.models import RollbackRequest, RollbackResponse
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
from decnet.web.dependencies import get_repo, require_admin
|
||||
|
||||
log = get_logger("swarm_updates.rollback")
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post(
    "/rollback",
    response_model=RollbackResponse,
    tags=["Swarm Updates"],
)
async def api_rollback_host(
    req: RollbackRequest,
    admin: dict = Depends(require_admin),
    repo: BaseRepository = Depends(get_repo),
) -> RollbackResponse:
    """Ask one worker's updater to roll back to its previous release.

    Transport failures and non-200/404 worker responses are reported as a
    ``"failed"`` result with HTTP 200; a worker 404 is re-raised as HTTP 404.

    Raises:
        HTTPException: 404 if the host is unknown or the worker reports no
            previous release; 400 if the host has no updater bundle.
    """
    host = await repo.get_swarm_host_by_uuid(req.host_uuid)
    if host is None:
        raise HTTPException(status_code=404, detail=f"Unknown host: {req.host_uuid}")
    if not host.get("updater_cert_fingerprint"):
        raise HTTPException(
            status_code=400,
            detail=f"Host '{host['name']}' has no updater bundle — nothing to roll back.",
        )

    try:
        async with UpdaterClient(host=host) as u:
            r = await u.rollback()
    except Exception as exc:  # noqa: BLE001
        log.exception("swarm_updates.rollback transport failure host=%s", host["name"])
        return RollbackResponse(
            host_uuid=host["uuid"], host_name=host["name"],
            status="failed",
            detail=f"{type(exc).__name__}: {exc}",
        )

    try:
        body = r.json() if r.content else {}
    except ValueError:
        # A non-JSON body must not turn into an unhandled 500; classify the
        # response by its status code alone.
        body = {}
    # Anything other than a JSON object carries no usable fields.
    payload = body if isinstance(body, dict) else {}

    if r.status_code == 404:
        # No previous release — surface as 404 so the UI can render the
        # "nothing to roll back" state distinctly from a transport error.
        # Fall back to a fixed message whenever the worker gave no detail.
        raise HTTPException(
            status_code=404,
            detail=payload.get("detail") or "No previous release on worker.",
        )
    if r.status_code != 200:
        return RollbackResponse(
            host_uuid=host["uuid"], host_name=host["name"],
            status="failed", http_status=r.status_code,
            detail=payload.get("error") or payload.get("detail"),
        )
    return RollbackResponse(
        host_uuid=host["uuid"], host_name=host["name"],
        status="rolled-back", http_status=r.status_code,
        detail=payload.get("status"),
    )
|
||||
0
tests/api/swarm_updates/__init__.py
Normal file
0
tests/api/swarm_updates/__init__.py
Normal file
151
tests/api/swarm_updates/conftest.py
Normal file
151
tests/api/swarm_updates/conftest.py
Normal file
@@ -0,0 +1,151 @@
|
||||
"""Shared fixtures for /api/v1/swarm-updates tests.
|
||||
|
||||
The tests never talk to a real worker — ``UpdaterClient`` is monkeypatched
|
||||
to a recording fake. That keeps the tests fast and lets us assert call
|
||||
shapes (tarball-once, per-host dispatch, include_self ordering) without
|
||||
standing up TLS endpoints.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid as _uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from decnet.web.dependencies import repo
|
||||
|
||||
|
||||
async def _add_host(
    name: str,
    address: str = "10.0.0.1",
    *,
    with_updater: bool = True,
    status: str = "enrolled",
) -> dict[str, Any]:
    """Insert a swarm host row and return its uuid, name and address.

    ``with_updater=False`` stores no updater fingerprint, which models an
    agent-only enrollment.
    """
    host_uuid = str(_uuid.uuid4())
    updater_fingerprint = "def456" if with_updater else None
    record = {
        "uuid": host_uuid,
        "name": name,
        "address": address,
        "agent_port": 8765,
        "status": status,
        "client_cert_fingerprint": "abc123",
        "updater_cert_fingerprint": updater_fingerprint,
        "cert_bundle_path": f"/tmp/{name}",
        "enrolled_at": datetime.now(timezone.utc),
        "notes": None,
    }
    await repo.add_swarm_host(record)
    return {"uuid": host_uuid, "name": name, "address": address}
|
||||
|
||||
|
||||
@pytest.fixture
def add_host():
    """Expose the ``_add_host`` coroutine function to tests as a fixture."""
    host_factory = _add_host
    return host_factory
|
||||
|
||||
|
||||
@pytest.fixture
def fake_updater(monkeypatch):
    """Swap a recording fake ``UpdaterClient`` + tar builder into every route module.

    Returns ``{"client": FakeUpdaterClient, "Response": FakeResponse}``.
    Tests script per-host behaviour by assigning the ``*_responses`` dicts
    on the fake client, keyed by host name; a scripted exception instance is
    raised, anything else is returned.
    """

    class FakeResponse:
        def __init__(self, status_code: int, body: dict[str, Any] | None = None):
            self.status_code = status_code
            self._body = body or {}
            # Always non-empty so routes take their ``r.json()`` branch.
            self.content = b"payload"

        def json(self) -> dict[str, Any]:
            return self._body

    def _deliver(scripted):
        """Raise a scripted exception, or hand back the scripted value."""
        if isinstance(scripted, BaseException):
            raise scripted
        return scripted

    class FakeUpdaterClient:
        calls: list[tuple[str, str, dict]] = []  # (host_name, method, kwargs)
        health_responses: dict[str, dict[str, Any]] = {}
        update_responses: dict[str, FakeResponse | BaseException] = {}
        update_self_responses: dict[str, FakeResponse | BaseException] = {}
        rollback_responses: dict[str, FakeResponse | BaseException] = {}

        def __init__(self, host=None, **_kw):
            self._name = host["name"] if host else "?"

        async def __aenter__(self):
            return self

        async def __aexit__(self, *exc):
            return None

        async def health(self):
            FakeUpdaterClient.calls.append((self._name, "health", {}))
            scripted = FakeUpdaterClient.health_responses.get(self._name)
            # An unscripted (or falsy) entry yields a healthy, empty payload.
            return _deliver(scripted) or {"status": "ok", "releases": []}

        async def update(self, tarball, sha=""):
            FakeUpdaterClient.calls.append((self._name, "update", {"tarball": tarball, "sha": sha}))
            fallback = FakeResponse(200, {"probe": "ok"})
            return _deliver(FakeUpdaterClient.update_responses.get(self._name, fallback))

        async def update_self(self, tarball, sha=""):
            FakeUpdaterClient.calls.append((self._name, "update_self", {"tarball": tarball, "sha": sha}))
            fallback = FakeResponse(200)
            return _deliver(FakeUpdaterClient.update_self_responses.get(self._name, fallback))

        async def rollback(self):
            FakeUpdaterClient.calls.append((self._name, "rollback", {}))
            fallback = FakeResponse(200, {"status": "rolled back"})
            return _deliver(FakeUpdaterClient.rollback_responses.get(self._name, fallback))

    # Reset class-level state each test — fixtures are function-scoped but
    # the class dicts survive otherwise.
    FakeUpdaterClient.calls = []
    FakeUpdaterClient.health_responses = {}
    FakeUpdaterClient.update_responses = {}
    FakeUpdaterClient.update_self_responses = {}
    FakeUpdaterClient.rollback_responses = {}

    route_modules = (
        "decnet.web.router.swarm_updates.api_list_host_releases",
        "decnet.web.router.swarm_updates.api_push_update",
        "decnet.web.router.swarm_updates.api_push_update_self",
        "decnet.web.router.swarm_updates.api_rollback_host",
    )
    for mod in route_modules:
        monkeypatch.setattr(f"{mod}.UpdaterClient", FakeUpdaterClient)

    # Stub the tarball builders so tests don't spend seconds re-tarring the
    # repo on every assertion. The byte contents don't matter for the route
    # contract — the updater side is faked.
    def _stub_tarball(root, extra_excludes=None):
        return b"tarball-bytes"

    def _stub_sha(root):
        return "deadbeef"

    stubs = (
        ("decnet.web.router.swarm_updates.api_push_update.tar_working_tree", _stub_tarball),
        ("decnet.web.router.swarm_updates.api_push_update.detect_git_sha", _stub_sha),
        ("decnet.web.router.swarm_updates.api_push_update_self.tar_working_tree", _stub_tarball),
        ("decnet.web.router.swarm_updates.api_push_update_self.detect_git_sha", _stub_sha),
    )
    for target, replacement in stubs:
        monkeypatch.setattr(target, replacement)

    return {"client": FakeUpdaterClient, "Response": FakeResponse}
|
||||
|
||||
|
||||
@pytest.fixture
def connection_drop_exc():
    """Provide the exception httpx raises when the updater re-execs mid-response."""
    dropped = httpx.RemoteProtocolError("server disconnected")
    return dropped
|
||||
69
tests/api/swarm_updates/test_list_host_releases.py
Normal file
69
tests/api/swarm_updates/test_list_host_releases.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""GET /api/v1/swarm-updates/hosts — per-host updater health fan-out."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_admin_lists_reachable_and_unreachable_hosts(
    client, auth_token, add_host, fake_updater,
):
    """A healthy host reports its SHAs; a failing probe is listed as unreachable."""
    for name, ip in (("alpha", "10.0.0.1"), ("beta", "10.0.0.2")):
        await add_host(name, ip)

    alpha_health = {
        "status": "ok",
        "agent_status": "ok",
        "releases": [
            {"slot": "active", "sha": "aaaa111", "healthy": True},
            {"slot": "prev", "sha": "0000000", "healthy": True},
        ],
    }
    fake_updater["client"].health_responses = {
        "alpha": alpha_health,
        "beta": RuntimeError("TLS handshake failed"),
    }

    headers = {"Authorization": f"Bearer {auth_token}"}
    resp = await client.get("/api/v1/swarm-updates/hosts", headers=headers)

    assert resp.status_code == 200
    by_name = {entry["host_name"]: entry for entry in resp.json()["hosts"]}
    alpha, beta = by_name["alpha"], by_name["beta"]
    assert alpha["reachable"] is True
    assert alpha["current_sha"] == "aaaa111"
    assert alpha["previous_sha"] == "0000000"
    assert beta["reachable"] is False
    assert "TLS handshake" in beta["detail"]
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_decommissioned_and_agent_only_hosts_are_excluded(
    client, auth_token, add_host, fake_updater,
):
    """Only enrolled hosts that carry an updater bundle appear in the listing."""
    await add_host("good", "10.0.0.1", with_updater=True)
    await add_host("gone", "10.0.0.2", with_updater=True, status="decommissioned")
    await add_host("agentonly", "10.0.0.3", with_updater=False)

    headers = {"Authorization": f"Bearer {auth_token}"}
    resp = await client.get("/api/v1/swarm-updates/hosts", headers=headers)

    assert resp.status_code == 200
    listed = {entry["host_name"] for entry in resp.json()["hosts"]}
    assert listed == {"good"}
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_viewer_is_forbidden(client, viewer_token, add_host, fake_updater):
    """A viewer token is rejected with 403 on the hosts listing."""
    await add_host("alpha")
    headers = {"Authorization": f"Bearer {viewer_token}"}
    resp = await client.get("/api/v1/swarm-updates/hosts", headers=headers)
    assert resp.status_code == 403
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_unauth_returns_401(client):
    """A request without credentials is rejected with 401."""
    anonymous = await client.get("/api/v1/swarm-updates/hosts")
    assert anonymous.status_code == 401
|
||||
176
tests/api/swarm_updates/test_push_update.py
Normal file
176
tests/api/swarm_updates/test_push_update.py
Normal file
@@ -0,0 +1,176 @@
|
||||
"""POST /api/v1/swarm-updates/push — happy paths, rollback, validation."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_push_to_single_host_success(client, auth_token, add_host, fake_updater):
    """Pushing to one explicit host reports the SHA, tarball size and 'updated'."""
    host = await add_host("alpha")

    resp = await client.post(
        "/api/v1/swarm-updates/push",
        headers={"Authorization": f"Bearer {auth_token}"},
        json={"host_uuids": [host["uuid"]]},
    )

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["sha"] == "deadbeef"
    assert payload["tarball_bytes"] == len(b"tarball-bytes")
    first = payload["results"][0]
    assert first["status"] == "updated"
    assert first["host_name"] == "alpha"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_push_reports_rollback_on_409(client, auth_token, add_host, fake_updater):
    """A 409 from the worker is reported as 'rolled-back' with its stderr."""
    host = await add_host("alpha")
    make_response = fake_updater["Response"]
    rolled_back = make_response(
        409, {"error": "probe timed out", "stderr": "boom", "rolled_back": True},
    )
    fake_updater["client"].update_responses = {"alpha": rolled_back}

    resp = await client.post(
        "/api/v1/swarm-updates/push",
        headers={"Authorization": f"Bearer {auth_token}"},
        json={"host_uuids": [host["uuid"]]},
    )

    assert resp.status_code == 200
    outcome = resp.json()["results"][0]
    assert outcome["status"] == "rolled-back"
    assert outcome["http_status"] == 409
    assert outcome["stderr"] == "boom"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_push_all_aggregates_mixed_results(client, auth_token, add_host, fake_updater):
    """With all=true, one host succeeding and one raising are both reported."""
    await add_host("alpha", "10.0.0.1")
    await add_host("beta", "10.0.0.2")
    make_response = fake_updater["Response"]
    fake_updater["client"].update_responses = {
        "alpha": make_response(200, {"probe": "ok"}),
        "beta": RuntimeError("connect timeout"),
    }

    resp = await client.post(
        "/api/v1/swarm-updates/push",
        headers={"Authorization": f"Bearer {auth_token}"},
        json={"all": True},
    )

    assert resp.status_code == 200
    status_by_host = {}
    for entry in resp.json()["results"]:
        status_by_host[entry["host_name"]] = entry["status"]
    assert status_by_host == {"alpha": "updated", "beta": "failed"}
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_tarball_built_once_across_multi_host_push(
    client, auth_token, add_host, fake_updater, monkeypatch,
):
    """A push to two hosts must call tar_working_tree exactly once."""
    await add_host("alpha", "10.0.0.1")
    await add_host("beta", "10.0.0.2")
    invocations = {"count": 0}

    def counting_tar(root, extra_excludes=None):
        # Stand-in for tar_working_tree that records each invocation.
        invocations["count"] += 1
        return b"tarball-bytes"

    monkeypatch.setattr(
        "decnet.web.router.swarm_updates.api_push_update.tar_working_tree",
        counting_tar,
    )
    bearer = {"Authorization": f"Bearer {auth_token}"}

    response = await client.post(
        "/api/v1/swarm-updates/push", headers=bearer, json={"all": True},
    )

    assert response.status_code == 200
    assert invocations["count"] == 1
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_include_self_only_runs_update_self_on_success(
    client, auth_token, add_host, fake_updater,
):
    """With include_self, update_self is called only where the update succeeded."""
    await add_host("alpha", "10.0.0.1")
    await add_host("beta", "10.0.0.2")
    make_response = fake_updater["Response"]
    fake_updater["client"].update_responses = {
        "alpha": make_response(200, {"probe": "ok"}),
        "beta": make_response(409, {"error": "bad", "rolled_back": True}),
    }
    bearer = {"Authorization": f"Bearer {auth_token}"}

    response = await client.post(
        "/api/v1/swarm-updates/push",
        headers=bearer,
        json={"all": True, "include_self": True},
    )

    assert response.status_code == 200
    by_host = {entry["host_name"]: entry for entry in response.json()["results"]}
    assert by_host["alpha"]["status"] == "self-updated"
    assert by_host["beta"]["status"] == "rolled-back"
    # update_self must NOT have been called on beta (rolled-back agent).
    recorded = [
        (host_name, method)
        for host_name, method, _ in fake_updater["client"].calls
    ]
    assert ("beta", "update_self") not in recorded
    assert ("alpha", "update_self") in recorded
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_include_self_tolerates_expected_connection_drop(
    client, auth_token, add_host, fake_updater, connection_drop_exc,
):
    """A connection drop during update_self still yields "self-updated"."""
    await add_host("alpha", "10.0.0.1")
    # Configure the fake so alpha's update_self yields the drop exception.
    drop_for_alpha = {"alpha": connection_drop_exc}
    fake_updater["client"].update_self_responses = drop_for_alpha
    bearer = {"Authorization": f"Bearer {auth_token}"}

    response = await client.post(
        "/api/v1/swarm-updates/push",
        headers=bearer,
        json={"all": True, "include_self": True},
    )

    assert response.status_code == 200
    first = response.json()["results"][0]
    assert first["status"] == "self-updated"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_host_and_all_are_mutually_exclusive(
    client, auth_token, add_host, fake_updater,
):
    """Sending both host_uuids and "all": True is rejected with 400."""
    host = await add_host("alpha")
    bearer = {"Authorization": f"Bearer {auth_token}"}
    conflicting = {"host_uuids": [host["uuid"]], "all": True}

    response = await client.post(
        "/api/v1/swarm-updates/push", headers=bearer, json=conflicting,
    )

    assert response.status_code == 400
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_neither_host_nor_all_rejected(client, auth_token, fake_updater):
    """An empty body (no host_uuids, no "all") is rejected with 400."""
    bearer = {"Authorization": f"Bearer {auth_token}"}

    response = await client.post(
        "/api/v1/swarm-updates/push", headers=bearer, json={},
    )

    assert response.status_code == 400
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_unknown_host_uuid_returns_404(client, auth_token, fake_updater):
    """Pushing to a host UUID that was never added yields 404."""
    bearer = {"Authorization": f"Bearer {auth_token}"}
    payload = {"host_uuids": ["nonexistent"]}

    response = await client.post(
        "/api/v1/swarm-updates/push", headers=bearer, json=payload,
    )

    assert response.status_code == 404
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_viewer_is_forbidden(client, viewer_token, add_host, fake_updater):
    """A viewer token is refused on /push with 403."""
    host = await add_host("alpha")
    viewer_bearer = {"Authorization": f"Bearer {viewer_token}"}

    response = await client.post(
        "/api/v1/swarm-updates/push",
        headers=viewer_bearer,
        json={"host_uuids": [host["uuid"]]},
    )

    assert response.status_code == 403
|
||||
67
tests/api/swarm_updates/test_push_update_self.py
Normal file
67
tests/api/swarm_updates/test_push_update_self.py
Normal file
@@ -0,0 +1,67 @@
|
||||
"""POST /api/v1/swarm-updates/push-self — updater-only upgrade path."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_push_self_only_calls_update_self(client, auth_token, add_host, fake_updater):
    """/push-self invokes update_self on the updater client and never update."""
    await add_host("alpha")
    bearer = {"Authorization": f"Bearer {auth_token}"}

    response = await client.post(
        "/api/v1/swarm-updates/push-self", headers=bearer, json={"all": True},
    )

    assert response.status_code == 200
    assert response.json()["results"][0]["status"] == "self-updated"
    # Each recorded call is a 3-tuple; only the method name (middle) matters.
    invoked = [method for _, method, _ in fake_updater["client"].calls]
    assert "update" not in invoked
    assert "update_self" in invoked
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_push_self_reports_failure(client, auth_token, add_host, fake_updater):
    """A 500 from update_self is reported as "self-failed" with its stderr."""
    await add_host("alpha")
    make_response = fake_updater["Response"]
    failure_payload = {"error": "pip failed", "stderr": "no module named typer"}
    fake_updater["client"].update_self_responses = {
        "alpha": make_response(500, failure_payload),
    }
    bearer = {"Authorization": f"Bearer {auth_token}"}

    response = await client.post(
        "/api/v1/swarm-updates/push-self", headers=bearer, json={"all": True},
    )

    # The endpoint answers 200; the failure lives in the per-host result.
    assert response.status_code == 200
    outcome = response.json()["results"][0]
    assert outcome["status"] == "self-failed"
    assert outcome["http_status"] == 500
    assert "typer" in (outcome["stderr"] or "")
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_push_self_treats_connection_drop_as_success(
    client, auth_token, add_host, fake_updater, connection_drop_exc,
):
    """A connection drop on /push-self is reported as "self-updated"."""
    await add_host("alpha")
    drop_for_alpha = {"alpha": connection_drop_exc}
    fake_updater["client"].update_self_responses = drop_for_alpha
    bearer = {"Authorization": f"Bearer {auth_token}"}

    response = await client.post(
        "/api/v1/swarm-updates/push-self", headers=bearer, json={"all": True},
    )

    assert response.status_code == 200
    first = response.json()["results"][0]
    assert first["status"] == "self-updated"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_viewer_is_forbidden(client, viewer_token, add_host, fake_updater):
    """A viewer token is refused on /push-self with 403."""
    await add_host("alpha")
    viewer_bearer = {"Authorization": f"Bearer {viewer_token}"}

    response = await client.post(
        "/api/v1/swarm-updates/push-self",
        headers=viewer_bearer,
        json={"all": True},
    )

    assert response.status_code == 403
|
||||
86
tests/api/swarm_updates/test_rollback_host.py
Normal file
86
tests/api/swarm_updates/test_rollback_host.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""POST /api/v1/swarm-updates/rollback — single-host manual rollback."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_rollback_happy_path(client, auth_token, add_host, fake_updater):
    """Rolling back a known host returns 200 with status "rolled-back"."""
    host = await add_host("alpha")
    bearer = {"Authorization": f"Bearer {auth_token}"}
    payload = {"host_uuid": host["uuid"]}

    response = await client.post(
        "/api/v1/swarm-updates/rollback", headers=bearer, json=payload,
    )

    assert response.status_code == 200
    data = response.json()
    assert data["status"] == "rolled-back"
    assert data["host_name"] == "alpha"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_rollback_404_when_no_previous(client, auth_token, add_host, fake_updater):
    """An updater 404 ("no previous release") is passed through as a 404."""
    host = await add_host("alpha")
    make_response = fake_updater["Response"]
    fake_updater["client"].rollback_responses = {
        "alpha": make_response(404, {"detail": "no previous release"}),
    }
    bearer = {"Authorization": f"Bearer {auth_token}"}

    response = await client.post(
        "/api/v1/swarm-updates/rollback",
        headers=bearer,
        json={"host_uuid": host["uuid"]},
    )

    assert response.status_code == 404
    detail_text = response.json()["detail"].lower()
    assert "no previous" in detail_text
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_rollback_transport_failure_reported(client, auth_token, add_host, fake_updater):
    """An exception from the updater client is reported as status "failed"."""
    host = await add_host("alpha")
    transport_error = RuntimeError("TLS handshake failed")
    fake_updater["client"].rollback_responses = {"alpha": transport_error}
    bearer = {"Authorization": f"Bearer {auth_token}"}

    response = await client.post(
        "/api/v1/swarm-updates/rollback",
        headers=bearer,
        json={"host_uuid": host["uuid"]},
    )

    # The endpoint answers 200 and carries the error text in the body.
    assert response.status_code == 200
    data = response.json()
    assert data["status"] == "failed"
    assert "TLS handshake" in data["detail"]
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_rollback_unknown_host(client, auth_token, fake_updater):
    """Rolling back a host UUID that was never added yields 404."""
    bearer = {"Authorization": f"Bearer {auth_token}"}
    payload = {"host_uuid": "nonexistent"}

    response = await client.post(
        "/api/v1/swarm-updates/rollback", headers=bearer, json=payload,
    )

    assert response.status_code == 404
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_rollback_on_agent_only_host_rejected(
    client, auth_token, add_host, fake_updater,
):
    """A host added with with_updater=False cannot be rolled back (400)."""
    agent_only = await add_host("alpha", with_updater=False)
    bearer = {"Authorization": f"Bearer {auth_token}"}

    response = await client.post(
        "/api/v1/swarm-updates/rollback",
        headers=bearer,
        json={"host_uuid": agent_only["uuid"]},
    )

    assert response.status_code == 400
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_viewer_is_forbidden(client, viewer_token, add_host, fake_updater):
    """A viewer token is refused on /rollback with 403."""
    host = await add_host("alpha")
    viewer_bearer = {"Authorization": f"Bearer {viewer_token}"}

    response = await client.post(
        "/api/v1/swarm-updates/rollback",
        headers=viewer_bearer,
        json={"host_uuid": host["uuid"]},
    )

    assert response.status_code == 403
|
||||
Reference in New Issue
Block a user