feat(web): Remote Updates API — dashboard endpoints for pushing code to workers

Adds /api/v1/swarm-updates/{hosts,push,push-self,rollback} behind
require_admin. Reuses the existing UpdaterClient + tar_working_tree + the
per-host asyncio.gather pattern from api_deploy_swarm.py; tarball is
built exactly once per /push request and fanned out to every selected
worker. /hosts filters out decommissioned hosts and agent-only
enrollments (no updater bundle = not a target).

Connection drops during /update-self are treated as success — the
updater re-execs itself mid-response, so httpx always raises.

Pydantic models live in decnet/web/db/models.py (single source of
truth). 24 tests cover happy paths, rollback, transport failures,
include_self ordering (skip on rolled-back agents), validation, and
RBAC gating.
This commit is contained in:
2026-04-19 01:01:09 -04:00
parent f5a5fec607
commit a266d6b17e
13 changed files with 1041 additions and 0 deletions

View File

@@ -0,0 +1,70 @@
"""POST /swarm-updates/rollback — manual rollback on a single host.
Calls the worker updater's ``/rollback`` which swaps the ``current``
symlink back to ``releases/prev``. Fails with 404 if the target has no
previous release slot.
"""
from __future__ import annotations
from fastapi import APIRouter, Depends, HTTPException
from decnet.logging import get_logger
from decnet.swarm.updater_client import UpdaterClient
from decnet.web.db.models import RollbackRequest, RollbackResponse
from decnet.web.db.repository import BaseRepository
from decnet.web.dependencies import get_repo, require_admin
# Module-level logger for this endpoint, registered under the name
# "swarm_updates.rollback".
log = get_logger("swarm_updates.rollback")
# Router that carries the POST /rollback route declared in this module.
# NOTE(review): the URL prefix it is mounted under is decided by whoever
# includes this router — not visible from this file.
router = APIRouter()
@router.post(
    "/rollback",
    response_model=RollbackResponse,
    tags=["Swarm Updates"],
)
async def api_rollback_host(
    req: RollbackRequest,
    admin: dict = Depends(require_admin),
    repo: BaseRepository = Depends(get_repo),
) -> RollbackResponse:
    """Ask one worker's updater to roll back to its previous release.

    Looks the host up by ``req.host_uuid``, calls the updater's rollback
    endpoint through ``UpdaterClient`` and translates the outcome into a
    ``RollbackResponse``.

    Args:
        req: Request body; only ``host_uuid`` is read here.
        admin: Result of the ``require_admin`` dependency. It is not used in
            the body — it is declared so the dependency runs for every call.
        repo: Repository used to resolve the host record.

    Returns:
        A ``RollbackResponse`` with ``status="rolled-back"`` when the worker
        answers 200, or ``status="failed"`` when the call to the worker raises
        or the worker answers with any status other than 200/404.

    Raises:
        HTTPException: 404 when the host UUID is unknown, 400 when the host
            record has no ``updater_cert_fingerprint``, and 404 when the
            worker itself answers 404 (no previous release to roll back to).
    """
    host = await repo.get_swarm_host_by_uuid(req.host_uuid)
    if host is None:
        raise HTTPException(status_code=404, detail=f"Unknown host: {req.host_uuid}")
    if not host.get("updater_cert_fingerprint"):
        raise HTTPException(
            status_code=400,
            detail=f"Host '{host['name']}' has no updater bundle — nothing to roll back.",
        )

    try:
        async with UpdaterClient(host=host) as u:
            r = await u.rollback()
    except Exception as exc:  # noqa: BLE001
        # Deliberately broad: any failure to reach the worker is reported to
        # the caller as a "failed" result instead of bubbling up as a 500.
        log.exception("swarm_updates.rollback transport failure host=%s", host["name"])
        return RollbackResponse(
            host_uuid=host["uuid"], host_name=host["name"],
            status="failed",
            detail=f"{type(exc).__name__}: {exc}",
        )

    # The worker (or something in front of it) may answer with a body that is
    # not JSON; that must not turn into an unhandled error here.
    try:
        body = r.json() if r.content else {}
    except ValueError:
        log.warning(
            "swarm_updates.rollback non-JSON response host=%s status=%s",
            host["name"], r.status_code,
        )
        body = {}
    # Only a JSON object carries the fields read below; anything else
    # (list, string, number, null) is treated as "no fields".
    fields = body if isinstance(body, dict) else {}

    if r.status_code == 404:
        # No previous release — surface as 404 so the UI can render the
        # "nothing to roll back" state distinctly from a transport error.
        # The fallback text also covers an empty body or a missing "detail".
        raise HTTPException(
            status_code=404,
            detail=fields.get("detail") or "No previous release on worker.",
        )

    if r.status_code != 200:
        return RollbackResponse(
            host_uuid=host["uuid"], host_name=host["name"],
            status="failed", http_status=r.status_code,
            detail=fields.get("error") or fields.get("detail"),
        )

    return RollbackResponse(
        host_uuid=host["uuid"], host_name=host["name"],
        status="rolled-back", http_status=r.status_code,
        detail=fields.get("status"),
    )