feat(web): Remote Updates API — dashboard endpoints for pushing code to workers
Adds /api/v1/swarm-updates/{hosts,push,push-self,rollback} behind
require_admin. Reuses the existing UpdaterClient + tar_working_tree + the
per-host asyncio.gather pattern from api_deploy_swarm.py; tarball is
built exactly once per /push request and fanned out to every selected
worker. /hosts filters out decommissioned hosts and agent-only
enrollments (no updater bundle = not a target).
Connection drops during /update-self are treated as success — the
updater re-execs itself mid-response, so httpx always raises.
Pydantic models live in decnet/web/db/models.py (single source of
truth). 24 tests cover happy paths, rollback, transport failures,
include_self ordering (skip on rolled-back agents), validation, and
RBAC gating.
This commit is contained in:
92
decnet/web/router/swarm_updates/api_push_update_self.py
Normal file
92
decnet/web/router/swarm_updates/api_push_update_self.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""POST /swarm-updates/push-self — push only to workers' /update-self.
|
||||
|
||||
Use case: the agent is fine but the updater itself needs an upgrade (e.g.
|
||||
a fix to ``executor.py``). Uploading only ``/update-self`` avoids a
|
||||
redundant agent restart on healthy workers.
|
||||
|
||||
No auto-rollback: the updater re-execs itself on success, so a broken
|
||||
push leaves the worker on the old code — verified by polling ``/health``
|
||||
after the request returns.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.swarm.tar_tree import detect_git_sha, tar_working_tree
|
||||
from decnet.swarm.updater_client import UpdaterClient
|
||||
from decnet.web.db.models import PushUpdateRequest, PushUpdateResponse, PushUpdateResult
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
from decnet.web.dependencies import get_repo, require_admin
|
||||
|
||||
from .api_push_update import _is_expected_connection_drop, _master_tree_root, _resolve_targets
|
||||
|
||||
log = get_logger("swarm_updates.push_self")
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
async def _push_self_one(host: dict[str, Any], tarball: bytes, sha: str) -> PushUpdateResult:
|
||||
try:
|
||||
async with UpdaterClient(host=host) as u:
|
||||
try:
|
||||
r = await u.update_self(tarball, sha=sha)
|
||||
http_status = r.status_code
|
||||
body = r.json() if r.content else {}
|
||||
ok = http_status == 200
|
||||
detail = (body.get("error") or body.get("probe")) if isinstance(body, dict) else None
|
||||
stderr = body.get("stderr") if isinstance(body, dict) else None
|
||||
except Exception as exc: # noqa: BLE001
|
||||
# Connection drops during self-update are expected — the updater
|
||||
# re-execs itself mid-response.
|
||||
if _is_expected_connection_drop(exc):
|
||||
return PushUpdateResult(
|
||||
host_uuid=host["uuid"], host_name=host["name"],
|
||||
status="self-updated", sha=sha,
|
||||
detail="updater re-exec dropped connection (expected)",
|
||||
)
|
||||
raise
|
||||
return PushUpdateResult(
|
||||
host_uuid=host["uuid"], host_name=host["name"],
|
||||
status="self-updated" if ok else "self-failed",
|
||||
http_status=http_status, sha=sha,
|
||||
detail=detail, stderr=stderr,
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
log.exception("swarm_updates.push_self failed host=%s", host.get("name"))
|
||||
return PushUpdateResult(
|
||||
host_uuid=host["uuid"], host_name=host["name"],
|
||||
status="self-failed",
|
||||
detail=f"{type(exc).__name__}: {exc}",
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/push-self",
|
||||
response_model=PushUpdateResponse,
|
||||
tags=["Swarm Updates"],
|
||||
)
|
||||
async def api_push_update_self(
|
||||
req: PushUpdateRequest,
|
||||
admin: dict = Depends(require_admin),
|
||||
repo: BaseRepository = Depends(get_repo),
|
||||
) -> PushUpdateResponse:
|
||||
targets = await _resolve_targets(repo, req)
|
||||
tree_root = _master_tree_root()
|
||||
sha = detect_git_sha(tree_root)
|
||||
tarball = tar_working_tree(tree_root, extra_excludes=req.exclude)
|
||||
log.info(
|
||||
"swarm_updates.push_self sha=%s tarball=%d hosts=%d",
|
||||
sha or "(not a git repo)", len(tarball), len(targets),
|
||||
)
|
||||
results = await asyncio.gather(
|
||||
*(_push_self_one(h, tarball, sha) for h in targets)
|
||||
)
|
||||
return PushUpdateResponse(
|
||||
sha=sha,
|
||||
tarball_bytes=len(tarball),
|
||||
results=list(results),
|
||||
)
|
||||
Reference in New Issue
Block a user