feat(swarm-mgmt): probe-on-read for GET /swarm/hosts heartbeat + status

This commit is contained in:
2026-04-19 05:26:35 -04:00
parent ff4c993617
commit bc5f43c3f7

View File

@@ -1,21 +1,54 @@
"""GET /swarm/hosts — admin-gated list of enrolled workers for the dashboard.
Thin wrapper over ``repo.list_swarm_hosts()`` — same shape as the
unauth'd controller route, but behind ``require_admin``.
Fans out an ``AgentClient.health()`` probe to each host on every call and
updates ``status`` / ``last_heartbeat`` as a side effect. This mirrors how
``/swarm-updates/hosts`` probes the updater daemon — the SwarmHosts page
polls this endpoint, so probe-on-read is what drives heartbeat freshness
in the UI. No separate scheduler needed.
"""
from __future__ import annotations
from typing import Optional
import asyncio
from datetime import datetime, timezone
from typing import Any, Optional
from fastapi import APIRouter, Depends
from decnet.logging import get_logger
from decnet.swarm.client import AgentClient
from decnet.web.db.models import SwarmHostView
from decnet.web.db.repository import BaseRepository
from decnet.web.dependencies import get_repo, require_admin
log = get_logger("swarm_mgmt.list_hosts")
router = APIRouter()
async def _probe_and_update(
host: dict[str, Any], repo: BaseRepository
) -> dict[str, Any]:
"""Best-effort mTLS probe. Skips hosts with no address yet (pending first
connect-back) so we don't pollute the DB with 'unreachable' on fresh
enrollments that haven't fetched the tarball."""
if not host.get("address"):
return host
try:
async with AgentClient(host=host) as agent:
await agent.health()
patch = {"status": "active", "last_heartbeat": datetime.now(timezone.utc)}
except Exception as exc: # noqa: BLE001
log.debug("swarm/hosts probe unreachable host=%s err=%s", host.get("name"), exc)
patch = {"status": "unreachable"}
try:
await repo.update_swarm_host(host["uuid"], patch)
except Exception as exc: # noqa: BLE001
log.warning("swarm/hosts could not persist probe result host=%s err=%s", host.get("name"), exc)
return host
host.update(patch)
return host
@router.get("/hosts", response_model=list[SwarmHostView], tags=["Swarm Management"])
async def list_hosts(
host_status: Optional[str] = None,
@@ -23,4 +56,5 @@ async def list_hosts(
repo: BaseRepository = Depends(get_repo),
) -> list[SwarmHostView]:
rows = await repo.list_swarm_hosts(host_status)
return [SwarmHostView(**r) for r in rows]
probed = await asyncio.gather(*(_probe_and_update(r, repo) for r in rows))
return [SwarmHostView(**r) for r in probed]