feat(swarm-mgmt): probe-on-read for GET /swarm/hosts heartbeat + status
This commit is contained in:
@@ -1,21 +1,54 @@
|
|||||||
"""GET /swarm/hosts — admin-gated list of enrolled workers for the dashboard.
|
"""GET /swarm/hosts — admin-gated list of enrolled workers for the dashboard.
|
||||||
|
|
||||||
Thin wrapper over ``repo.list_swarm_hosts()`` — same shape as the
|
Fans out an ``AgentClient.health()`` probe to each host on every call and
|
||||||
unauth'd controller route, but behind ``require_admin``.
|
updates ``status`` / ``last_heartbeat`` as a side effect. This mirrors how
|
||||||
|
``/swarm-updates/hosts`` probes the updater daemon — the SwarmHosts page
|
||||||
|
polls this endpoint, so probe-on-read is what drives heartbeat freshness
|
||||||
|
in the UI. No separate scheduler needed.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Optional
|
import asyncio
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends
|
from fastapi import APIRouter, Depends
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.swarm.client import AgentClient
|
||||||
from decnet.web.db.models import SwarmHostView
|
from decnet.web.db.models import SwarmHostView
|
||||||
from decnet.web.db.repository import BaseRepository
|
from decnet.web.db.repository import BaseRepository
|
||||||
from decnet.web.dependencies import get_repo, require_admin
|
from decnet.web.dependencies import get_repo, require_admin
|
||||||
|
|
||||||
|
log = get_logger("swarm_mgmt.list_hosts")
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
async def _probe_and_update(
|
||||||
|
host: dict[str, Any], repo: BaseRepository
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
"""Best-effort mTLS probe. Skips hosts with no address yet (pending first
|
||||||
|
connect-back) so we don't pollute the DB with 'unreachable' on fresh
|
||||||
|
enrollments that haven't fetched the tarball."""
|
||||||
|
if not host.get("address"):
|
||||||
|
return host
|
||||||
|
try:
|
||||||
|
async with AgentClient(host=host) as agent:
|
||||||
|
await agent.health()
|
||||||
|
patch = {"status": "active", "last_heartbeat": datetime.now(timezone.utc)}
|
||||||
|
except Exception as exc: # noqa: BLE001
|
||||||
|
log.debug("swarm/hosts probe unreachable host=%s err=%s", host.get("name"), exc)
|
||||||
|
patch = {"status": "unreachable"}
|
||||||
|
try:
|
||||||
|
await repo.update_swarm_host(host["uuid"], patch)
|
||||||
|
except Exception as exc: # noqa: BLE001
|
||||||
|
log.warning("swarm/hosts could not persist probe result host=%s err=%s", host.get("name"), exc)
|
||||||
|
return host
|
||||||
|
host.update(patch)
|
||||||
|
return host
|
||||||
|
|
||||||
|
|
||||||
@router.get("/hosts", response_model=list[SwarmHostView], tags=["Swarm Management"])
|
@router.get("/hosts", response_model=list[SwarmHostView], tags=["Swarm Management"])
|
||||||
async def list_hosts(
|
async def list_hosts(
|
||||||
host_status: Optional[str] = None,
|
host_status: Optional[str] = None,
|
||||||
@@ -23,4 +56,5 @@ async def list_hosts(
|
|||||||
repo: BaseRepository = Depends(get_repo),
|
repo: BaseRepository = Depends(get_repo),
|
||||||
) -> list[SwarmHostView]:
|
) -> list[SwarmHostView]:
|
||||||
rows = await repo.list_swarm_hosts(host_status)
|
rows = await repo.list_swarm_hosts(host_status)
|
||||||
return [SwarmHostView(**r) for r in rows]
|
probed = await asyncio.gather(*(_probe_and_update(r, repo) for r in rows))
|
||||||
|
return [SwarmHostView(**r) for r in probed]
|
||||||
|
|||||||
Reference in New Issue
Block a user