feat(swarm): AgentClient topology apply/teardown/state methods

Three new RPCs mirroring the existing deploy/teardown/status pattern:

- apply_topology(hydrated, version_hash) — long-timeout (600s) for
  image pulls + compose up.
- teardown_topology(topology_id) — 300s timeout; enough for a
  stubborn compose-down without hanging a heartbeat.
- get_topology_state() — short control-plane read for reconcile.

The per-call timeout swap uses the same trick as .deploy().

Step 5 of the agent <-> topology integration.
This commit is contained in:
2026-04-21 01:26:21 -04:00
parent 13cb0ff38e
commit 5f8a746d6e
2 changed files with 172 additions and 0 deletions

View File

@@ -34,6 +34,11 @@ log = get_logger("swarm.client")
# later iteration if the default proves too short.
_TIMEOUT_DEPLOY = httpx.Timeout(connect=10.0, read=600.0, write=30.0, pool=5.0)
_TIMEOUT_CONTROL = httpx.Timeout(connect=5.0, read=15.0, write=5.0, pool=5.0)
# Topology apply pulls images + runs compose on the agent — same ball-park
# as a fleet deploy. Teardown is faster but still long enough we can't
# reuse the control timeout.
_TIMEOUT_TOPOLOGY_APPLY = httpx.Timeout(connect=10.0, read=600.0, write=30.0, pool=5.0)
_TIMEOUT_TOPOLOGY_TEARDOWN = httpx.Timeout(connect=10.0, read=300.0, write=30.0, pool=5.0)
@dataclass(frozen=True)
@@ -191,6 +196,51 @@ class AgentClient:
resp.raise_for_status()
return resp.json()
# ------------------------------------------------------------ topology
async def apply_topology(
self,
hydrated: dict[str, Any],
version_hash: str,
) -> dict[str, Any]:
"""Push a hydrated topology to the agent for local materialisation.
The agent independently computes ``canonical_hash(hydrated)`` and
returns 400 if it disagrees with *version_hash* — that's how we
catch serialisation drift before half-creating bridges.
"""
old = self._require_client().timeout
self._require_client().timeout = _TIMEOUT_TOPOLOGY_APPLY
try:
resp = await self._require_client().post(
"/topology/apply",
json={"hydrated": hydrated, "version_hash": version_hash},
)
finally:
self._require_client().timeout = old
resp.raise_for_status()
return resp.json()
async def teardown_topology(self, topology_id: str) -> dict[str, Any]:
"""Ask the agent to dismantle the named topology."""
old = self._require_client().timeout
self._require_client().timeout = _TIMEOUT_TOPOLOGY_TEARDOWN
try:
resp = await self._require_client().post(
"/topology/teardown",
json={"topology_id": topology_id},
)
finally:
self._require_client().timeout = old
resp.raise_for_status()
return resp.json()
async def get_topology_state(self) -> dict[str, Any]:
"""Snapshot of the agent's applied topology + live docker state."""
resp = await self._require_client().get("/topology/state")
resp.raise_for_status()
return resp.json()
# -------------------------------------------------------------- diagnostics
def __repr__(self) -> str:

View File

@@ -0,0 +1,122 @@
"""AgentClient topology methods — unit tests with a mock httpx transport.
Avoids the full uvicorn+mTLS setup used by the roundtrip test; we just
need to prove the client emits the right verb/path/body and surfaces
HTTP errors the way the caller expects.
"""
from __future__ import annotations
import json
import httpx
import pytest
from decnet.swarm.client import AgentClient, MasterIdentity
class _StubIdentity:
"""Satisfies the MasterIdentity shape without requiring real files."""
def _client_with_transport(handler) -> AgentClient:
"""Build an AgentClient whose internal httpx client is backed by
:class:`httpx.MockTransport`. Bypasses _build_client so no real
cert IO happens."""
identity = MasterIdentity(
key_path="/nope/key", # type: ignore[arg-type]
cert_path="/nope/cert", # type: ignore[arg-type]
ca_cert_path="/nope/ca", # type: ignore[arg-type]
)
client = AgentClient(
address="127.0.0.1",
agent_port=8765,
identity=identity,
)
client._client = httpx.AsyncClient(
base_url="https://127.0.0.1:8765",
transport=httpx.MockTransport(handler),
)
return client
@pytest.mark.anyio
async def test_apply_topology_sends_body() -> None:
captured: dict = {}
def handler(request: httpx.Request) -> httpx.Response:
captured["url"] = str(request.url)
captured["body"] = json.loads(request.content)
return httpx.Response(
200, json={"status": "applied", "version_hash": "h"}
)
agent = _client_with_transport(handler)
try:
out = await agent.apply_topology({"topology": {"id": "t1"}}, "h")
finally:
await agent._client.aclose()
assert out == {"status": "applied", "version_hash": "h"}
assert captured["url"].endswith("/topology/apply")
assert captured["body"] == {
"hydrated": {"topology": {"id": "t1"}},
"version_hash": "h",
}
@pytest.mark.anyio
async def test_apply_topology_raises_on_409() -> None:
def handler(_req: httpx.Request) -> httpx.Response:
return httpx.Response(409, json={"detail": "already applied"})
agent = _client_with_transport(handler)
try:
with pytest.raises(httpx.HTTPStatusError) as ei:
await agent.apply_topology({"topology": {"id": "t2"}}, "h")
assert ei.value.response.status_code == 409
finally:
await agent._client.aclose()
@pytest.mark.anyio
async def test_teardown_topology_sends_body() -> None:
captured: dict = {}
def handler(request: httpx.Request) -> httpx.Response:
captured["body"] = json.loads(request.content)
captured["url"] = str(request.url)
return httpx.Response(200, json={"status": "torn_down", "topology_id": "t1"})
agent = _client_with_transport(handler)
try:
out = await agent.teardown_topology("t1")
finally:
await agent._client.aclose()
assert out["status"] == "torn_down"
assert captured["body"] == {"topology_id": "t1"}
assert captured["url"].endswith("/topology/teardown")
@pytest.mark.anyio
async def test_get_topology_state_returns_snapshot() -> None:
def handler(request: httpx.Request) -> httpx.Response:
assert request.method == "GET"
return httpx.Response(
200,
json={
"topology_id": "t1",
"applied_version_hash": "h",
"applied_at": 1,
"last_error": None,
"observed": {"bridges": [], "containers": []},
},
)
agent = _client_with_transport(handler)
try:
snap = await agent.get_topology_state()
finally:
await agent._client.aclose()
assert snap["topology_id"] == "t1"
assert snap["applied_version_hash"] == "h"