fix(fleet): reset stale host_uuid on carried-over deckies before dispatch
Deckies merged in from a prior deployment's saved state kept their original host_uuid — which dispatch_decnet_config then 404'd on if that host had since been decommissioned or re-enrolled at a different uuid. Before round-robin assignment, drop any host_uuid that isn't in the live swarm_hosts set so orphaned entries get reassigned instead of exploding with 'unknown host_uuid'.
This commit is contained in:
@@ -123,6 +123,14 @@ async def api_deploy_deckies(req: DeployIniRequest, admin: dict = Depends(requir
|
|||||||
]
|
]
|
||||||
|
|
||||||
if swarm_hosts:
|
if swarm_hosts:
|
||||||
|
# Carry-over from a prior deployment may reference a host_uuid that's
|
||||||
|
# since been decommissioned / re-enrolled at a new uuid. Drop any
|
||||||
|
# assignment that isn't in the currently-reachable set, then round-
|
||||||
|
# robin-fill the blanks — otherwise dispatch 404s on a dead uuid.
|
||||||
|
live_uuids = {h["uuid"] for h in swarm_hosts}
|
||||||
|
for d in config.deckies:
|
||||||
|
if d.host_uuid and d.host_uuid not in live_uuids:
|
||||||
|
d.host_uuid = None
|
||||||
unassigned = [d for d in config.deckies if not d.host_uuid]
|
unassigned = [d for d in config.deckies if not d.host_uuid]
|
||||||
for i, d in enumerate(unassigned):
|
for i, d in enumerate(unassigned):
|
||||||
d.host_uuid = swarm_hosts[i % len(swarm_hosts)]["uuid"]
|
d.host_uuid = swarm_hosts[i % len(swarm_hosts)]["uuid"]
|
||||||
|
|||||||
@@ -88,6 +88,72 @@ async def test_deploy_automode_shards_when_swarm_host_enrolled(client, auth_toke
|
|||||||
await repo.delete_swarm_host("host-A")
|
await repo.delete_swarm_host("host-A")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_deploy_automode_resets_stale_host_uuid(client, auth_token, monkeypatch):
    """Deckies carried over from prior state must not be dispatched to a host
    uuid that no longer exists — reset + round-robin against live hosts.

    Scenario:
      * exactly one swarm host ("host-LIVE") is enrolled;
      * the saved deployment state holds "decky-old", pinned to "ghost-uuid",
        which is not an enrolled host;
      * a deploy request adds "decky-new" via INI.

    ``dispatch_decnet_config`` is patched with an ``AsyncMock`` so nothing is
    actually dispatched; the test inspects the config object it was awaited
    with and expects every decky in it to carry the live host's uuid.
    """
    from datetime import datetime, timezone

    monkeypatch.setenv("DECNET_MODE", "master")

    # Start from an empty swarm so "host-LIVE" is the only assignable host.
    for row in await repo.list_swarm_hosts():
        await repo.delete_swarm_host(row["uuid"])

    await repo.add_swarm_host({
        "uuid": "host-LIVE",
        "name": "live",
        "address": "10.0.0.60",
        "agent_port": 8765,
        "status": "active",
        "client_cert_fingerprint": "a" * 64,
        "updater_cert_fingerprint": None,
        "cert_bundle_path": "/tmp/live",
        "enrolled_at": datetime.now(timezone.utc),
        "notes": "",
    })

    # Everything after the host is enrolled runs under try/finally so a failed
    # request or assertion cannot leak the fake host or the stale deployment
    # state into tests that run afterwards.
    try:
        # Prior state: decky-old is assigned to a now-decommissioned host.
        await repo.set_state("deployment", {
            "config": {
                "mode": "swarm",
                "interface": "eth0",
                "subnet": "192.168.1.0/24",
                "gateway": "192.168.1.1",
                "deckies": [{
                    "name": "decky-old",
                    "ip": "192.168.1.50",
                    "services": ["ssh"],
                    "distro": "debian",
                    "base_image": "debian:bookworm-slim",
                    "hostname": "decky-old",
                    "host_uuid": "ghost-uuid",
                }],
            },
            "compose_path": "",
        })

        fake_response = SwarmDeployResponse(results=[
            SwarmHostResult(host_uuid="host-LIVE", host_name="live", ok=True, detail={})
        ])

        with patch(
            "decnet.web.router.fleet.api_deploy_deckies.dispatch_decnet_config",
            new=AsyncMock(return_value=fake_response),
        ) as mock_dispatch:
            ini = "[decky-new]\nservices = ssh\n"
            resp = await client.post(
                "/api/v1/deckies/deploy",
                json={"ini_content": ini},
                headers={"Authorization": f"Bearer {auth_token}"},
            )

        assert resp.status_code == 200, resp.text
        # Fail with a clear message if dispatch was never reached, instead of
        # an AttributeError on ``await_args`` being None.
        mock_dispatch.assert_awaited()
        dispatched = mock_dispatch.await_args.args[0]
        # Both the carried-over decky and the new one must point at the live host.
        assert {d.host_uuid for d in dispatched.deckies} == {"host-LIVE"}
    finally:
        await repo.delete_swarm_host("host-LIVE")
        await repo.set_state("deployment", None)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.anyio
|
@pytest.mark.anyio
|
||||||
async def test_deployment_mode_endpoint(client, auth_token, monkeypatch):
|
async def test_deployment_mode_endpoint(client, auth_token, monkeypatch):
|
||||||
monkeypatch.setenv("DECNET_MODE", "master")
|
monkeypatch.setenv("DECNET_MODE", "master")
|
||||||
|
|||||||
Reference in New Issue
Block a user