fix(fleet): INI fully replaces prior decky state on redeploy
Submitting an INI with a single [decky1] was silently redeploying the deckies from the *previous* deploy too. POST /deckies/deploy merged the new INI into the stored DecnetConfig by name, so a 1-decky INI on top of a prior 3-decky run still pushed 3 deckies to the worker. The stale decky2/decky3 kept their old IPs and collided on the parent NIC, so the agent failed with 'Address already in use' — on a deploy the operator never asked for. The INI is the source of truth for which deckies exist in this deploy. The fix is a full replace: config.deckies = list(new_decky_configs). Operators who want to add more deckies should list them all in the INI. Update the deploy-limit test to reflect the new replace semantics, and add a regression test asserting that prior state is dropped.
This commit is contained in:
@@ -93,23 +93,22 @@ async def api_deploy_deckies(req: DeployIniRequest, admin: dict = Depends(requir
|
||||
mutate_interval=ini.mutate_interval or DEFAULT_MUTATE_INTERVAL,
|
||||
)
|
||||
|
||||
# Merge deckies
|
||||
existing_deckies_map = {d.name: d for d in config.deckies}
|
||||
for new_decky in new_decky_configs:
|
||||
existing_deckies_map[new_decky.name] = new_decky
|
||||
# The INI is the source of truth for *which* deckies exist this deploy.
|
||||
# The old "merge with prior state" behaviour meant submitting `[decky1]`
|
||||
# after a 3-decky run silently redeployed decky2/decky3 too — and then
|
||||
# collided on their stale IPs ("Address already in use"). Full replace
|
||||
# matches what the operator sees in the submitted config.
|
||||
config.deckies = list(new_decky_configs)
|
||||
|
||||
# Enforce deployment limit
|
||||
limits_state = await repo.get_state("config_limits")
|
||||
deployment_limit = limits_state.get("deployment_limit", 10) if limits_state else 10
|
||||
if len(existing_deckies_map) > deployment_limit:
|
||||
if len(config.deckies) > deployment_limit:
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail=f"Deployment would result in {len(existing_deckies_map)} deckies, "
|
||||
detail=f"Deployment would result in {len(config.deckies)} deckies, "
|
||||
f"exceeding the configured limit of {deployment_limit}",
|
||||
)
|
||||
|
||||
config.deckies = list(existing_deckies_map.values())
|
||||
|
||||
# Auto-mode: if we're a master with at least one enrolled/active SWARM
|
||||
# host, shard the deckies across those workers instead of spawning docker
|
||||
# containers on the master itself. Round-robin assignment over deckies
|
||||
|
||||
@@ -19,11 +19,16 @@ def mock_network():
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_deploy_respects_limit(client, auth_token, mock_state_file):
|
||||
"""Deploy should reject if total deckies would exceed limit."""
|
||||
"""Deploy should reject if the *submitted* INI exceeds the limit.
|
||||
The INI is the source of truth — prior state is fully replaced — so the
|
||||
check runs on the new decky count alone."""
|
||||
await repo.set_state("config_limits", {"deployment_limit": 1})
|
||||
await repo.set_state("deployment", mock_state_file)
|
||||
|
||||
ini = """[decky-new]
|
||||
ini = """[decky-a]
|
||||
services = ssh
|
||||
|
||||
[decky-b]
|
||||
services = ssh
|
||||
"""
|
||||
resp = await client.post(
|
||||
@@ -31,11 +36,33 @@ services = ssh
|
||||
json={"ini_content": ini},
|
||||
headers={"Authorization": f"Bearer {auth_token}"},
|
||||
)
|
||||
# 2 existing + 1 new = 3 > limit of 1
|
||||
# 2 new deckies > limit of 1
|
||||
assert resp.status_code == 409
|
||||
assert "limit" in resp.json()["detail"].lower()
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_deploy_replaces_prior_state(client, auth_token, mock_state_file):
|
||||
"""Submitting an INI with 1 decky must not silently re-include the 2
|
||||
deckies from prior state (that caused the 'Address already in use'
|
||||
regression when stale decky2/decky3 redeployed on stale IPs)."""
|
||||
await repo.set_state("config_limits", {"deployment_limit": 10})
|
||||
await repo.set_state("deployment", mock_state_file)
|
||||
|
||||
ini = """[only-decky]
|
||||
services = ssh
|
||||
"""
|
||||
resp = await client.post(
|
||||
"/api/v1/deckies/deploy",
|
||||
json={"ini_content": ini},
|
||||
headers={"Authorization": f"Bearer {auth_token}"},
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
persisted = await repo.get_state("deployment")
|
||||
names = [d["name"] for d in persisted["config"]["deckies"]]
|
||||
assert names == ["only-decky"]
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_deploy_within_limit(client, auth_token, mock_state_file):
|
||||
"""Deploy should succeed when within limit."""
|
||||
|
||||
Reference in New Issue
Block a user