fix(fleet): read existing fleet from fleet_deckies, not State["deployment"] (BUG-2)

The web deploy collision-guard read the existing fleet from the DB
State["deployment"] key, while the UI/get_deckies() read decnet-state.json.
A fleet established via CLI/seed does not land in the State["deployment"] key the guard consulted,
so existing_deckies was empty, the additive guard ran blind, and the
reconciler tore the running fleet down to the single submitted decky
(BUG-2: silent fleet wipe, HTTP 202, no warning).

Converge both reads on fleet_deckies — the engine-mirrored table written on
every deploy/teardown (CLI and web), which fleet/reconciler.py already
documents as the store the orchestrator, dashboard, and REST API see. Each
row's decky_config column is a full DeckyConfig dump, so it rehydrates
losslessly into the collision-guard input. The handler also commits the
intended fleet to fleet_deckies synchronously so rapid sequential deploys
read a current fleet and the dashboard observes the new shape immediately.

State["deployment"] is retained for now — the mutate handlers and the
mutator engine still coordinate through it; consolidating them is tracked
in development/ADR-001-FLEET-SOURCE-OF-TRUTH.md (open question 7).

Tests seed fleet_deckies directly (also modelling the CLI-seeded scenario)
rather than chaining real deploys through the skipped contract-test path.
This commit is contained in:
2026-06-12 23:52:20 -04:00
parent 408810b3e2
commit ab1151ee7f
7 changed files with 415 additions and 105 deletions

View File

@@ -2,6 +2,8 @@
import pytest
from unittest.mock import patch
from decnet.config import DeckyConfig
from decnet.web.db.models import LOCAL_HOST_SENTINEL
from decnet.web.dependencies import repo
@@ -15,70 +17,93 @@ def contract_test_mode(monkeypatch):
def mock_network():
"""Mock network detection so deploy doesn't call `ip addr show`."""
with patch("decnet.web.router.fleet.api_deploy_deckies.get_host_ip", return_value="192.168.1.100"):
yield
with patch("decnet.web.router.fleet.api_deploy_deckies.detect_interface", return_value="eth0"):
with patch("decnet.web.router.fleet.api_deploy_deckies.detect_subnet", return_value=("192.168.1.0/24", "192.168.1.1")):
yield
async def _clear_fleet() -> None:
    """Delete every row currently listed in fleet_deckies.

    A row whose ``host_uuid`` is missing or falsy is deleted under
    ``LOCAL_HOST_SENTINEL`` instead.
    """
    existing_rows = await repo.list_fleet_deckies()
    for entry in existing_rows:
        owner = entry.get("host_uuid") or LOCAL_HOST_SENTINEL
        await repo.delete_fleet_decky(host_uuid=owner, name=entry["name"])
async def _seed_fleet(name: str, ip: str) -> None:
    """Upsert one ``running`` ssh decky into fleet_deckies for the local host.

    The row's ``decky_config`` column receives a JSON-mode dump of a
    ``DeckyConfig`` built from *name* and *ip* (debian distro/base image,
    hostname equal to the decky name).
    """
    decky = DeckyConfig(
        name=name,
        ip=ip,
        services=["ssh"],
        distro="debian",
        base_image="debian",
        hostname=name,
    )
    fleet_row = {
        "host_uuid": LOCAL_HOST_SENTINEL,
        "name": name,
        "services": ["ssh"],
        "decky_config": decky.model_dump(mode="json"),
        "decky_ip": ip,
        "state": "running",
    }
    await repo.upsert_fleet_decky(fleet_row)
@pytest.fixture(autouse=True)
async def _isolate_fleet():
    """Autouse fixture: empty fleet_deckies before and after each test."""
    # Setup: start from an empty fleet so seeded rows are the only rows.
    await _clear_fleet()
    yield
    # Teardown: drop whatever the test seeded or deployed.
    await _clear_fleet()
@pytest.mark.anyio
async def test_deploy_respects_limit(client, auth_token, mock_state_file):
"""Deploy should reject if the *submitted* INI exceeds the limit.
The INI is the source of truth — prior state is fully replaced — so the
check runs on the new decky count alone."""
async def test_deploy_respects_limit(client, auth_token):
"""The limit counts the WHOLE resulting fleet — existing (from
fleet_deckies) plus the submitted INI — not the INI alone. One existing
decky + one submitted, against a limit of 1, must be rejected."""
await repo.set_state("config_limits", {"deployment_limit": 1})
await repo.set_state("deployment", mock_state_file)
await _seed_fleet("decky-existing", "192.168.1.10")
ini = """[decky-a]
services = ssh
[decky-b]
services = ssh
"""
ini = "[decky-new]\nservices = ssh\n"
resp = await client.post(
"/api/v1/deckies/deploy",
json={"ini_content": ini},
headers={"Authorization": f"Bearer {auth_token}"},
)
# 2 new deckies > limit of 1
# existing(1) + new(1) = 2 > limit 1
assert resp.status_code == 409
assert "limit" in resp.json()["detail"].lower()
@pytest.mark.anyio
async def test_deploy_replaces_prior_state(client, auth_token, mock_state_file):
"""Submitting an INI with 1 decky must not silently re-include the 2
deckies from prior state (that caused the 'Address already in use'
regression when stale decky2/decky3 redeployed on stale IPs)."""
async def test_deploy_replaces_prior_state(client, auth_token):
"""replace_fleet=True drops the prior fleet rather than silently
re-including it (the 'Address already in use' regression came from stale
deckies redeploying on stale IPs). After replace, the committed fleet is
exactly the submitted INI."""
await repo.set_state("config_limits", {"deployment_limit": 10})
await repo.set_state("deployment", mock_state_file)
await _seed_fleet("test-decky-1", "192.168.1.10")
await _seed_fleet("test-decky-2", "192.168.1.11")
ini = """[only-decky]
services = ssh
"""
ini = "[only-decky]\nservices = ssh\n"
resp = await client.post(
"/api/v1/deckies/deploy",
json={"ini_content": ini},
json={"ini_content": ini, "replace_fleet": True},
headers={"Authorization": f"Bearer {auth_token}"},
)
assert resp.status_code == 202
persisted = await repo.get_state("deployment")
names = [d["name"] for d in persisted["config"]["deckies"]]
assert names == ["only-decky"]
assert resp.status_code == 202, resp.text
names = {d["name"] for d in await repo.get_deckies()}
assert names == {"only-decky"}
@pytest.mark.anyio
async def test_deploy_within_limit(client, auth_token, mock_state_file):
"""Deploy should succeed when within limit."""
async def test_deploy_within_limit(client, auth_token):
"""Deploy should succeed when the resulting fleet is within limit."""
await repo.set_state("config_limits", {"deployment_limit": 100})
await repo.set_state("deployment", mock_state_file)
await _seed_fleet("decky-existing", "192.168.1.10")
ini = """[decky-new]
services = ssh
"""
ini = "[decky-new]\nservices = ssh\n"
resp = await client.post(
"/api/v1/deckies/deploy",
json={"ini_content": ini},
headers={"Authorization": f"Bearer {auth_token}"},
)
# Should not fail due to limit
if resp.status_code == 409:
assert "limit" not in resp.json()["detail"].lower()
else:

View File

@@ -211,6 +211,47 @@ def mock_state_file(patch_state_file: Path):
patch_state_file.write_text(json.dumps(_test_state))
yield _test_state
@pytest.fixture
async def mock_fleet_deckies():
    """Populate fleet_deckies with two running deckies for the test's duration.

    fleet_deckies is the store get_deckies() reads under the Option-D
    source-of-truth model (development/ADR-001-FLEET-SOURCE-OF-TRUTH.md).
    The seeded data mirrors what mock_state_file used to put in
    decnet-state.json. The table is cleared before seeding and again on
    teardown.
    """
    from decnet.config import DeckyConfig
    from decnet.web.db.models import LOCAL_HOST_SENTINEL
    from decnet.web.dependencies import repo

    async def _wipe() -> None:
        # Rows lacking a host_uuid are removed under the local-host sentinel.
        for entry in await repo.list_fleet_deckies():
            owner = entry.get("host_uuid") or LOCAL_HOST_SENTINEL
            await repo.delete_fleet_decky(host_uuid=owner, name=entry["name"])

    # (name, ip, services, distro, hostname, service_config, archetype)
    seed_specs = [
        ("test-decky-1", "192.168.1.10", ["ssh"], "debian", "test-host-1",
         {"ssh": {"banner": "SSH-2.0-OpenSSH_8.9"}}, "deaddeck"),
        ("test-decky-2", "192.168.1.11", ["http"], "ubuntu", "test-host-2",
         {}, None),
    ]

    await _wipe()
    for spec in seed_specs:
        decky_name, address, svc_names, distro_name, host_name, svc_options, archetype = spec
        decky = DeckyConfig(
            name=decky_name,
            ip=address,
            services=svc_names,
            distro=distro_name,
            base_image=distro_name,
            hostname=host_name,
            service_config=svc_options,
            archetype=archetype,
        )
        fleet_row = {
            "host_uuid": LOCAL_HOST_SENTINEL,
            "name": decky_name,
            "services": svc_names,
            "decky_config": decky.model_dump(mode="json"),
            "decky_ip": address,
            "state": "running",
        }
        await repo.upsert_fleet_decky(fleet_row)

    yield
    await _wipe()
# Share fuzz settings across API tests
# FUZZ_EXAMPLES: keep low for dev speed; bump via HYPOTHESIS_MAX_EXAMPLES env var in CI
_FUZZ_EXAMPLES = int(_os.environ.get("HYPOTHESIS_MAX_EXAMPLES", "10"))

View File

@@ -5,6 +5,13 @@ Default behaviour (replace_fleet=False) appends the INI to the existing
fleet so the wizard's "deploy one more decky" submit no longer wipes
prior deckies. replace_fleet=True preserves the historical
set-desired-state semantics for CLI / declarative callers.
The existing fleet is read from fleet_deckies — the engine-mirrored table
written on every deploy/teardown (CLI or web), per the source-of-truth
model in fleet/reconciler.py. These tests seed fleet_deckies directly,
which also models the BUG-2 scenario: a fleet established out of band
(CLI/seed) that the web deploy guard must see and append to rather than
wipe. See development/ADR-001-FLEET-SOURCE-OF-TRUTH.md.
"""
from __future__ import annotations
@@ -12,6 +19,8 @@ from unittest.mock import patch
import pytest
from decnet.config import DeckyConfig
from decnet.web.db.models import LOCAL_HOST_SENTINEL
from decnet.web.dependencies import repo
@@ -28,96 +37,111 @@ def mock_network():
yield
async def _clear_fleet() -> None:
    """Remove all rows from fleet_deckies, one delete call per listed row.

    When a row has no (or a falsy) ``host_uuid``, the delete is issued
    against ``LOCAL_HOST_SENTINEL``.
    """
    listed = await repo.list_fleet_deckies()
    for record in listed:
        host = record.get("host_uuid") or LOCAL_HOST_SENTINEL
        await repo.delete_fleet_decky(host_uuid=host, name=record["name"])
async def _seed_fleet(name: str, *, ip: str = "192.168.1.10", services=("ssh",)) -> None:
    """Upsert a ``running`` decky row into fleet_deckies for the local host.

    This stands in for the engine mirror's write on a CLI/web deploy. A
    complete ``DeckyConfig`` is dumped (JSON mode) into ``decky_config`` so
    the deploy guard can rehydrate it.
    """
    decky = DeckyConfig(
        name=name,
        ip=ip,
        services=list(services),
        distro="debian",
        base_image="debian:bookworm-slim",
        hostname=name,
    )
    fleet_row = {
        "host_uuid": LOCAL_HOST_SENTINEL,
        "name": name,
        # Fresh list, separate from the one handed to DeckyConfig above.
        "services": list(services),
        "decky_config": decky.model_dump(mode="json"),
        "decky_ip": ip,
        "state": "running",
    }
    await repo.upsert_fleet_decky(fleet_row)
@pytest.fixture(autouse=True)
async def _isolate_state():
    """Autouse fixture: reset swarm hosts, deployment state, and fleet_deckies.

    Before each test, every listed swarm host is deleted, the "deployment"
    state key is set to None, and fleet_deckies is emptied. After the test,
    the "deployment" key and fleet_deckies are reset again.
    """
    swarm_hosts = await repo.list_swarm_hosts()
    for host in swarm_hosts:
        await repo.delete_swarm_host(host["uuid"])
    await repo.set_state("deployment", None)
    await _clear_fleet()

    yield

    # Teardown resets the same two stores; swarm hosts are not touched here.
    await repo.set_state("deployment", None)
    await _clear_fleet()
@pytest.mark.anyio
async def test_additive_default_appends_to_existing_fleet(client, auth_token, monkeypatch):
"""Two sequential deploys with replace_fleet unset → both deckies in state."""
async def test_additive_onto_existing_fleet_appends_not_wipes(client, auth_token, monkeypatch):
"""BUG-2 regression: an additive web deploy onto a fleet established out
of band (CLI/seed → fleet_deckies) appends rather than wiping it.
Previously the guard read State["deployment"] (empty for a CLI-seeded
fleet), so existing_deckies was [] and the reconciler tore the running
fleet down to the single submitted decky."""
monkeypatch.setenv("DECNET_MODE", "master")
await _seed_fleet("decky-01", ip="192.168.1.10")
r1 = await client.post(
"/api/v1/deckies/deploy",
json={"ini_content": "[decky-01]\nservices = ssh\n"},
headers={"Authorization": f"Bearer {auth_token}"},
)
assert r1.status_code == 202, r1.text
r2 = await client.post(
r = await client.post(
"/api/v1/deckies/deploy",
json={"ini_content": "[decky-02]\nservices = http\n"},
headers={"Authorization": f"Bearer {auth_token}"},
)
assert r2.status_code == 202, r2.text
assert r.status_code == 202, r.text
committed = await repo.get_state("deployment")
assert committed is not None
names = {d["name"] for d in committed["config"]["deckies"]}
names = {d["name"] for d in await repo.get_deckies()}
assert names == {"decky-01", "decky-02"}
@pytest.mark.anyio
async def test_additive_name_collision_returns_409(client, auth_token, monkeypatch):
"""Re-submitting an existing decky name without replace_fleet → 409."""
"""Submitting a decky whose name already exists in the fleet without
replace_fleet → 409."""
monkeypatch.setenv("DECNET_MODE", "master")
await _seed_fleet("decky-01")
r1 = await client.post(
"/api/v1/deckies/deploy",
json={"ini_content": "[decky-01]\nservices = ssh\n"},
headers={"Authorization": f"Bearer {auth_token}"},
)
assert r1.status_code == 202, r1.text
r2 = await client.post(
r = await client.post(
"/api/v1/deckies/deploy",
json={"ini_content": "[decky-01]\nservices = http\n"},
headers={"Authorization": f"Bearer {auth_token}"},
)
assert r2.status_code == 409, r2.text
assert "decky-01" in r2.json()["detail"]
assert "replace_fleet" in r2.json()["detail"]
assert r.status_code == 409, r.text
assert "decky-01" in r.json()["detail"]
assert "replace_fleet" in r.json()["detail"]
@pytest.mark.anyio
async def test_additive_ip_collision_returns_409(client, auth_token, monkeypatch):
"""A new decky pinned to an IP already in use → 409 with the IP."""
"""A new decky pinned to an IP already in use by the existing fleet → 409
with the IP."""
monkeypatch.setenv("DECNET_MODE", "master")
await _seed_fleet("decky-01", ip="192.168.1.50")
r1 = await client.post(
"/api/v1/deckies/deploy",
json={"ini_content": "[decky-01]\nservices = ssh\nip = 192.168.1.50\n"},
headers={"Authorization": f"Bearer {auth_token}"},
)
assert r1.status_code == 202, r1.text
r2 = await client.post(
r = await client.post(
"/api/v1/deckies/deploy",
json={"ini_content": "[decky-02]\nservices = http\nip = 192.168.1.50\n"},
headers={"Authorization": f"Bearer {auth_token}"},
)
assert r2.status_code == 409, r2.text
assert "192.168.1.50" in r2.json()["detail"]
assert r.status_code == 409, r.text
assert "192.168.1.50" in r.json()["detail"]
@pytest.mark.anyio
async def test_replace_fleet_true_overwrites_existing(client, auth_token, monkeypatch):
"""replace_fleet=True preserves the historical full-replace semantics."""
"""replace_fleet=True preserves the historical full-replace semantics:
the existing fleet is dropped and the committed inventory is exactly the
submitted INI."""
monkeypatch.setenv("DECNET_MODE", "master")
await _seed_fleet("decky-01")
r1 = await client.post(
"/api/v1/deckies/deploy",
json={"ini_content": "[decky-01]\nservices = ssh\n"},
headers={"Authorization": f"Bearer {auth_token}"},
)
assert r1.status_code == 202, r1.text
r2 = await client.post(
r = await client.post(
"/api/v1/deckies/deploy",
json={
"ini_content": "[decky-02]\nservices = http\n",
@@ -125,11 +149,9 @@ async def test_replace_fleet_true_overwrites_existing(client, auth_token, monkey
},
headers={"Authorization": f"Bearer {auth_token}"},
)
assert r2.status_code == 202, r2.text
assert r.status_code == 202, r.text
committed = await repo.get_state("deployment")
assert committed is not None
names = {d["name"] for d in committed["config"]["deckies"]}
names = {d["name"] for d in await repo.get_deckies()}
assert names == {"decky-02"}
@@ -139,25 +161,16 @@ async def test_additive_lifecycle_ids_scoped_to_new_deckies(client, auth_token,
the caller submitted, not carryover. Operators polling
/deckies/lifecycle?ids=... see exactly what this call deployed."""
monkeypatch.setenv("DECNET_MODE", "master")
await _seed_fleet("decky-01", ip="192.168.1.10")
await _seed_fleet("decky-02", ip="192.168.1.11")
r1 = await client.post(
"/api/v1/deckies/deploy",
json={"ini_content": "[decky-01]\nservices = ssh\n[decky-02]\nservices = http\n"},
headers={"Authorization": f"Bearer {auth_token}"},
)
assert r1.status_code == 202, r1.text
assert len(r1.json()["lifecycle_ids"]) == 2
r2 = await client.post(
r = await client.post(
"/api/v1/deckies/deploy",
json={"ini_content": "[decky-03]\nservices = ssh\n"},
headers={"Authorization": f"Bearer {auth_token}"},
)
assert r2.status_code == 202, r2.text
body2 = r2.json()
assert len(body2["lifecycle_ids"]) == 1
assert r.status_code == 202, r.text
assert len(r.json()["lifecycle_ids"]) == 1
committed = await repo.get_state("deployment")
assert committed is not None
names = {d["name"] for d in committed["config"]["deckies"]}
names = {d["name"] for d in await repo.get_deckies()}
assert names == {"decky-01", "decky-02", "decky-03"}

View File

@@ -5,7 +5,7 @@ from hypothesis import given, settings, strategies as st
from ..conftest import _FUZZ_SETTINGS
@pytest.mark.anyio
async def test_get_deckies_endpoint(mock_state_file, client: httpx.AsyncClient, auth_token: str):
async def test_get_deckies_endpoint(mock_fleet_deckies, client: httpx.AsyncClient, auth_token: str):
_response = await client.get("/api/v1/deckies", headers={"Authorization": f"Bearer {auth_token}"})
assert _response.status_code == 200
_data = _response.json()