merge: testing → main (reconcile 2-week divergence)

This commit is contained in:
2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions

View File

View File

@@ -0,0 +1,198 @@
"""Allocator unit + integration tests."""
from __future__ import annotations
import pytest
from decnet.topology.allocator import (
AllocatorExhausted,
IPAllocator,
SubnetAllocator,
reserved_subnets,
)
from decnet.topology.config import TopologyConfig
from decnet.topology.generator import generate
from decnet.topology.persistence import persist, transition_status
from decnet.topology.status import TopologyStatus
from decnet.web.db.factory import get_repository
# --------------------------------------------------------------------- IPAllocator
def test_ip_allocator_sequential_skips_gateway():
    """Five sequential allocations on a /29 yield .2 through .6, skipping .1."""
    allocator = IPAllocator("10.0.0.0/29")  # hosts: .1 .. .6; .1 is gateway
    expected = ["10.0.0.2", "10.0.0.3", "10.0.0.4", "10.0.0.5", "10.0.0.6"]
    handed_out = []
    for _ in range(5):
        handed_out.append(allocator.next_free())
    assert handed_out == expected
def test_ip_allocator_reserve_release_roundtrip():
    """A reserved address stops being free; releasing it makes it free again."""
    address = "10.0.0.3"
    allocator = IPAllocator("10.0.0.0/29")

    allocator.reserve(address)
    free_while_reserved = allocator.is_free(address)
    assert not free_while_reserved

    allocator.release(address)
    free_after_release = allocator.is_free(address)
    assert free_after_release
def test_ip_allocator_reserve_rejects_gateway():
    """Reserving 10.0.0.1 on 10.0.0.0/29 must raise ValueError."""
    gateway_address = "10.0.0.1"
    allocator = IPAllocator("10.0.0.0/29")
    with pytest.raises(ValueError):
        allocator.reserve(gateway_address)
def test_ip_allocator_reserve_rejects_out_of_subnet():
    """Reserving an address that lies outside the /29 must raise ValueError."""
    foreign_address = "10.0.0.100"
    allocator = IPAllocator("10.0.0.0/29")
    with pytest.raises(ValueError):
        allocator.reserve(foreign_address)
def test_ip_allocator_next_free_after_reserve_skips():
    """With .2 reserved by hand, the next automatic allocation is .3."""
    allocator = IPAllocator("10.0.0.0/29")
    allocator.reserve("10.0.0.2")
    following = allocator.next_free()
    assert following == "10.0.0.3"
def test_ip_allocator_exhaustion_raises():
    """A /30 yields a single address; the second request raises AllocatorExhausted."""
    # hosts: .1 .. .2; .1 gateway → only .2 usable
    allocator = IPAllocator("10.0.0.0/30")
    only_address = allocator.next_free()
    assert only_address == "10.0.0.2"
    with pytest.raises(AllocatorExhausted):
        allocator.next_free()
# --------------------------------------------------------------------- SubnetAllocator
def test_subnet_allocator_sequential():
    """Consecutive calls hand out consecutive /24s under the two-octet base."""
    subnets = SubnetAllocator("172.20")
    for expected in ("172.20.0.0/24", "172.20.1.0/24", "172.20.2.0/24"):
        assert subnets.next_free() == expected
def test_subnet_allocator_skips_reserved():
    """Subnets passed in as ``reserved`` are never handed out."""
    already_taken = {"172.20.0.0/24", "172.20.1.0/24"}
    subnets = SubnetAllocator("172.20", reserved=already_taken)
    first_offer = subnets.next_free()
    assert first_offer == "172.20.2.0/24"
def test_subnet_allocator_reserve_is_idempotent():
    """Reserving the first /24 by hand makes the allocator start at the second.

    NOTE(review): ``reserve`` is called only once here, so repeated
    reservation of the same subnet is not actually exercised — confirm
    whether a second call should be added to match the test name.
    """
    subnets = SubnetAllocator("172.20")
    subnets.reserve("172.20.0.0/24")
    first_offer = subnets.next_free()
    assert first_offer == "172.20.1.0/24"
def test_subnet_allocator_exhaustion_raises():
    """With all 256 /24s under 10.0 reserved, next_free raises AllocatorExhausted."""
    every_slot = set(f"10.0.{third_octet}.0/24" for third_octet in range(256))
    subnets = SubnetAllocator("10.0", reserved=every_slot)
    with pytest.raises(AllocatorExhausted):
        subnets.next_free()
def test_subnet_allocator_accepts_cidr_base():
    """A full-CIDR /16 base allocates the same /24s as the two-octet form."""
    subnets = SubnetAllocator("172.20.0.0/16")
    first_two = [subnets.next_free(), subnets.next_free()]
    assert first_two[0] == "172.20.0.0/24"
    assert first_two[1] == "172.20.1.0/24"
def test_subnet_allocator_slash12_yields_more_than_256_slots():
    """A /12 base keeps allocating after the first 256 /24s are consumed."""
    subnets = SubnetAllocator("172.16.0.0/12")
    # Consume the first 256 /24s. A /16 base would be exhausted at this
    # point; a /12 is expected to continue into 172.17.x.x without raising.
    consumed = 0
    while consumed < 256:
        subnets.next_free()
        consumed += 1
    following = subnets.next_free()
    assert following.startswith("172.17.")
    assert following.endswith(".0/24")
def test_subnet_allocator_slash12_total_capacity_is_4096():
    """Draining a /12 base yields exactly 4096 subnets before exhaustion."""
    subnets = SubnetAllocator("172.16.0.0/12")
    allocated = 0
    # Keep allocating until the allocator reports exhaustion; any other
    # exception type still propagates and fails the test.
    with pytest.raises(AllocatorExhausted):
        while True:
            subnets.next_free()
            allocated += 1
    assert allocated == 4096
def test_subnet_allocator_rejects_narrower_than_slash24():
    """Constructing the allocator with a /25 base raises a descriptive ValueError."""
    too_narrow = "192.168.1.0/25"
    with pytest.raises(ValueError, match="narrower than /24"):
        SubnetAllocator(too_narrow)
def test_subnet_allocator_exhausted_message_uses_parent_cidr():
    """The exhaustion error message contains the base CIDR."""
    base_cidr = "172.20.0.0/24"  # exactly one slot
    subnets = SubnetAllocator(base_cidr)
    subnets.next_free()
    with pytest.raises(AllocatorExhausted, match="172.20.0.0/24"):
        subnets.next_free()
# --------------------------------------------------------------------- reserved_subnets
def _cfg(**kw) -> TopologyConfig:
    """Build a TopologyConfig from this module's defaults, overridden by ``kw``."""
    defaults = {
        "name": "alloc",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 3,
    }
    # Caller-supplied keywords win over the defaults.
    return TopologyConfig(**{**defaults, **kw})
@pytest.fixture
async def repo(tmp_path):
    """Return an initialized repository backed by ``alloc.db`` under tmp_path."""
    db_file = tmp_path / "alloc.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
@pytest.mark.anyio
async def test_reserved_subnets_includes_pending_and_active(repo):
    """Subnets of both a pending and an active topology show up as reserved."""
    pending_plan = generate(_cfg(name="a"))
    await persist(repo, pending_plan)  # left as persisted (pending)

    active_plan = generate(_cfg(name="b", subnet_base_prefix="172.21"))
    active_id = await persist(repo, active_plan)
    # Walk the second topology through DEPLOYING and on to ACTIVE.
    for next_status in (TopologyStatus.DEPLOYING, TopologyStatus.ACTIVE):
        await transition_status(repo, active_id, next_status)

    claimed = await reserved_subnets(repo)
    for lan in (*pending_plan.lans, *active_plan.lans):
        assert lan.subnet in claimed
@pytest.mark.anyio
async def test_reserved_subnets_excludes_torn_down(repo):
    """Subnets of a torn-down topology are no longer reported as reserved."""
    gone_plan = generate(_cfg(name="gone"))
    gone_id = await persist(repo, gone_plan)
    # Going straight from pending to torn_down is a legal transition.
    await transition_status(repo, gone_id, TopologyStatus.TORN_DOWN)

    claimed = await reserved_subnets(repo)
    for lan in gone_plan.lans:
        assert lan.subnet not in claimed
@pytest.mark.anyio
async def test_generate_respects_reserved(repo):
    """A second plan generated with the reserved set shares no subnet with the first."""
    first_plan = generate(_cfg(name="a"))
    await persist(repo, first_plan)
    claimed = await reserved_subnets(repo)

    # Same base as the first topology, but told about the reservations,
    # so it has to choose subnets outside the first topology's set.
    second_plan = generate(_cfg(name="b"), reserved_subnets=claimed)

    first_subnets = {lan.subnet for lan in first_plan.lans}
    second_subnets = {lan.subnet for lan in second_plan.lans}
    assert not (second_subnets & first_subnets)

View File

@@ -0,0 +1,135 @@
"""MazeNET compose-generator + teardown-order tests."""
from __future__ import annotations
import pytest
from decnet.engine.deployer import _teardown_order
from decnet.topology.compose import (
_container_name,
_network_name,
generate_topology_compose,
)
from decnet.topology.config import TopologyConfig
from decnet.topology.generator import generate
from decnet.topology.persistence import hydrate, persist
from decnet.web.db.factory import get_repository
def _cfg(**kw) -> TopologyConfig:
    """Return the compose-test TopologyConfig, with ``kw`` overriding defaults."""
    settings = {
        "name": "cmp",
        "depth": 2,
        "branching_factor": 2,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 9,
    }
    for key, value in kw.items():
        settings[key] = value
    return TopologyConfig(**settings)
@pytest.fixture
async def repo(tmp_path):
    """Return an initialized repository backed by ``compose.db`` under tmp_path."""
    db_file = tmp_path / "compose.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
@pytest.mark.anyio
async def test_compose_has_one_network_per_lan(repo):
    """The compose output has one external network per LAN, and no others."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)
    hydrated = await hydrate(repo, tid)
    networks = generate_topology_compose(hydrated)["networks"]

    expected_names = set()
    for lan in plan.lans:
        expected_names.add(_network_name(tid, lan.name))
    assert set(networks.keys()) == expected_names

    for definition in networks.values():
        assert definition["external"] is True
@pytest.mark.anyio
async def test_compose_multi_home_bridge_decky(repo):
    """Each decky's base service is attached to exactly its LANs, with matching IPs."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)
    hydrated = await hydrate(repo, tid)
    services = generate_topology_compose(hydrated)["services"]

    # The base service must list one network per LAN membership, so a
    # multi-homed bridge decky ends up with two or more.
    for entry in hydrated["deckies"]:
        decky_cfg = entry["decky_config"]
        memberships = decky_cfg["ips_by_lan"]
        base_service = services[decky_cfg["name"]]

        assert base_service["container_name"] == _container_name(tid, decky_cfg["name"])
        attached = base_service["networks"]
        assert len(attached) == len(memberships)
        for lan_name, expected_ip in memberships.items():
            network = attached[_network_name(tid, lan_name)]
            assert network["ipv4_address"] == expected_ip
@pytest.mark.anyio
async def test_compose_forwards_l3_sets_sysctl(repo):
    """Forwarding deckies get ip_forward=1 and the NET_ADMIN capability."""
    # A forwarding probability of 1.0 should leave at least one forwarder.
    plan = generate(_cfg(bridge_forward_probability=1.0))
    tid = await persist(repo, plan)
    hydrated = await hydrate(repo, tid)
    services = generate_topology_compose(hydrated)["services"]

    forwarders = []
    for entry in hydrated["deckies"]:
        if entry["decky_config"].get("forwards_l3"):
            forwarders.append(entry)
    assert forwarders, "expected at least one forwarding bridge decky"

    for entry in forwarders:
        base_service = services[entry["decky_config"]["name"]]
        assert base_service["sysctls"]["net.ipv4.ip_forward"] == 1
        assert "NET_ADMIN" in base_service["cap_add"]
@pytest.mark.anyio
async def test_compose_labels_service_containers_for_collector(repo):
    """Service fragments must carry ``decnet.topology.service=true`` so
    the host-side collector picks up their log streams — the old fleet
    state file never mentions topology containers.

    Base (decky) containers are checked for the opposite: they carry
    ``decnet.topology.role=base`` and must not carry the service marker.
    """
    plan = generate(_cfg())
    tid = await persist(repo, plan)
    hydrated = await hydrate(repo, tid)
    data = generate_topology_compose(hydrated)

    # Names of the base containers. Built once up front: the original
    # rebuilt this set inside the filter below for every service key.
    base_keys = {d["decky_config"]["name"] for d in hydrated["deckies"]}

    # Everything with a dash in its key that is not a base container is
    # treated as a per-service container.
    service_keys = [
        k for k in data["services"]
        if "-" in k and k not in base_keys
    ]
    assert service_keys, "expected at least one service container"
    for k in service_keys:
        labels = data["services"][k].get("labels") or {}
        assert labels.get("decnet.topology.service") == "true", (
            f"service {k!r} missing collector-discovery label: {labels}"
        )
        assert labels.get("decnet.topology.id") == tid
        assert "decnet.topology.decky" in labels
        assert "decnet.topology.service_name" in labels

    # Base containers get their own label (role=base) but MUST NOT carry
    # the service marker — otherwise the collector double-attaches.
    for k in base_keys:
        labels = data["services"][k].get("labels") or {}
        assert labels.get("decnet.topology.role") == "base"
        assert labels.get("decnet.topology.service") != "true"
def test_teardown_order_is_leaf_first():
    """_teardown_order returns LAN names in reverse, ending with LAN-00."""
    lans = [{"name": f"LAN-{index:02d}"} for index in range(4)]
    order = _teardown_order(lans)
    assert order == ["LAN-03", "LAN-02", "LAN-01", "LAN-00"]
    # LAN-00 (the DMZ) must be the final entry, so nothing follows it.
    final_entry = order[-1]
    assert final_entry == "LAN-00"

View File

@@ -0,0 +1,118 @@
"""Optimistic-concurrency (version) checks on topology child mutations."""
from __future__ import annotations
import pytest
from decnet.topology.config import TopologyConfig
from decnet.topology.generator import generate
from decnet.topology.persistence import persist
from decnet.topology.status import VersionConflict
from decnet.web.db.factory import get_repository
def _cfg(**kw) -> TopologyConfig:
    """Return the version-test TopologyConfig; ``kw`` entries replace defaults."""
    merged = {
        "name": "ver",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 2,
        **kw,
    }
    return TopologyConfig(**merged)
@pytest.fixture
async def repo(tmp_path):
    """Return an initialized repository backed by ``ver.db`` under tmp_path."""
    db_file = tmp_path / "ver.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
@pytest.mark.anyio
async def test_version_starts_at_one_after_persist(repo):
    """A freshly persisted topology reports version 1."""
    plan = generate(_cfg())
    # persist() writes its LANs/deckies/edges without passing
    # expected_version, so the version token is expected to remain 1.
    tid = await persist(repo, plan)
    stored = await repo.get_topology(tid)
    assert stored["version"] == 1
@pytest.mark.anyio
async def test_happy_path_two_sequential_writes(repo):
    """Two writes that each assert the current version bump it 1 → 2 → 3."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)

    writes = (
        ("LAN-A", "10.9.0.0/24", 1),
        ("LAN-B", "10.9.1.0/24", 2),
    )
    for lan_name, subnet, asserted_version in writes:
        await repo.add_lan(
            {"topology_id": tid, "name": lan_name, "subnet": subnet, "is_dmz": False},
            expected_version=asserted_version,
        )
        stored = await repo.get_topology(tid)
        assert stored["version"] == asserted_version + 1
@pytest.mark.anyio
async def test_stale_expected_version_raises(repo):
    """Re-using an already-consumed version raises VersionConflict with both values."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)

    first_lan = {"topology_id": tid, "name": "LAN-A", "subnet": "10.8.0.0/24", "is_dmz": False}
    await repo.add_lan(first_lan, expected_version=1)

    second_lan = {"topology_id": tid, "name": "LAN-B", "subnet": "10.8.1.0/24", "is_dmz": False}
    with pytest.raises(VersionConflict) as ei:
        # Version 1 is stale now that the first write has gone through.
        await repo.add_lan(second_lan, expected_version=1)

    conflict = ei.value
    assert conflict.current == 2
    assert conflict.expected == 1
@pytest.mark.anyio
async def test_no_expected_version_skips_check(repo):
    """A write made without expected_version succeeds and leaves the version as is.

    This is the calling style used by existing callers such as persist().
    """
    plan = generate(_cfg())
    tid = await persist(repo, plan)

    async def _current_version():
        return (await repo.get_topology(tid))["version"]

    before = await _current_version()
    new_lan = {"topology_id": tid, "name": "LAN-X", "subnet": "10.7.0.0/24", "is_dmz": False}
    await repo.add_lan(new_lan)
    after = await _current_version()
    # Not asserting a version means no bump either.
    assert before == after
@pytest.mark.anyio
async def test_update_topology_decky_bumps_version(repo):
    """Updating a decky with expected_version=1 moves the topology to version 2."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)
    deckies = await repo.list_topology_deckies(tid)
    decky = deckies[0]

    new_config = {
        "name": decky["name"],
        "services": ["ssh"],
        "ips_by_lan": decky["decky_config"]["ips_by_lan"],
        "forwards_l3": False,
        "service_config": {"ssh": {"password": "x"}},
    }
    await repo.update_topology_decky(
        decky["uuid"],
        {"decky_config": new_config},
        expected_version=1,
    )

    stored = await repo.get_topology(tid)
    assert stored["version"] == 2
@pytest.mark.anyio
async def test_update_lan_bumps_version(repo):
    """Renaming a LAN with expected_version=1 moves the topology to version 2."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)
    lans = await repo.list_lans_for_topology(tid)
    first_lan = lans[0]

    await repo.update_lan(first_lan["id"], {"name": "LAN-RENAMED"}, expected_version=1)

    stored = await repo.get_topology(tid)
    assert stored["version"] == 2

View File

@@ -0,0 +1,236 @@
"""Deploy/teardown integration tests for MazeNET topologies.
Docker-touching paths live behind ``@pytest.mark.live`` per
feedback_skip_heavy_tests.md. The non-live path here exercises dry-run
deploy (compose file is written, repo status is left untouched) and the
state-machine around failure/teardown using a stub repo.
"""
from __future__ import annotations
from pathlib import Path
from unittest.mock import patch
import pytest
from decnet.engine.deployer import (
_teardown_order,
_topology_compose_path,
deploy_topology,
teardown_topology,
)
from decnet.topology.config import TopologyConfig
from decnet.topology.generator import generate
from decnet.topology.persistence import persist
from decnet.topology.status import TopologyStatus
from decnet.web.db.factory import get_repository
def _cfg(**kw) -> TopologyConfig:
    """Return the deploy-test TopologyConfig, letting ``kw`` override any field."""
    defaults = {
        "name": "dep",
        "depth": 2,
        "branching_factor": 2,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 11,
    }
    options = {**defaults, **kw}
    return TopologyConfig(**options)
@pytest.fixture
async def repo(tmp_path):
    """Return an initialized repository backed by ``dep.db`` under tmp_path."""
    db_file = tmp_path / "dep.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
@pytest.mark.anyio
async def test_dry_run_writes_compose_and_preserves_pending(repo, tmp_path, monkeypatch):
    """A dry-run deploy writes the compose file and leaves the status PENDING."""
    # Work from tmp_path for the duration of the test.
    monkeypatch.chdir(tmp_path)
    plan = generate(_cfg())
    tid = await persist(repo, plan)

    await deploy_topology(repo, tid, dry_run=True)

    compose_file = _topology_compose_path(tid)
    assert compose_file.exists(), "dry run must emit a compose file"

    stored = await repo.get_topology(tid)
    assert stored["status"] == TopologyStatus.PENDING, (
        "dry run must not transition status"
    )
@pytest.mark.anyio
async def test_deploy_failure_transitions_to_failed(repo, tmp_path, monkeypatch):
    """A failing network create ends in FAILED, with the error text as the reason."""
    monkeypatch.chdir(tmp_path)
    plan = generate(_cfg())
    tid = await persist(repo, plan)

    class _BoomClient:
        """Docker-client stand-in whose network creation always fails."""

        def __init__(self):
            # ``client.networks`` is the client itself, so the list/create
            # methods below are what ``client.networks.*`` resolves to.
            self.networks = self

        def list(self, names=None, filters=None):  # noqa: ARG002
            return []

        def create(self, *a, **kw):  # noqa: ARG002
            raise RuntimeError("boom: docker daemon unreachable")

    failing_client = _BoomClient()
    with patch("decnet.engine.deployer.docker.from_env", return_value=failing_client), \
            pytest.raises(RuntimeError, match="boom"):
        await deploy_topology(repo, tid)

    stored = await repo.get_topology(tid)
    assert stored["status"] == TopologyStatus.FAILED

    # The event list comes back newest-first, so index 0 is the latest.
    events = await repo.list_topology_status_events(tid)
    latest = events[0]
    assert latest["to_status"] == TopologyStatus.FAILED
    assert "boom" in (latest["reason"] or "")
@pytest.mark.anyio
async def test_deploy_failure_rolls_back_created_networks(repo, tmp_path, monkeypatch):
    """Networks created before the failing call are removed again on rollback.

    Guards against the ``Pool overlaps`` regression, where a failed deploy
    left partial networks behind and the following deploy ran into an
    IPAM conflict.
    """
    monkeypatch.chdir(tmp_path)
    plan = generate(_cfg())
    tid = await persist(repo, plan)

    class _FakeNet:
        """Network object that reports its own removal back to the client."""

        def __init__(self, name, client):
            self.name = name
            self.id = f"id-{name}"
            self.attrs = {"Containers": {}}
            self._client = client

        def remove(self):
            self._client.removed.append(self.name)
            self._client._created_objs.pop(self.name, None)

    class _PartialClient:
        """Docker-client stand-in: the first create succeeds, later ones fail."""

        def __init__(self):
            # ``client.networks`` is the client itself.
            self.networks = self
            self.created: list[str] = []
            self.removed: list[str] = []
            self._call = 0
            self._created_objs: dict[str, _FakeNet] = {}

        def list(self, names=None, filters=None):  # noqa: ARG002
            known = self._created_objs
            return [known[n] for n in (names or []) if n in known]

        def create(self, name, *a, **kw):  # noqa: ARG002
            self._call += 1
            # Only the first create goes through; the second and any
            # later call blow up.
            if self._call >= 2:
                raise RuntimeError("boom: pool overlap")
            network = _FakeNet(name, self)
            self.created.append(name)
            self._created_objs[name] = network
            return network

    fake = _PartialClient()
    with patch("decnet.engine.deployer.docker.from_env", return_value=fake), \
            patch("decnet.engine.deployer._compose") as mock_down:
        with pytest.raises(RuntimeError, match="boom"):
            await deploy_topology(repo, tid)
        # compose down is invoked only when compose was actually started
        # OR a partial compose file exists; create_bridge_network failed
        # before write_topology_compose, so _compose should not have run.
        mock_down.assert_not_called()

    # Whatever this attempt created has to have been removed again.
    assert set(fake.removed) == set(fake.created)

    stored = await repo.get_topology(tid)
    assert stored["status"] == TopologyStatus.FAILED
@pytest.mark.anyio
async def test_teardown_from_failed_marks_torn_down(repo, tmp_path, monkeypatch):
    """Tearing down a FAILED topology leaves it in TORN_DOWN."""
    from decnet.topology.persistence import transition_status

    monkeypatch.chdir(tmp_path)
    plan = generate(_cfg())
    tid = await persist(repo, plan)

    # Reach FAILED through the legal path: pending → deploying → failed.
    await transition_status(repo, tid, TopologyStatus.DEPLOYING)
    await transition_status(repo, tid, TopologyStatus.FAILED, reason="test")

    class _StubClient:
        """Docker-client stand-in that reports no networks."""

        def __init__(self):
            self.networks = self

        def list(self, names=None, filters=None):  # noqa: ARG002
            return []

    stub = _StubClient()
    with patch("decnet.engine.deployer.docker.from_env", return_value=stub):
        await teardown_topology(repo, tid)

    stored = await repo.get_topology(tid)
    assert stored["status"] == TopologyStatus.TORN_DOWN
def test_teardown_order_is_stable():
    """Five LANs named LAN-00..LAN-04 come back in exactly reversed order."""
    lans = []
    for index in range(5):
        lans.append({"name": f"LAN-{index:02d}"})
    expected = ["LAN-04", "LAN-03", "LAN-02", "LAN-01", "LAN-00"]
    assert _teardown_order(lans) == expected
@pytest.mark.live
@pytest.mark.anyio
async def test_deploy_and_teardown_against_real_docker(repo, tmp_path, monkeypatch):
    """End-to-end: create real Docker bridge networks, verify, tear down.

    Skipped on CI; run locally with ``pytest -m live tests/topology``.
    Does NOT run ``docker compose up`` — that's exercised by the flat
    fleet tests. This test covers the topology-specific paths only
    (LAN network creation, multi-home bridge wiring, teardown order).

    The test is skipped when the ``docker`` package is not importable or
    the daemon does not answer a ping.
    """
    monkeypatch.chdir(tmp_path)
    docker = pytest.importorskip("docker")
    try:
        client = docker.from_env()
        client.ping()
    except Exception as exc:  # pragma: no cover - environment-specific
        pytest.skip(f"docker daemon not reachable: {exc}")

    # Bound BEFORE the ``try`` below. The ``finally`` clause calls
    # remove_bridge_network; importing it inside the ``try`` (after
    # deploy_topology) meant an early failure surfaced as a NameError from
    # the cleanup code instead of the real error.
    from decnet.network import create_bridge_network, remove_bridge_network
    from decnet.topology.compose import _network_name

    plan = generate(_cfg(depth=1, branching_factor=1))
    tid = await persist(repo, plan)
    try:
        await deploy_topology(repo, tid, dry_run=True)
        # Dry run doesn't create networks. Now exercise the real path by
        # creating just the networks (no compose up) and tearing down.
        for lan in plan.lans:
            create_bridge_network(
                client,
                _network_name(tid, lan.name),
                lan.subnet,
                internal=not lan.is_dmz,
            )
        existing = {n.name for n in client.networks.list()}
        for lan in plan.lans:
            assert _network_name(tid, lan.name) in existing
    finally:
        # Always remove whatever networks this test may have created.
        for lan in plan.lans:
            remove_bridge_network(client, _network_name(tid, lan.name))
        remaining = {n.name for n in client.networks.list()}
        for lan in plan.lans:
            assert _network_name(tid, lan.name) not in remaining
        # Compose artifact cleanup
        p = _topology_compose_path(tid)
        if p.exists():
            p.unlink()
        # Sanity: Path roundtrip still resolvable
        assert isinstance(Path(str(p)), Path)

View File

@@ -0,0 +1,168 @@
"""Agent-branch routing inside deploy_topology / teardown_topology.
Exercises the target_host_uuid branch added in Step 6. We never hit a
real agent — AgentClient is swapped out for a recording fake so we
assert the right hydrated blob + version hash are forwarded and the
master's status machine advances as expected.
"""
from __future__ import annotations
from typing import Any
import pytest
from decnet.engine import deployer as _deployer
from decnet.topology.config import TopologyConfig
from decnet.topology.generator import generate
from decnet.topology.hashing import canonical_hash
from decnet.topology.persistence import persist
from decnet.topology.status import TopologyStatus
from decnet.web.db.factory import get_repository
def _cfg(**kw) -> TopologyConfig:
    """Return the agent-mode TopologyConfig for these tests; ``kw`` overrides defaults."""
    fields = {
        "name": "agent-branch",
        "mode": "agent",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 7,
    }
    if kw:
        fields.update(kw)
    return TopologyConfig(**fields)
@pytest.fixture
async def repo(tmp_path):
    """Return an initialized repository backed by ``agent-branch.db`` under tmp_path."""
    db_file = tmp_path / "agent-branch.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
async def _seed_host(repo, uuid_: str = "h-1") -> None:
    """Insert one active swarm-host row with the given uuid via ``repo.add_swarm_host``.

    Every field other than the uuid and the derived name is a fixed
    placeholder value.
    """
    host_row = {
        "uuid": uuid_,
        "name": f"host-{uuid_}",
        "address": "10.9.9.9",
        "agent_port": 8765,
        "status": "active",
        "client_cert_fingerprint": "a" * 64,
        "cert_bundle_path": "/tmp/ignored",
    }
    await repo.add_swarm_host(host_row)
class _FakeAgentClient:
    """Agent-client stand-in that records calls and returns canned replies.

    It is usable as an async context manager and performs no I/O.
    """

    # Every instance ever constructed, including subclass instances, so a
    # test can inspect clients that the code under test built on its own.
    instances: list["_FakeAgentClient"] = []

    def __init__(self, *, host: dict[str, Any]) -> None:
        self.host = host
        # One ``(verb, positional_args, keyword_args)`` entry per call.
        self.calls: list[tuple[str, tuple, dict]] = []
        _FakeAgentClient.instances.append(self)

    async def __aenter__(self) -> "_FakeAgentClient":
        return self

    async def __aexit__(self, *_exc) -> None:
        return None

    async def apply_topology(self, hydrated, version_hash):
        """Record an ``apply`` call and echo the version hash back."""
        record = ("apply", (hydrated, version_hash), {})
        self.calls.append(record)
        return {"status": "applied", "version_hash": version_hash}

    async def teardown_topology(self, topology_id):
        """Record a ``teardown`` call and echo the topology id back."""
        record = ("teardown", (topology_id,), {})
        self.calls.append(record)
        return {"status": "torn_down", "topology_id": topology_id}
@pytest.fixture
def fake_agent(monkeypatch: pytest.MonkeyPatch):
    """Swap ``decnet.swarm.client.AgentClient`` for the recording fake.

    Returns the fake class so tests can inspect ``instances``.
    """
    import decnet.swarm.client as _swarm_client

    # Start every test with an empty instance registry.
    _FakeAgentClient.instances.clear()
    # The deployer functions do
    # `from decnet.swarm.client import AgentClient` at call time, so
    # replacing the attribute on the module is what they will pick up.
    monkeypatch.setattr(_swarm_client, "AgentClient", _FakeAgentClient)
    return _FakeAgentClient
@pytest.mark.anyio
async def test_deploy_on_agent_routes_via_agent_client(repo, fake_agent) -> None:
    """Deploying a host-targeted topology makes one apply call and ends ACTIVE."""
    host_uuid = "h-deploy"
    await _seed_host(repo, host_uuid)
    plan = generate(_cfg())
    tid = await persist(repo, plan, target_host_uuid=host_uuid)

    await _deployer.deploy_topology(repo, tid)

    # Exactly one client was built, and it received exactly one call.
    assert len(fake_agent.instances) == 1
    client = fake_agent.instances[0]
    assert client.host["uuid"] == "h-deploy"
    assert len(client.calls) == 1

    recorded = client.calls[0]
    verb = recorded[0]
    hydrated, version_hash = recorded[1]
    assert verb == "apply"
    assert hydrated["topology"]["id"] == tid
    assert version_hash == canonical_hash(hydrated)

    stored = await repo.get_topology(tid)
    assert stored["status"] == TopologyStatus.ACTIVE
@pytest.mark.anyio
async def test_deploy_on_agent_failure_marks_failed(repo, monkeypatch) -> None:
    """When the agent's apply call raises, the error propagates and status is FAILED."""
    import decnet.swarm.client as _swarm_client

    host_uuid = "h-fail"
    await _seed_host(repo, host_uuid)
    plan = generate(_cfg(name="agent-fail"))
    tid = await persist(repo, plan, target_host_uuid=host_uuid)

    class _BoomClient(_FakeAgentClient):
        """Recording fake whose apply call always fails."""

        async def apply_topology(self, hydrated, version_hash):
            raise RuntimeError("agent refused")

    monkeypatch.setattr(_swarm_client, "AgentClient", _BoomClient)

    with pytest.raises(RuntimeError, match="agent refused"):
        await _deployer.deploy_topology(repo, tid)

    stored = await repo.get_topology(tid)
    assert stored["status"] == TopologyStatus.FAILED
@pytest.mark.anyio
async def test_deploy_on_agent_unknown_host_raises(repo, fake_agent) -> None:
    """Deploying against an unseeded host uuid raises ValueError and builds no client."""
    plan = generate(_cfg(name="agent-missing"))
    tid = await persist(repo, plan, target_host_uuid="nope")

    with pytest.raises(ValueError, match="unknown swarm host"):
        await _deployer.deploy_topology(repo, tid)

    # A nonexistent host must never lead to a client being constructed.
    constructed = fake_agent.instances
    assert constructed == []
@pytest.mark.anyio
async def test_teardown_on_agent_routes_via_agent_client(repo, fake_agent) -> None:
    """Tearing down an ACTIVE host-targeted topology makes one teardown call."""
    from decnet.topology.persistence import transition_status

    host_uuid = "h-teardown"
    await _seed_host(repo, host_uuid)
    plan = generate(_cfg(name="agent-down"))
    tid = await persist(repo, plan, target_host_uuid=host_uuid)

    # Put the topology into ACTIVE, a state the teardown accepts.
    for next_status in (TopologyStatus.DEPLOYING, TopologyStatus.ACTIVE):
        await transition_status(repo, tid, next_status)

    await _deployer.teardown_topology(repo, tid)

    latest_client = fake_agent.instances[-1]
    assert latest_client.host["uuid"] == "h-teardown"
    expected_calls = [("teardown", (tid,), {})]
    assert latest_client.calls == expected_calls

    stored = await repo.get_topology(tid)
    assert stored["status"] == TopologyStatus.TORN_DOWN

View File

@@ -0,0 +1,132 @@
"""Pre-deploy mutation repo methods: pending-only, version-aware."""
from __future__ import annotations
import pytest
from decnet.topology.config import TopologyConfig
from decnet.topology.generator import generate
from decnet.topology.persistence import persist, transition_status
from decnet.topology.status import TopologyNotEditable, TopologyStatus
from decnet.web.db.factory import get_repository
def _cfg(**kw) -> TopologyConfig:
    """Return the edit-test TopologyConfig (two deckies per LAN), overridable via ``kw``."""
    config_kwargs = {
        "name": "edit",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 2,
        "deckies_per_lan_max": 2,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 6,
    }
    config_kwargs.update(**kw)
    return TopologyConfig(**config_kwargs)
@pytest.fixture
async def repo(tmp_path):
    """Return an initialized repository backed by ``edit.db`` under tmp_path."""
    db_file = tmp_path / "edit.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
@pytest.mark.anyio
async def test_add_lan_to_pending_bumps_version(repo):
    """Adding a LAN with expected_version=1 stores the LAN and bumps to version 2."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)

    new_lan = {"topology_id": tid, "name": "LAN-NEW", "subnet": "10.55.0.0/24", "is_dmz": False}
    await repo.add_lan(new_lan, expected_version=1)

    stored = await repo.get_topology(tid)
    assert stored["version"] == 2

    stored_lans = await repo.list_lans_for_topology(tid)
    lan_names = {row["name"] for row in stored_lans}
    assert "LAN-NEW" in lan_names
@pytest.mark.anyio
async def test_update_decky_roundtrips_service_config(repo):
    """A service_config written through update_topology_decky reads back unchanged."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)
    decky = (await repo.list_topology_deckies(tid))[0]

    # Shallow copy of the stored config with service_config replaced.
    updated_config = dict(decky["decky_config"])
    updated_config["service_config"] = {"ssh": {"password": "megapassword"}}
    await repo.update_topology_decky(
        decky["uuid"], {"decky_config": updated_config}, expected_version=1,
    )

    reloaded = await repo.list_topology_deckies(tid)
    fresh = next(row for row in reloaded if row["uuid"] == decky["uuid"])
    stored_password = fresh["decky_config"]["service_config"]["ssh"]["password"]
    assert stored_password == "megapassword"
@pytest.mark.anyio
async def test_update_decky_rejected_on_active_topology(repo):
    """With enforce_pending=True, updating a decky of an ACTIVE topology is refused."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)
    decky = (await repo.list_topology_deckies(tid))[0]

    # Move the topology pending → deploying → active.
    for next_status in (TopologyStatus.DEPLOYING, TopologyStatus.ACTIVE):
        await transition_status(repo, tid, next_status)

    unchanged = {"decky_config": decky["decky_config"]}
    with pytest.raises(TopologyNotEditable) as ei:
        await repo.update_topology_decky(
            decky["uuid"],
            unchanged,
            enforce_pending=True,
        )

    # The exception reports the status that blocked the edit.
    assert ei.value.status == TopologyStatus.ACTIVE
@pytest.mark.anyio
async def test_delete_lan_with_home_decky_refused(repo):
    """Deleting a LAN is refused with an "orphaned" ValueError when it would strand a decky."""
    overrides = {
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_max": 1,
        "deckies_per_lan_min": 1,
    }
    plan = generate(_cfg(**overrides))
    tid = await persist(repo, plan)

    lans = await repo.list_lans_for_topology(tid)
    first_lan = lans[0]
    with pytest.raises(ValueError, match="orphaned"):
        await repo.delete_lan(first_lan["id"])
@pytest.mark.anyio
async def test_delete_edge_leaves_decky_intact(repo):
    """Deleting one bridge edge removes that edge only; the decky set is unchanged."""
    # With depth=1 and branching=1 the generator is expected to give the
    # DMZ (LAN-00) plus LAN-01, joined by a bridge decky.
    plan = generate(_cfg())
    tid = await persist(repo, plan)

    async def _decky_uuids():
        return {row["uuid"] for row in await repo.list_topology_deckies(tid)}

    all_edges = await repo.list_topology_edges(tid)
    bridge_edges = []
    for candidate in all_edges:
        if candidate["is_bridge"]:
            bridge_edges.append(candidate)
    assert bridge_edges, "generator should produce at least one bridge edge"

    # Remove a single bridge edge; the decky itself should survive.
    doomed = bridge_edges[0]
    uuids_before = await _decky_uuids()
    await repo.delete_topology_edge(doomed["id"])
    uuids_after = await _decky_uuids()
    assert uuids_before == uuids_after

    surviving_ids = {row["id"] for row in await repo.list_topology_edges(tid)}
    assert doomed["id"] not in surviving_ids
@pytest.mark.anyio
async def test_delete_decky_cascades_edges(repo):
    """After deleting a decky, no remaining edge references its uuid."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)
    deckies = await repo.list_topology_deckies(tid)
    removed_uuid = deckies[0]["uuid"]

    await repo.delete_topology_decky(removed_uuid)

    # Collect the decky uuid of every edge that is still stored.
    surviving_edges = await repo.list_topology_edges(tid)
    referenced = {edge["decky_uuid"] for edge in surviving_edges}
    assert removed_uuid not in referenced
@pytest.mark.anyio
async def test_delete_edge_rejected_on_active(repo):
    """Deleting an edge of an ACTIVE topology raises TopologyNotEditable."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)
    # Fetch the edges while the topology is still pending.
    edges = await repo.list_topology_edges(tid)

    for next_status in (TopologyStatus.DEPLOYING, TopologyStatus.ACTIVE):
        await transition_status(repo, tid, next_status)

    first_edge_id = edges[0]["id"]
    with pytest.raises(TopologyNotEditable):
        await repo.delete_topology_edge(first_edge_id)

View File

@@ -0,0 +1,137 @@
"""MazeNET generator determinism + DAG shape tests."""
from __future__ import annotations
from collections import Counter
import pytest
from decnet.topology.config import TopologyConfig
from decnet.topology.generator import generate
def _cfg(**kw) -> TopologyConfig:
    """Return the generator-test TopologyConfig (depth 3, seed 42), overridable via ``kw``."""
    defaults = {
        "name": "test",
        "depth": 3,
        "branching_factor": 2,
        "deckies_per_lan_min": 2,
        "deckies_per_lan_max": 2,
        "bridge_forward_probability": 1.0,
        "cross_edge_probability": 0.0,
        "randomize_services": True,
        "seed": 42,
    }
    # Later keys win, so caller overrides replace the defaults.
    combined = {**defaults, **kw}
    return TopologyConfig(**combined)
def test_seed_is_deterministic():
    """Two runs with the same config give identical LANs, deckies, services and edges."""
    first = generate(_cfg())
    second = generate(_cfg())

    def _lan_names(topo):
        return [lan.name for lan in topo.lans]

    def _decky_names(topo):
        return [d.name for d in topo.deckies]

    def _services(topo):
        return [(d.name, sorted(d.services)) for d in topo.deckies]

    def _edge_pairs(topo):
        return sorted((e.decky_name, e.lan_name) for e in topo.edges)

    # Compared field by field so a failure shows which part diverged.
    assert _lan_names(first) == _lan_names(second)
    assert _decky_names(first) == _decky_names(second)
    assert _services(first) == _services(second)
    assert _edge_pairs(first) == _edge_pairs(second)
def test_different_seed_yields_different_structure():
    """Seeds 1 and 2 must not produce an identical topology signature."""

    def _signature(topo):
        # LAN names, per-decky sorted services, and the sorted edge pairs.
        return (
            [lan.name for lan in topo.lans],
            [(d.name, sorted(d.services)) for d in topo.deckies],
            sorted((e.decky_name, e.lan_name) for e in topo.edges),
        )

    topo_one = generate(_cfg(seed=1))
    topo_two = generate(_cfg(seed=2))
    # Any difference in structure, service assignment or edges is enough.
    # Only a fully identical signature fails, since that would suggest
    # the seed is being ignored.
    assert _signature(topo_one) != _signature(topo_two)
def test_dmz_is_exactly_one_lan():
    """Exactly one LAN is flagged as DMZ: it has no parent and is named LAN-00."""
    topo = generate(_cfg())
    dmz_lans = []
    for lan in topo.lans:
        if lan.is_dmz:
            dmz_lans.append(lan)
    assert len(dmz_lans) == 1
    the_dmz = dmz_lans[0]
    assert the_dmz.parent is None
    assert the_dmz.name == "LAN-00"
def test_every_non_dmz_lan_has_exactly_one_bridge_into_parent():
    """Every non-DMZ LAN has at least one decky homed on both it and its parent.

    NOTE(review): the test name says "exactly one" but the assertion only
    requires one or more — confirm which is intended.
    """
    t = generate(_cfg(branching_factor=2, depth=3))
    non_dmz_lans = [lan for lan in t.lans if not lan.is_dmz]
    for lan in non_dmz_lans:
        # Deckies holding an IP on both this LAN and its parent LAN.
        bridges_to_parent = []
        for d in t.deckies:
            if lan.name in d.ips_by_lan and lan.parent in d.ips_by_lan:
                bridges_to_parent.append(d)
        assert len(bridges_to_parent) >= 1, (
            f"{lan.name} has no bridge into parent {lan.parent}"
        )
def test_cross_edge_probability_zero_yields_tree():
    """With ``cross_edge_probability=0`` the topology is a pure tree.

    A multi-homed decky may only be attached to its home LAN and that
    LAN's parent, never to a sibling or cousin LAN.
    """
    plan = generate(_cfg(cross_edge_probability=0.0))
    lans_by_name = {lan.name: lan for lan in plan.lans}
    for d in plan.deckies:
        if len(d.ips_by_lan) <= 1:
            continue
        # The first membership is treated as the home LAN; every further
        # membership must be the home LAN's parent.
        home, *others = d.ips_by_lan.keys()
        parent = lans_by_name[home].parent
        assert all(o == parent for o in others), (
            f"tree mode but decky {d.name} bridges {home}{others} (parent={parent})"
        )
def test_cross_edge_probability_one_produces_cross_edges_over_runs():
    """With ``cross_edge_probability=1`` at least one cross-edge appears.

    A cross-edge is counted for every non-home membership of a decky
    that is not the parent of its home (first) LAN.
    """
    plan = generate(_cfg(cross_edge_probability=1.0, depth=3, branching_factor=3))
    lans_by_name = {lan.name: lan for lan in plan.lans}
    cross_edges = 0
    for decky in plan.deckies:
        memberships = list(decky.ips_by_lan.keys())
        if len(memberships) < 2:
            continue
        home, *others = memberships
        parent = lans_by_name[home].parent
        cross_edges += sum(1 for other in others if other != parent)
    assert cross_edges >= 1
def test_every_decky_has_at_least_one_edge():
    """Each generated decky is referenced by one or more edges."""
    plan = generate(_cfg())
    edges_per_decky = Counter(edge.decky_name for edge in plan.edges)
    for decky in plan.deckies:
        # Counter returns 0 for names with no edge at all.
        edge_count = edges_per_decky[decky.name]
        assert edge_count >= 1
def test_dmz_has_exactly_one_decky():
    """``LAN-00`` carries exactly one non-bridge edge.

    Bridge edges on ``LAN-00`` are excluded from the count, so only the
    DMZ's own "home" decky is expected to remain.
    """
    plan = generate(_cfg(deckies_per_lan_min=5, deckies_per_lan_max=5))
    home_only = [
        edge
        for edge in plan.edges
        if edge.lan_name == "LAN-00" and not edge.is_bridge
    ]
    assert len(home_only) == 1

View File

@@ -0,0 +1,80 @@
"""Tests for :mod:`decnet.topology.hashing`."""
from __future__ import annotations
import copy
from decnet.topology.hashing import canonical_hash
def _sample() -> dict:
    """Return a fresh hydrated-topology dict used as hashing input.

    A new object graph is built on every call, so callers may mutate the
    result freely.
    """
    topology = {
        "id": "t1",
        "name": "n",
        "mode": "agent",
        "target_host_uuid": "h1",
        "status": "deploying",
        "version": 3,
        "created_at": "2026-04-21T00:00:00+00:00",
    }
    dmz_lan = {
        "id": "l1",
        "name": "dmz",
        "subnet": "10.0.0.0/24",
        "is_dmz": True,
        "x": 40,
        "y": 40,
    }
    gateway = {
        "uuid": "d1",
        "name": "gw",
        "services": ["ssh"],
        "decky_config": {"archetype": "deaddeck", "forwards_l3": True},
        "state": "pending",
        "x": 10,
        "y": 20,
    }
    edge = {
        "id": "e1",
        "decky_uuid": "d1",
        "lan_id": "l1",
        "is_bridge": True,
        "forwards_l3": True,
    }
    return {
        "topology": topology,
        "lans": [dmz_lan],
        "deckies": [gateway],
        "edges": [edge],
    }
def test_hash_is_stable() -> None:
    """Two equal, independently built inputs hash to the same value."""
    first_digest = canonical_hash(_sample())
    second_digest = canonical_hash(_sample())
    assert first_digest == second_digest
def test_key_order_does_not_matter() -> None:
    """Reordering the top-level keys must not change the hash."""
    a = _sample()
    # Same sections, inserted in the reverse of their original order.
    b = {section: a[section] for section in ("edges", "deckies", "lans", "topology")}
    assert canonical_hash(a) == canonical_hash(b)
def test_volatile_fields_ignored() -> None:
    """Status, version, timestamps, errors and layout coords don't affect the hash."""
    a = _sample()
    b = copy.deepcopy(a)

    b["topology"].update(
        status="active",
        version=99,
        status_changed_at="2099-01-01T00:00:00+00:00",
    )
    first_decky = b["deckies"][0]
    first_decky["last_error"] = "transient"
    first_decky["x"] = 9999
    b["lans"][0]["y"] = 12345

    assert canonical_hash(a) == canonical_hash(b)
def test_behavioural_change_flips_hash() -> None:
    """Changing a decky's service list must produce a different hash."""
    baseline = _sample()
    changed = copy.deepcopy(baseline)
    changed["deckies"][0]["services"] = ["ssh", "http"]
    assert canonical_hash(baseline) != canonical_hash(changed)
def test_input_is_not_mutated() -> None:
    """``canonical_hash`` leaves its argument equal to a pre-call deep copy."""
    payload = _sample()
    before = copy.deepcopy(payload)
    canonical_hash(payload)
    assert payload == before

View File

@@ -0,0 +1,58 @@
"""Layout coordinate roundtrips for LAN and TopologyDecky."""
from __future__ import annotations
import pytest
from decnet.topology.config import TopologyConfig
from decnet.topology.generator import generate
from decnet.topology.persistence import hydrate, persist
from decnet.web.db.factory import get_repository
def _cfg(**kw) -> TopologyConfig:
    """Return the small ``TopologyConfig`` used by the layout tests.

    Keyword arguments override the defaults below.
    """
    options = dict(
        name="layout",
        depth=1,
        branching_factor=1,
        deckies_per_lan_min=1,
        deckies_per_lan_max=1,
        cross_edge_probability=0.0,
        randomize_services=False,
        services_explicit=["ssh"],
        seed=4,
    )
    for key, value in kw.items():
        options[key] = value
    return TopologyConfig(**options)
@pytest.fixture
async def repo(tmp_path):
    """Provide an initialised repository stored at ``tmp_path/layout.db``."""
    db_file = tmp_path / "layout.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
@pytest.mark.anyio
async def test_coords_roundtrip_when_set(repo):
    """Coordinates set on the plan survive persist followed by hydrate."""
    plan = generate(_cfg())
    planned_lan, planned_decky = plan.lans[0], plan.deckies[0]
    planned_lan.x, planned_lan.y = 10.5, -3.25
    planned_decky.x, planned_decky.y = 42.0, 7.5

    tid = await persist(repo, plan)
    hydrated = await hydrate(repo, tid)

    lan_row = next(
        row for row in hydrated["lans"] if row["name"] == planned_lan.name
    )
    assert (lan_row["x"], lan_row["y"]) == (10.5, -3.25)

    decky_row = next(
        row for row in hydrated["deckies"] if row["name"] == planned_decky.name
    )
    assert (decky_row["x"], decky_row["y"]) == (42.0, 7.5)
@pytest.mark.anyio
async def test_coords_default_to_none(repo):
    """Without explicit layout, every hydrated LAN and decky has ``x``/``y`` of None."""
    tid = await persist(repo, generate(_cfg()))
    hydrated = await hydrate(repo, tid)
    for section in ("lans", "deckies"):
        for row in hydrated[section]:
            assert row["x"] is None
            assert row["y"] is None

View File

@@ -0,0 +1,452 @@
"""Step 7 — topology_mutations queue + mutator reconciler branch."""
from __future__ import annotations
import asyncio
import json
import pytest
from decnet.bus import topics as _topics
from decnet.bus.fake import FakeBus
from decnet.mutator import engine as _engine
from decnet.mutator.ops import (
MutationError,
apply_add_decky,
apply_add_lan,
apply_update_decky,
)
from decnet.topology.config import TopologyConfig
from decnet.topology.generator import generate
from decnet.topology.persistence import persist, transition_status
from decnet.topology.status import TopologyStatus, VersionConflict
from decnet.web.db.factory import get_repository
def _cfg(**kw) -> TopologyConfig:
    """Return the ``TopologyConfig`` shared by the mutation tests.

    Keyword arguments override the defaults below.
    """
    params = {
        "name": "mut",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 2,
        "deckies_per_lan_max": 2,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 9,
    }
    params |= kw
    return TopologyConfig(**params)
@pytest.fixture
async def repo(tmp_path):
    """Provide an initialised repository stored at ``tmp_path/mut.db``."""
    database = str(tmp_path / "mut.db")
    repository = get_repository(db_path=database)
    await repository.initialize()
    return repository
async def _make_active(repo) -> str:
    """Persist a generated topology, move it to ACTIVE, and return its id.

    The status is stepped through DEPLOYING first, then ACTIVE.
    """
    topology_id = await persist(repo, generate(_cfg()))
    for status in (TopologyStatus.DEPLOYING, TopologyStatus.ACTIVE):
        await transition_status(repo, topology_id, status)
    return topology_id
# --------------------------------------------------------------------- queue
@pytest.mark.anyio
async def test_enqueue_bumps_topology_version(repo):
    """Enqueuing a mutation increments the version and stores a pending row."""
    tid = await _make_active(repo)
    version_before = (await repo.get_topology(tid))["version"]

    payload = {"name": "LAN-X", "subnet": "172.20.77.0/24"}
    mutation_id = await repo.enqueue_topology_mutation(
        tid, "add_lan", payload, expected_version=version_before,
    )

    version_after = (await repo.get_topology(tid))["version"]
    assert version_after == version_before + 1

    first_row = (await repo.list_topology_mutations(tid))[0]
    assert first_row["id"] == mutation_id
    assert first_row["state"] == "pending"
@pytest.mark.anyio
async def test_enqueue_version_conflict(repo):
    """A second enqueue with the same ``expected_version`` raises VersionConflict."""
    tid = await _make_active(repo)

    async def enqueue(name, subnet):
        # Both calls deliberately pass expected_version=1.
        await repo.enqueue_topology_mutation(
            tid, "add_lan", {"name": name, "subnet": subnet},
            expected_version=1,
        )

    await enqueue("LAN-X", "172.20.77.0/24")
    with pytest.raises(VersionConflict):
        # Stale: the first enqueue already bumped the version to 2.
        await enqueue("LAN-Y", "172.20.78.0/24")
@pytest.mark.anyio
async def test_claim_next_mutation_is_atomic_single_winner(repo):
    """Of two back-to-back claims on one queued row, only the first wins."""
    tid = await _make_active(repo)
    await repo.enqueue_topology_mutation(
        tid, "add_lan", {"name": "LAN-X"},
    )
    # The two claims run sequentially. Once the first has moved the row
    # out of 'pending', the second is expected to find nothing and
    # return None instead of re-claiming the same row.
    winner = await repo.claim_next_mutation(tid)
    loser = await repo.claim_next_mutation(tid)
    assert winner is not None
    assert loser is None
    assert winner["state"] == "applying"
@pytest.mark.anyio
async def test_claim_none_when_empty(repo):
    """Claiming from a topology with no queued mutations returns None."""
    tid = await _make_active(repo)
    claimed = await repo.claim_next_mutation(tid)
    assert claimed is None
@pytest.mark.anyio
async def test_mark_applied_and_failed(repo):
    """Marking mutations applied/failed is reflected in state and reason."""
    tid = await _make_active(repo)
    applied_id = await repo.enqueue_topology_mutation(tid, "add_lan", {"name": "A"})
    failed_id = await repo.enqueue_topology_mutation(tid, "add_lan", {"name": "B"})

    await repo.claim_next_mutation(tid)
    await repo.mark_mutation_applied(applied_id)
    await repo.claim_next_mutation(tid)
    await repo.mark_mutation_failed(failed_id, "boom")

    rows = await repo.list_topology_mutations(tid)
    rows_by_id = {row["id"]: row for row in rows}
    assert rows_by_id[applied_id]["state"] == "applied"
    failed_row = rows_by_id[failed_id]
    assert failed_row["state"] == "failed"
    assert failed_row["reason"] == "boom"
# --------------------------------------------------------------- guard query
@pytest.mark.anyio
async def test_guard_false_without_pending_or_live(repo):
    """The guard is False with no topologies, and with only a non-live one."""
    # Empty database: nothing to reconcile.
    assert await repo.has_pending_topology_mutation() is False

    # A topology that was persisted but never moved out of its initial
    # status gets a queued mutation; the guard must still report False.
    tid = await persist(repo, generate(_cfg()))
    await repo.enqueue_topology_mutation(tid, "add_lan", {"name": "Z"})
    assert await repo.has_pending_topology_mutation() is False
@pytest.mark.anyio
async def test_guard_true_with_live_pending(repo):
    """The guard is True while an active topology has a pending mutation."""
    tid = await _make_active(repo)
    await repo.enqueue_topology_mutation(tid, "add_lan", {"name": "Z"})
    guard_before_claim = await repo.has_pending_topology_mutation()
    assert guard_before_claim is True

    # Claiming moves the row out of 'pending', so the guard drops again.
    await repo.claim_next_mutation(tid)
    guard_after_claim = await repo.has_pending_topology_mutation()
    assert guard_after_claim is False
# ---------------------------------------------------------------------- ops
@pytest.mark.anyio
async def test_apply_add_lan_persists(repo):
    """``apply_add_lan`` makes the new LAN visible via the repository."""
    tid = await _make_active(repo)
    new_lan = {"name": "LAN-MUT", "subnet": "172.20.55.0/24"}
    await apply_add_lan(repo, tid, new_lan)

    lan_rows = await repo.list_lans_for_topology(tid)
    assert "LAN-MUT" in {row["name"] for row in lan_rows}
@pytest.mark.anyio
async def test_apply_add_decky_creates_and_attaches(repo):
    """``add_decky`` creates the decky row and an edge to its home LAN."""
    tid = await _make_active(repo)
    home_lan = (await repo.list_lans_for_topology(tid))[0]

    payload = {
        "name": "new-decky-mut",
        "lan": home_lan["name"],
        "services": ["ssh"],
        "archetype": "deaddeck",
    }
    await apply_add_decky(repo, tid, payload)

    created = None
    for row in await repo.list_topology_deckies(tid):
        if row["decky_config"]["name"] == "new-decky-mut":
            created = row
            break
    assert created is not None
    assert created["services"] == ["ssh"]
    config = created["decky_config"]
    assert config["archetype"] == "deaddeck"
    assert home_lan["name"] in config["ips_by_lan"]

    edges = await repo.list_topology_edges(tid)
    wanted = (created["uuid"], home_lan["id"])
    assert any((edge["decky_uuid"], edge["lan_id"]) == wanted for edge in edges)
@pytest.mark.anyio
async def test_apply_add_decky_rejects_duplicate_name(repo):
    """Adding a decky whose name is already taken raises ``MutationError``."""
    tid = await _make_active(repo)
    lans = await repo.list_lans_for_topology(tid)
    existing = (await repo.list_topology_deckies(tid))[0]
    duplicate = {
        "name": existing["decky_config"]["name"],
        "lan": lans[0]["name"],
    }
    with pytest.raises(MutationError, match="already exists"):
        await apply_add_decky(repo, tid, duplicate)
@pytest.mark.anyio
async def test_apply_add_decky_rejects_missing_lan(repo):
    """Adding a decky to a LAN name that does not exist raises ``MutationError``."""
    tid = await _make_active(repo)
    orphan = {"name": "orphan-decky", "lan": "nonexistent-lan"}
    with pytest.raises(MutationError, match="not found"):
        await apply_add_decky(repo, tid, orphan)
@pytest.mark.anyio
async def test_apply_update_decky_replaces_services(repo):
    """A top-level ``services`` key in the payload replaces the decky's services."""
    tid = await _make_active(repo)
    target = (await repo.list_topology_deckies(tid))[0]

    payload = {
        "decky": target["decky_config"]["name"],
        "services": ["ssh", "http"],
    }
    await apply_update_decky(repo, tid, payload)

    rows = await repo.list_topology_deckies(tid)
    updated = next(row for row in rows if row["uuid"] == target["uuid"])
    assert sorted(updated["services"]) == ["http", "ssh"]
@pytest.mark.anyio
async def test_apply_rejected_on_validator_error(repo):
    """A patch that fails post-apply validation raises ``MutationError``."""
    tid = await _make_active(repo)
    target = (await repo.list_topology_deckies(tid))[0]
    # Supplying service_config for a service the decky does not declare is
    # expected to trip SERVICE_CFG_UNDECLARED in the post-apply invariants.
    bad_patch = {"service_config": {"telnet": {"banner": "x"}}}
    payload = {"decky": target["decky_config"]["name"], "patch": bad_patch}
    with pytest.raises(MutationError):
        await apply_update_decky(repo, tid, payload)
# ----------------------------------------------------------- reconciler flow
@pytest.mark.anyio
async def test_reconcile_applies_pending_mutation(repo):
    """The reconciler drains one pending ``add_lan`` and marks it applied."""
    tid = await _make_active(repo)
    await repo.enqueue_topology_mutation(
        tid, "add_lan",
        {"name": "LAN-RECON", "subnet": "172.20.44.0/24"},
    )

    drained = await _engine.reconcile_topologies(repo)
    assert drained == 1

    lan_rows = await repo.list_lans_for_topology(tid)
    assert "LAN-RECON" in {row["name"] for row in lan_rows}

    # Every mutation row for this topology must now be in state 'applied'.
    mutation_rows = await repo.list_topology_mutations(tid)
    assert {row["state"] for row in mutation_rows} == {"applied"}
@pytest.mark.anyio
async def test_reconcile_failed_mutation_degrades_topology(repo):
    """A mutation that fails to apply is marked failed and degrades the topology."""
    tid = await _make_active(repo)
    taken_name = (await repo.list_lans_for_topology(tid))[0]["name"]
    # Re-using an existing LAN name is expected to be rejected, which
    # drives the reconciler down its failure path.
    await repo.enqueue_topology_mutation(
        tid, "add_lan", {"name": taken_name, "subnet": "172.20.88.0/24"},
    )

    drained = await _engine.reconcile_topologies(repo)
    assert drained == 0

    first_mutation = (await repo.list_topology_mutations(tid))[0]
    assert first_mutation["state"] == "failed"
    topology = await repo.get_topology(tid)
    assert topology["status"] == TopologyStatus.DEGRADED
# ----------------------------------------------------- watch-loop guard isolation
@pytest.mark.anyio
async def test_watch_loop_guard_skips_reconciler_when_idle(
    repo, monkeypatch
):
    """With nothing pending, the guarded reconciler call is skipped.

    The test drives one hand-written iteration of the loop body:
    ``mutate_all`` is always called, ``reconcile_topologies`` only when
    the guard reports pending work.
    """
    counts = {"mutate_all": 0, "reconcile": 0}

    async def _fake_mutate_all(force=False, repo=None):
        counts["mutate_all"] += 1

    async def _fake_reconcile(r):
        counts["reconcile"] += 1
        return 0

    monkeypatch.setattr(_engine, "mutate_all", _fake_mutate_all)
    monkeypatch.setattr(_engine, "reconcile_topologies", _fake_reconcile)

    # One manual tick of the loop body.
    await _engine.mutate_all(force=False, repo=repo)
    has_work = await repo.has_pending_topology_mutation()
    if has_work:
        await _engine.reconcile_topologies(repo)

    assert counts["mutate_all"] == 1
    assert counts["reconcile"] == 0
@pytest.mark.anyio
async def test_watch_loop_guard_fires_reconciler_when_work_exists(
    repo, monkeypatch
):
    """With a pending mutation on an active topology, the guard lets the reconciler run."""
    tid = await _make_active(repo)
    await repo.enqueue_topology_mutation(tid, "add_lan", {"name": "X"})

    counts = {"reconcile": 0}

    async def _fake_reconcile(r):
        counts["reconcile"] += 1
        return 0

    monkeypatch.setattr(_engine, "reconcile_topologies", _fake_reconcile)

    has_work = await repo.has_pending_topology_mutation()
    if has_work:
        await _engine.reconcile_topologies(repo)
    assert counts["reconcile"] == 1
def test_ops_payload_shape_docstring_present():
    """Smoke check: ``DISPATCH`` has exactly the expected set of op names."""
    from decnet.mutator.ops import DISPATCH

    expected_ops = {
        "add_lan",
        "remove_lan",
        "update_lan",
        "add_decky",
        "attach_decky",
        "detach_decky",
        "remove_decky",
        "update_decky",
    }
    assert set(DISPATCH) == expected_ops
def _payload_json(d: dict) -> str:
    """Serialise *d* to a JSON string using ``json.dumps`` defaults."""
    encoded = json.dumps(d)
    return encoded
# ---------------------------------------------------- bus publishing (DEBT-030)
async def _drain(sub, expected: int, timeout: float = 2.0) -> list:
    """Read *expected* events from *sub*, waiting at most *timeout* for each.

    Each read is wrapped in ``asyncio.wait_for``, so a missing event
    surfaces as a timeout error instead of hanging the test. Returns the
    events in arrival order.
    """
    iterator = sub.__aiter__()
    collected: list = []
    while len(collected) < expected:
        event = await asyncio.wait_for(iterator.__anext__(), timeout=timeout)
        collected.append(event)
    return collected
@pytest.mark.anyio
async def test_reconcile_publishes_applying_and_applied(repo):
    """A successful reconcile publishes APPLYING then APPLIED on the bus."""
    tid = await _make_active(repo)
    await repo.enqueue_topology_mutation(
        tid, "add_lan",
        {"name": "LAN-PUB", "subnet": "172.20.45.0/24"},
    )

    bus = FakeBus()
    await bus.connect()
    subscription = bus.subscribe(f"{_topics.TOPOLOGY}.{tid}.>")
    try:
        async with subscription:
            drained = await _engine.reconcile_topologies(repo, bus=bus)
            assert drained == 1
            events = await _drain(subscription, expected=2)
    finally:
        await bus.close()

    expected_types = [_topics.MUTATION_APPLYING, _topics.MUTATION_APPLIED]
    assert [event.type for event in events] == expected_types
@pytest.mark.anyio
async def test_reconcile_publishes_failed_and_status(repo):
    """A failing reconcile publishes APPLYING, FAILED, then a DEGRADED status."""
    tid = await _make_active(repo)
    taken_name = (await repo.list_lans_for_topology(tid))[0]["name"]
    await repo.enqueue_topology_mutation(
        tid, "add_lan", {"name": taken_name, "subnet": "172.20.89.0/24"},
    )

    bus = FakeBus()
    await bus.connect()
    subscription = bus.subscribe(f"{_topics.TOPOLOGY}.{tid}.>")
    try:
        async with subscription:
            await _engine.reconcile_topologies(repo, bus=bus)
            # Three events expected: applying, failed, status(degraded).
            events = await _drain(subscription, expected=3)
    finally:
        await bus.close()

    expected_types = [
        _topics.MUTATION_APPLYING,
        _topics.MUTATION_FAILED,
        _topics.TOPOLOGY_STATUS,
    ]
    assert [event.type for event in events] == expected_types
    status_event = events[-1]
    assert status_event.payload["state"] == TopologyStatus.DEGRADED
@pytest.mark.anyio
async def test_reconcile_with_null_bus_is_safe(repo):
    """The reconciler still drains the queue when called with ``bus=None``."""
    tid = await _make_active(repo)
    payload = {"name": "LAN-NULL", "subnet": "172.20.46.0/24"}
    await repo.enqueue_topology_mutation(tid, "add_lan", payload)

    drained = await _engine.reconcile_topologies(repo, bus=None)
    assert drained == 1
@pytest.mark.anyio
async def test_wake_on_enqueue_sets_event(repo):
    """Publishing a MUTATION_ENQUEUED event makes ``_wake_on_enqueue`` set the Event."""
    bus = FakeBus()
    await bus.connect()
    wake = asyncio.Event()
    listener = asyncio.create_task(_engine._wake_on_enqueue(bus, wake))
    try:
        # Yield once so the listener task can register its subscription.
        await asyncio.sleep(0)
        subject = _topics.topology_mutation("abc", _topics.MUTATION_ENQUEUED)
        await bus.publish(
            subject,
            {"mutation_id": "m1", "op": "add_lan"},
            event_type=_topics.MUTATION_ENQUEUED,
        )
        await asyncio.wait_for(wake.wait(), timeout=1.0)
        assert wake.is_set()
    finally:
        listener.cancel()
        # Best-effort teardown: whatever the cancelled task raises is ignored.
        try:
            await listener
        except (asyncio.CancelledError, Exception):
            pass
        await bus.close()

View File

@@ -0,0 +1,91 @@
"""MazeNET persistence-layer tests: generator → repo → hydrate roundtrip."""
import pytest
from decnet.topology.config import TopologyConfig
from decnet.topology.generator import generate
from decnet.topology.persistence import (
hydrate,
persist,
transition_status,
)
from decnet.topology.status import TopologyStatus, TopologyStatusError
from decnet.web.db.factory import get_repository
@pytest.fixture
async def repo(tmp_path):
    """Provide an initialised repository stored at ``tmp_path/persist.db``."""
    location = tmp_path / "persist.db"
    repository = get_repository(db_path=str(location))
    await repository.initialize()
    return repository
def _config(**kw) -> TopologyConfig:
    """Return the ``TopologyConfig`` for the persistence round-trip tests.

    Keyword arguments override the defaults below.
    """
    defaults = {
        "name": "roundtrip",
        "depth": 2,
        "branching_factor": 2,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 2,
        "cross_edge_probability": 0.0,
        "randomize_services": True,
        "seed": 7,
    }
    merged = {**defaults, **kw}
    return TopologyConfig(**merged)
@pytest.mark.anyio
async def test_persist_then_hydrate(repo):
    """A generated plan persists and hydrates with matching shape and content."""
    plan = generate(_config())
    tid = await persist(repo, plan)
    hydrated = await hydrate(repo, tid)
    assert hydrated is not None

    topology = hydrated["topology"]
    assert topology["name"] == "roundtrip"
    assert topology["status"] == TopologyStatus.PENDING

    assert len(hydrated["lans"]) == len(plan.lans)
    assert len(hydrated["deckies"]) == len(plan.deckies)
    assert len(hydrated["edges"]) == len(plan.edges)

    # Each planned LAN comes back with the same subnet and DMZ flag.
    stored_lans = {row["name"]: row for row in hydrated["lans"]}
    for planned in plan.lans:
        stored = stored_lans[planned.name]
        assert stored["subnet"] == planned.subnet
        assert stored["is_dmz"] == planned.is_dmz

    # Services must hydrate as a real list rather than a serialised string.
    for decky_row in hydrated["deckies"]:
        assert isinstance(decky_row["services"], list)
@pytest.mark.anyio
async def test_transition_status_enforces_legality(repo):
    """Legal transitions succeed; illegal ones and unknown ids raise."""
    tid = await persist(repo, generate(_config()))
    await transition_status(repo, tid, TopologyStatus.DEPLOYING, reason="go")
    await transition_status(repo, tid, TopologyStatus.ACTIVE)
    current = await repo.get_topology(tid)
    assert current["status"] == TopologyStatus.ACTIVE

    # ACTIVE -> PENDING is not an allowed transition.
    with pytest.raises(TopologyStatusError):
        await transition_status(repo, tid, TopologyStatus.PENDING)

    # An unknown topology id must raise rather than silently do nothing.
    with pytest.raises(ValueError):
        await transition_status(repo, "does-not-exist", TopologyStatus.ACTIVE)
@pytest.mark.anyio
async def test_hydrate_missing_topology(repo):
    """Hydrating an id that was never persisted returns None."""
    result = await hydrate(repo, "no-such-id")
    assert result is None
@pytest.mark.anyio
async def test_config_snapshot_preserves_seed(repo):
    """The stored ``config_snapshot`` keeps the seed and depth of the config."""
    tid = await persist(repo, generate(_config(seed=12345)))
    snapshot = (await repo.get_topology(tid))["config_snapshot"]
    assert snapshot["seed"] == 12345
    assert snapshot["depth"] == 2

View File

@@ -0,0 +1,228 @@
"""Tests for the orphan topology-resource reaper."""
from __future__ import annotations
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
from decnet.engine.reaper import (
ReapReport,
_orphan_prefixes,
_prefix_of,
reap_orphan_topology_resources,
)
# ---------------------------------------------------------------- pure helpers
def test_prefix_of_matches_decnet_convention():
    """``_prefix_of`` extracts the id segment from ``decnet_t_<id>_<rest>`` names."""
    resource_names = (
        "decnet_t_abcd1234_dmz",
        "decnet_t_abcd1234_subnet-01",
        "decnet_t_abcd1234_decky-631b",
    )
    for resource_name in resource_names:
        assert _prefix_of(resource_name) == "abcd1234"
def test_prefix_of_rejects_non_decnet_names():
    """Names outside the ``decnet_t_<8 chars>_`` convention yield None."""
    for foreign_name in ("bridge", "host", "development_default"):
        assert _prefix_of(foreign_name) is None
    # A four-character id segment is too short to count as a prefix.
    assert _prefix_of("decnet_t_abcd_dmz") is None
    # Edge case: an empty remainder after the prefix is still accepted.
    assert _prefix_of("decnet_t_abcd1234_") == "abcd1234"
def test_orphan_prefixes_flags_only_unknowns():
    """Only prefixes absent from the live set are reported as orphans."""
    live = {"aaaa1111", "bbbb2222"}
    container_names = [
        "decnet_t_aaaa1111_decky-01",     # belongs to a live topology
        "decnet_t_cccc3333_dmz-gateway",  # orphan
        "bridge",                         # not a DECNET resource
    ]
    network_names = [
        "decnet_t_bbbb2222_subnet-01",    # belongs to a live topology
        "decnet_t_cccc3333_dmz",          # orphan
        "decnet_t_dddd4444_subnet-01",    # orphan
    ]

    result = _orphan_prefixes(container_names, network_names, live)
    orphans, decnet_containers, decnet_networks = result

    assert orphans == {"cccc3333", "dddd4444"}
    assert "bridge" not in decnet_containers
    assert len(decnet_networks) == 3
def test_orphan_prefixes_empty_when_all_live():
    """No orphans are reported when every resource has a live prefix."""
    result = _orphan_prefixes(
        ["decnet_t_aaaa1111_decky"],
        ["decnet_t_aaaa1111_dmz"],
        {"aaaa1111"},
    )
    assert result[0] == set()
# ---------------------------------------------------------------- integration
class _FakeContainer:
    """Stand-in for a Docker container that records whether it was removed.

    When *remove_raises* is given, ``remove`` raises it and leaves
    ``removed`` False.
    """

    def __init__(self, name, remove_raises=None):
        self.name = name
        self.removed = False
        self._raises = remove_raises

    def remove(self, force=False):  # noqa: ARG002
        """Flag the container as removed, or raise the configured error."""
        if not self._raises:
            self.removed = True
            return
        raise self._raises
class _FakeNetwork:
    """Stand-in for a Docker network with no attached containers."""

    def __init__(self, name):
        self.name = name
        self.id = "id-" + name
        self.removed = False
        # Same key the test double exposes for attached containers; always empty.
        self.attrs = {"Containers": {}}

    def remove(self):
        """Flag the network as removed."""
        self.removed = True

    def disconnect(self, cid, force=False):  # pragma: no cover
        """Accept a disconnect request and do nothing."""
        return None
class _FakeClient:
    """Minimal stand-in for a Docker client.

    ``client.containers.list(all=...)`` returns a copy of the container
    list; ``client.networks`` is the client itself, so
    ``client.networks.list(...)`` resolves to :meth:`list` below.
    """

    def __init__(self, containers, networks):
        self._cs = containers
        self._ns = networks
        # ``list`` inside the lambda is the builtin: class-level names are
        # not visible from nested function scopes.
        self.containers = SimpleNamespace(list=lambda all=False: list(self._cs))
        self.networks = self

    def list(self, names=None, filters=None):  # noqa: ARG002
        """Return the fake networks, optionally restricted to *names*.

        With ``names=None`` a copy of every network is returned;
        otherwise only networks whose ``name`` is in *names*. *filters*
        is accepted and ignored.
        """
        if names is None:
            return list(self._ns)
        # Build the lookup set once instead of once per network.
        wanted = set(names)
        return [n for n in self._ns if n.name in wanted]
class _StubRepo:
    """Repository stub exposing only ``list_topologies``."""

    def __init__(self, topology_ids):
        self._ids = topology_ids

    async def list_topologies(self):
        """Return one ``{"id": ...}`` row per configured topology id."""
        rows = [dict(id=topology_id) for topology_id in self._ids]
        return rows
@pytest.mark.anyio
async def test_reap_removes_only_orphans():
    """Only resources whose prefix has no live topology are removed."""
    live_tid = "aaaa1111-1111-1111-1111-111111111111"
    repo = _StubRepo([live_tid])
    containers = [
        _FakeContainer(name)
        for name in (
            "decnet_t_aaaa1111_decky",        # live, must be kept
            "decnet_t_dead0000_dmz-gateway",  # orphan
            "decnet_t_dead0000_decky-1",      # orphan
            "bridge",                         # not a DECNET resource
        )
    ]
    networks = [
        _FakeNetwork(name)
        for name in (
            "decnet_t_aaaa1111_dmz",          # live, must be kept
            "decnet_t_dead0000_dmz",          # orphan
            "decnet_t_dead0000_subnet-01",    # orphan
            "host",                           # not a DECNET resource
        )
    ]
    client = _FakeClient(containers, networks)

    report = await reap_orphan_topology_resources(repo, client=client)

    assert report.live_prefixes == ["aaaa1111"]
    assert report.orphan_prefixes == ["dead0000"]
    assert set(report.containers_removed) == {
        "decnet_t_dead0000_dmz-gateway",
        "decnet_t_dead0000_decky-1",
    }
    assert set(report.networks_removed) == {
        "decnet_t_dead0000_dmz",
        "decnet_t_dead0000_subnet-01",
    }
    assert report.errors == []

    # Nothing carrying the live prefix may have been touched.
    live_containers = [c for c in containers if "aaaa1111" in c.name]
    live_networks = [n for n in networks if "aaaa1111" in n.name]
    assert all(c.removed is False for c in live_containers)
    assert all(n.removed is False for n in live_networks)
@pytest.mark.anyio
async def test_reap_is_noop_when_no_orphans():
    """With every resource belonging to a live topology, nothing is removed."""
    repo = _StubRepo(["aaaa1111-xxx"])
    client = _FakeClient(
        [_FakeContainer("decnet_t_aaaa1111_d")],
        [_FakeNetwork("decnet_t_aaaa1111_net")],
    )

    report = await reap_orphan_topology_resources(repo, client=client)

    assert report.orphan_prefixes == []
    assert report.containers_removed == []
    assert report.networks_removed == []
@pytest.mark.anyio
async def test_reap_captures_per_resource_errors_without_aborting():
    """One container failing to remove is reported; the rest are still reaped."""
    repo = _StubRepo([])
    stuck = _FakeContainer("decnet_t_dead0000_c1", remove_raises=RuntimeError("stuck"))
    healthy = _FakeContainer("decnet_t_dead0000_c2")
    client = _FakeClient(
        [stuck, healthy],
        [_FakeNetwork("decnet_t_dead0000_net")],
    )

    report = await reap_orphan_topology_resources(repo, client=client)

    # The failure is recorded, and later resources are still processed.
    assert any("c1" in message for message in report.errors)
    assert "decnet_t_dead0000_c2" in report.containers_removed
    assert "decnet_t_dead0000_net" in report.networks_removed
@pytest.mark.anyio
async def test_reap_handles_docker_list_failure():
    """If listing containers raises, the error is reported and nothing is removed."""
    docker = MagicMock()
    docker.containers.list.side_effect = RuntimeError("docker down")
    docker.networks.list.return_value = []

    report = await reap_orphan_topology_resources(
        _StubRepo(["aaaa1111"]), client=docker
    )

    assert any("docker list failed" in message for message in report.errors)
    assert report.containers_removed == []
    assert report.networks_removed == []
# ---------------------------------------------------------------------- report
def test_reap_report_to_dict_is_serialisable():
    """``ReapReport.to_dict`` returns its five list fields keyed by name."""
    expected = {
        "live_prefixes": ["aa"],
        "orphan_prefixes": ["bb"],
        "containers_removed": ["c"],
        "networks_removed": ["n"],
        "errors": [],
    }
    report = ReapReport(
        live_prefixes=["aa"],
        orphan_prefixes=["bb"],
        containers_removed=["c"],
        networks_removed=["n"],
        errors=[],
    )
    assert report.to_dict() == expected
# ---------------------------------------------------------------------- API
@pytest.mark.anyio
async def test_api_reap_orphans_requires_admin(monkeypatch):
    """The reap-orphans handler returns the reaper's report as a dict.

    The reaper is patched out, so only the handler's translation of a
    ``ReapReport`` into its response dict is exercised.
    """
    from decnet.web.router.topology.api_reap_orphans import api_reap_orphans

    canned_report = ReapReport(
        live_prefixes=["aaaa1111"],
        orphan_prefixes=["dead0000"],
        containers_removed=["decnet_t_dead0000_c"],
        networks_removed=["decnet_t_dead0000_n"],
    )
    target = "decnet.web.router.topology.api_reap_orphans.reap_orphan_topology_resources"
    with patch(target) as mock_reap:
        mock_reap.return_value = canned_report
        result = await api_reap_orphans(_admin={"role": "admin"})

    assert result["orphan_prefixes"] == ["dead0000"]
    assert result["containers_removed"] == ["decnet_t_dead0000_c"]
    assert result["networks_removed"] == ["decnet_t_dead0000_n"]
    assert result["errors"] == []

167
tests/topology/test_repo.py Normal file
View File

@@ -0,0 +1,167 @@
"""Direct async tests for MazeNET topology persistence.
Exercises the repository layer without going through the HTTP stack or
the in-memory generator. The synthetic topology here is hand-built so
the test remains meaningful even if generator.py regresses.
"""
import pytest
from decnet.web.db.factory import get_repository
@pytest.fixture
async def repo(tmp_path):
    """Provide an initialised repository stored at ``tmp_path/mazenet.db``."""
    db_location = str(tmp_path / "mazenet.db")
    repository = get_repository(db_path=db_location)
    await repository.initialize()
    return repository
@pytest.mark.anyio
async def test_topology_roundtrip(repo):
    """A created topology reads back with its name, initial status and snapshot."""
    snapshot = {"depth": 3, "seed": 42}
    t_id = await repo.create_topology(
        {"name": "alpha", "mode": "unihost", "config_snapshot": snapshot}
    )
    assert t_id

    stored = await repo.get_topology(t_id)
    assert stored is not None
    assert stored["name"] == "alpha"
    assert stored["status"] == "pending"
    # The JSON column must come back as a dict, not a serialised string.
    assert stored["config_snapshot"] == {"depth": 3, "seed": 42}
@pytest.mark.anyio
async def test_lan_add_update_list(repo):
    """LANs can be added, updated and listed; ``is_dmz`` defaults to False."""
    t_id = await repo.create_topology(
        {"name": "beta", "mode": "unihost", "config_snapshot": {}}
    )
    dmz_row = {
        "topology_id": t_id,
        "name": "DMZ",
        "subnet": "172.20.0.0/24",
        "is_dmz": True,
    }
    plain_row = {"topology_id": t_id, "name": "LAN-A", "subnet": "172.20.1.0/24"}
    dmz_id = await repo.add_lan(dmz_row)
    await repo.add_lan(plain_row)
    await repo.update_lan(dmz_id, {"docker_network_id": "abc123"})

    lans = await repo.list_lans_for_topology(t_id)
    assert len(lans) == 2
    lans_by_name = {row["name"]: row for row in lans}
    dmz = lans_by_name["DMZ"]
    assert dmz["docker_network_id"] == "abc123"
    assert dmz["is_dmz"] is True
    assert lans_by_name["LAN-A"]["is_dmz"] is False
@pytest.mark.anyio
async def test_topology_decky_json_roundtrip(repo):
    """Decky JSON fields round-trip as Python objects, and updates are persisted."""
    t_id = await repo.create_topology(
        {"name": "gamma", "mode": "unihost", "config_snapshot": {}}
    )
    decky_row = {
        "topology_id": t_id,
        "name": "decky-01",
        "services": ["ssh", "http"],
        "decky_config": {"hostname": "bastion"},
        "ip": "172.20.0.10",
    }
    d_uuid = await repo.add_topology_decky(decky_row)
    assert d_uuid

    deckies = await repo.list_topology_deckies(t_id)
    assert len(deckies) == 1
    stored = deckies[0]
    assert stored["services"] == ["ssh", "http"]
    assert stored["decky_config"] == {"hostname": "bastion"}
    assert stored["state"] == "pending"

    await repo.update_topology_decky(d_uuid, {"state": "running", "ip": "172.20.0.11"})
    refreshed = (await repo.list_topology_deckies(t_id))[0]
    assert refreshed["state"] == "running"
    assert refreshed["ip"] == "172.20.0.11"
@pytest.mark.anyio
async def test_topology_decky_name_unique_within_topology(repo):
    """A decky name may repeat across topologies but not inside one."""

    async def new_topology(name):
        return await repo.create_topology(
            {"name": name, "mode": "unihost", "config_snapshot": {}}
        )

    async def add_decky_01(topology_id):
        await repo.add_topology_decky(
            {"topology_id": topology_id, "name": "decky-01", "services": []}
        )

    t1 = await new_topology("one")
    t2 = await new_topology("two")

    await add_decky_01(t1)
    # Same name under a different topology is allowed.
    await add_decky_01(t2)
    # Same name under the same topology must be rejected by the database.
    with pytest.raises(Exception):
        await add_decky_01(t1)
@pytest.mark.anyio
async def test_status_transition_writes_event(repo):
    """Each status update records one event, listed newest first."""
    t_id = await repo.create_topology(
        {"name": "delta", "mode": "unihost", "config_snapshot": {}}
    )
    await repo.update_topology_status(t_id, "deploying", reason="kickoff")
    await repo.update_topology_status(t_id, "active")

    current = await repo.get_topology(t_id)
    assert current["status"] == "active"

    events = await repo.list_topology_status_events(t_id)
    assert len(events) == 2
    # Events are returned in descending time order, so the latest is first.
    newest, oldest = events
    assert newest["to_status"] == "active"
    assert newest["from_status"] == "deploying"
    assert oldest["to_status"] == "deploying"
    assert oldest["from_status"] == "pending"
    assert oldest["reason"] == "kickoff"
@pytest.mark.anyio
async def test_cascade_delete_clears_all_children(repo):
    """Cascade delete removes the topology together with all of its child rows."""
    t_id = await repo.create_topology(
        {"name": "eps", "mode": "unihost", "config_snapshot": {}}
    )
    lan_id = await repo.add_lan(
        {"topology_id": t_id, "name": "L", "subnet": "10.0.0.0/24"}
    )
    d_uuid = await repo.add_topology_decky(
        {"topology_id": t_id, "name": "d", "services": []}
    )
    await repo.add_topology_edge(
        {"topology_id": t_id, "decky_uuid": d_uuid, "lan_id": lan_id}
    )
    await repo.update_topology_status(t_id, "deploying")
    await repo.enqueue_topology_mutation(t_id, "noop", {"x": 1})

    deleted = await repo.delete_topology_cascade(t_id)
    assert deleted is True
    assert await repo.get_topology(t_id) is None

    child_listers = (
        repo.list_lans_for_topology,
        repo.list_topology_deckies,
        repo.list_topology_edges,
        repo.list_topology_status_events,
    )
    for list_children in child_listers:
        assert await list_children(t_id) == []

    # Deleting an already-missing topology returns False without raising.
    deleted_again = await repo.delete_topology_cascade(t_id)
    assert deleted_again is False
@pytest.mark.anyio
async def test_list_topologies_filters_by_status(repo):
    """``list_topologies`` honours the ``status`` filter and lists all without it."""

    async def _ids(**filters):
        rows = await repo.list_topologies(**filters)
        return {row["id"] for row in rows}

    a = await repo.create_topology(
        {"name": "a", "mode": "unihost", "config_snapshot": {}}
    )
    b = await repo.create_topology(
        {"name": "b", "mode": "unihost", "config_snapshot": {}}
    )
    # Only "b" is moved on; "a" keeps the status it was created with.
    await repo.update_topology_status(b, "deploying")

    assert await _ids(status="pending") == {a}
    assert await _ids(status="deploying") == {b}
    assert await _ids() == {a, b}

View File

@@ -0,0 +1,168 @@
"""Mutator reconcile loop + deployer.resync_agent_topology.
Covers the last mile of Step 7: once the heartbeat handler flags a
topology as ``needs_resync``, the mutator's ``reconcile_agent_resyncs``
pass must pick it up, re-push via AgentClient, and clear the flag.
Failures must leave the flag set so the next tick retries.
"""
from __future__ import annotations
from typing import Any
import pytest
from decnet.engine import deployer as _deployer
from decnet.mutator import engine as _mut_engine
from decnet.topology.config import TopologyConfig
from decnet.topology.generator import generate
from decnet.topology.hashing import canonical_hash
from decnet.topology.persistence import hydrate, persist, transition_status
from decnet.topology.status import TopologyStatus
from decnet.web.db.factory import get_repository
def _cfg(**kw) -> TopologyConfig:
    """Return a small agent-mode ``TopologyConfig``; ``kw`` overrides defaults."""
    defaults = {
        "name": "resync",
        "mode": "agent",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 9,
    }
    # Later entries win, so caller-supplied values replace the defaults.
    return TopologyConfig(**{**defaults, **kw})
@pytest.fixture
async def repo(tmp_path):
    """Provide an initialized repository whose DB file lives under ``tmp_path``."""
    db_file = tmp_path / "resync.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
async def _seed_host(repo, uuid_: str) -> None:
    """Register one swarm host row with status ``active`` under ``uuid_``.

    Address, port, fingerprint and bundle path are fixed placeholder values.
    """
    host_row = {
        "uuid": uuid_,
        "name": f"host-{uuid_}",
        "address": "10.9.9.9",
        "agent_port": 8765,
        "status": "active",
        "client_cert_fingerprint": "a" * 64,
        "cert_bundle_path": "/tmp/ignored",
    }
    await repo.add_swarm_host(host_row)
class _FakeAgentClient:
    """In-memory agent client double that records every ``apply_topology`` call.

    Each constructed client appends itself to the class-level ``instances``
    list so a test can inspect which clients were created.
    """

    # Shared across all instances on purpose: it is the creation registry.
    instances: list["_FakeAgentClient"] = []

    def __init__(self, *, host: dict[str, Any]) -> None:
        self.host = host
        self.calls: list[tuple[str, tuple]] = []
        type(self).instances.append(self)

    async def __aenter__(self) -> "_FakeAgentClient":
        return self

    async def __aexit__(self, *_exc_info) -> None:
        return None

    async def apply_topology(self, hydrated, version_hash):
        """Record the push and report it as applied with the same hash."""
        record = ("apply", (hydrated, version_hash))
        self.calls.append(record)
        return {"status": "applied", "version_hash": version_hash}
@pytest.fixture
def fake_agent(monkeypatch: pytest.MonkeyPatch):
    """Replace ``decnet.swarm.client.AgentClient`` with ``_FakeAgentClient``.

    The instance registry is emptied first so each test starts clean; the
    fake class itself is returned for inspection.
    """
    import decnet.swarm.client as _swarm_client

    _FakeAgentClient.instances.clear()
    monkeypatch.setattr(_swarm_client, "AgentClient", _FakeAgentClient)
    return _FakeAgentClient
async def _active_topology(repo, host_uuid: str) -> tuple[str, str]:
    """Persist a generated topology on ``host_uuid`` and drive it to ACTIVE.

    Returns the topology id together with the canonical hash of its
    hydrated form.
    """
    tid = await persist(repo, generate(_cfg()), target_host_uuid=host_uuid)
    for status in (TopologyStatus.DEPLOYING, TopologyStatus.ACTIVE):
        await transition_status(repo, tid, status)
    snapshot = await hydrate(repo, tid)
    return tid, canonical_hash(snapshot)
@pytest.mark.anyio
async def test_resync_agent_topology_pushes_current_hash(repo, fake_agent) -> None:
    """Resync pushes the hydrated topology with its current canonical hash."""
    await _seed_host(repo, "h-sync")
    tid, expected = await _active_topology(repo, "h-sync")

    await _deployer.resync_agent_topology(repo, tid)

    assert len(fake_agent.instances) == 1
    client = fake_agent.instances[0]
    first_call = client.calls[0]
    assert first_call[0] == "apply"
    _, (pushed, pushed_hash) = first_call
    assert pushed_hash == expected
    assert pushed["topology"]["id"] == tid

    # The topology status is the same after the resync as before it.
    row = await repo.get_topology(tid)
    assert row["status"] == TopologyStatus.ACTIVE
@pytest.mark.anyio
async def test_resync_rejects_master_local_topology(repo) -> None:
    """Resync raises ``ValueError`` for a topology without a target host."""
    tid = await persist(repo, generate(_cfg(mode="unihost")), target_host_uuid=None)
    for status in (TopologyStatus.DEPLOYING, TopologyStatus.ACTIVE):
        await transition_status(repo, tid, status)

    with pytest.raises(ValueError, match="no target_host_uuid"):
        await _deployer.resync_agent_topology(repo, tid)
@pytest.mark.anyio
async def test_reconcile_agent_resyncs_drains_flag(repo, fake_agent) -> None:
    """A flagged topology is pushed once and its ``needs_resync`` flag cleared."""
    await _seed_host(repo, "h-drain")
    tid, _hash = await _active_topology(repo, "h-drain")
    await repo.set_topology_resync(tid, True)

    drained_count = await _mut_engine.reconcile_agent_resyncs(repo)
    assert drained_count == 1

    refreshed = await repo.get_topology(tid)
    assert refreshed["needs_resync"] is False
    assert len(fake_agent.instances) == 1
@pytest.mark.anyio
async def test_reconcile_retains_flag_on_push_failure(repo, monkeypatch) -> None:
    """A failing push drains nothing and leaves ``needs_resync`` set."""
    await _seed_host(repo, "h-boom")
    tid, _hash = await _active_topology(repo, "h-boom")
    await repo.set_topology_resync(tid, True)

    class _Boom:
        """Agent client stand-in whose ``apply_topology`` always raises."""

        def __init__(self, *, host):
            pass

        async def __aenter__(self):
            return self

        async def __aexit__(self, *_):
            return None

        async def apply_topology(self, *_a, **_k):
            raise RuntimeError("agent unreachable")

    import decnet.swarm.client as _swarm_client

    monkeypatch.setattr(_swarm_client, "AgentClient", _Boom)

    drained_count = await _mut_engine.reconcile_agent_resyncs(repo)
    assert drained_count == 0

    # The flag must survive the failure so a later pass can retry.
    refreshed = await repo.get_topology(tid)
    assert refreshed["needs_resync"] is True
@pytest.mark.anyio
async def test_reconcile_noop_when_nothing_flagged(repo, fake_agent) -> None:
    """With no topology flagged, reconcile drains zero and builds no client."""
    await _seed_host(repo, "h-idle")
    await _active_topology(repo, "h-idle")

    drained_count = await _mut_engine.reconcile_agent_resyncs(repo)

    assert drained_count == 0
    assert fake_agent.instances == []

View File

@@ -0,0 +1,112 @@
"""Per-decky, per-service config roundtrips through persist + compose."""
from __future__ import annotations
import pytest
import yaml
from decnet.topology.compose import generate_topology_compose
from decnet.topology.config import TopologyConfig
from decnet.topology.generator import generate
from decnet.topology.persistence import hydrate, persist
from decnet.web.db.factory import get_repository
def _cfg(**kw) -> TopologyConfig:
    """Return a one-LAN-deep, ssh-only ``TopologyConfig``; ``kw`` overrides."""
    defaults = {
        "name": "svc",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 5,
    }
    # Later entries win, so caller-supplied values replace the defaults.
    return TopologyConfig(**{**defaults, **kw})
@pytest.fixture
async def repo(tmp_path):
    """Provide an initialized repository whose DB file lives under ``tmp_path``."""
    db_file = tmp_path / "svc.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
@pytest.mark.anyio
async def test_service_config_roundtrips(repo):
    """A per-service override set on the plan survives persist + hydrate."""
    plan = generate(_cfg())
    target = plan.deckies[0]
    # Mimics the override an operator would write through the web editor.
    target.service_config = {"ssh": {"password": "megapassword"}}

    tid = await persist(repo, plan)
    hydrated = await hydrate(repo, tid)

    stored = next(d for d in hydrated["deckies"] if d["name"] == target.name)
    expected = {"ssh": {"password": "megapassword"}}
    assert stored["decky_config"]["service_config"] == expected
@pytest.mark.anyio
async def test_service_config_reaches_compose_fragment(repo):
    """The ssh password override shows up as ``SSH_ROOT_PASSWORD`` in compose."""
    plan = generate(_cfg())
    target = plan.deckies[0]
    target.service_config = {"ssh": {"password": "megapassword"}}

    tid = await persist(repo, plan)
    compose = generate_topology_compose(await hydrate(repo, tid))

    # The ssh fragment is keyed "<decky>-ssh" (see compose.py:107).
    fragment = compose["services"][f"{target.name}-ssh"]
    environment = fragment.get("environment", {})
    assert environment.get("SSH_ROOT_PASSWORD") == "megapassword"
@pytest.mark.anyio
async def test_missing_service_config_defaults_work(repo):
    """Without an override the ssh fragment carries the default password."""
    plan = generate(_cfg())

    tid = await persist(repo, plan)
    compose = generate_topology_compose(await hydrate(repo, tid))

    fragment = compose["services"][f"{plan.deckies[0].name}-ssh"]
    assert fragment["environment"]["SSH_ROOT_PASSWORD"] == "admin"
@pytest.mark.anyio
async def test_unknown_nested_key_passes_through(repo):
    """Forward-compat: an unrecognised key under a service is kept as-is.

    The extra key must still be present in the hydrated ``service_config``
    after a persist + hydrate roundtrip.
    """
    plan = generate(_cfg())
    target = plan.deckies[0]
    target.service_config = {"ssh": {"password": "x", "future_flag": "hi"}}

    tid = await persist(repo, plan)
    hydrated = await hydrate(repo, tid)

    stored = next(d for d in hydrated["deckies"] if d["name"] == target.name)
    ssh_config = stored["decky_config"]["service_config"]["ssh"]
    assert ssh_config["future_flag"] == "hi"
@pytest.mark.anyio
async def test_compose_file_yaml_is_loadable(repo):
    """Regression: the compose dict survives ``yaml.dump`` + ``yaml.safe_load``."""
    plan = generate(_cfg())
    target = plan.deckies[0]
    target.service_config = {"ssh": {"password": "roundtrip"}}

    tid = await persist(repo, plan)
    compose = generate_topology_compose(await hydrate(repo, tid))

    text = yaml.dump(compose, sort_keys=False)
    reloaded = yaml.safe_load(text)

    environment = reloaded["services"][f"{target.name}-ssh"]["environment"]
    assert environment["SSH_ROOT_PASSWORD"] == "roundtrip"

View File

@@ -0,0 +1,55 @@
"""MazeNET status state-machine tests.
Every legal transition declared in the plan is permitted; every other
pair (including self-loops and unknowns) must raise.
"""
import pytest
from decnet.topology.status import (
TopologyStatus,
TopologyStatusError,
assert_transition,
legal_next,
)
# Every permitted (current, next) pair, written as one row per source status
# and flattened into a set of 2-tuples.
LEGAL = {
    (src, dst)
    for src, targets in (
        (
            TopologyStatus.PENDING,
            (TopologyStatus.DEPLOYING, TopologyStatus.TORN_DOWN),
        ),
        (
            TopologyStatus.DEPLOYING,
            (
                TopologyStatus.ACTIVE,
                TopologyStatus.FAILED,
                TopologyStatus.DEGRADED,
                TopologyStatus.TEARING_DOWN,
            ),
        ),
        (
            TopologyStatus.ACTIVE,
            (TopologyStatus.DEGRADED, TopologyStatus.TEARING_DOWN),
        ),
        (
            TopologyStatus.DEGRADED,
            (TopologyStatus.ACTIVE, TopologyStatus.TEARING_DOWN),
        ),
        (
            TopologyStatus.FAILED,
            (TopologyStatus.TEARING_DOWN,),
        ),
        (
            TopologyStatus.TEARING_DOWN,
            (TopologyStatus.TORN_DOWN, TopologyStatus.DEGRADED),
        ),
    )
    for dst in targets
}
def test_every_legal_transition_permitted():
    """``assert_transition`` accepts every pair listed in ``LEGAL``."""
    for pair in LEGAL:
        # Passing means no exception was raised.
        assert_transition(*pair)
def test_every_illegal_transition_raises():
    """Every status pair outside ``LEGAL`` (self-loops included) is rejected."""
    for cur in TopologyStatus.ALL:
        for nxt in TopologyStatus.ALL:
            if (cur, nxt) not in LEGAL:
                with pytest.raises(TopologyStatusError):
                    assert_transition(cur, nxt)
def test_torn_down_is_terminal():
    """TORN_DOWN has no legal successor."""
    successors = legal_next(TopologyStatus.TORN_DOWN)
    assert successors == frozenset()
def test_unknown_status_raises():
    """An unrecognised status is rejected as target, as source, and by lookup."""
    offending_calls = (
        lambda: assert_transition("pending", "bogus"),
        lambda: assert_transition("bogus", "active"),
        lambda: legal_next("bogus"),
    )
    for call in offending_calls:
        with pytest.raises(TopologyStatusError):
            call()

View File

@@ -0,0 +1,178 @@
"""Validator-rule unit tests + deployer precondition integration."""
from __future__ import annotations
from unittest.mock import patch
import pytest
from decnet.engine.deployer import deploy_topology
from decnet.topology.config import TopologyConfig
from decnet.topology.generator import generate
from decnet.topology.persistence import hydrate, persist
from decnet.topology.status import TopologyStatus
from decnet.topology.validate import (
ValidationError,
errors,
validate,
)
from decnet.web.db.factory import get_repository
def _cfg(**kw) -> TopologyConfig:
    """Return a one-LAN-deep, ssh-only ``TopologyConfig``; ``kw`` overrides."""
    defaults = {
        "name": "val",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 9,
    }
    # Later entries win, so caller-supplied values replace the defaults.
    return TopologyConfig(**{**defaults, **kw})
@pytest.fixture
async def repo(tmp_path):
    """Provide an initialized repository whose DB file lives under ``tmp_path``."""
    db_file = tmp_path / "val.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
async def _hydrate_plan(repo, plan) -> tuple[dict, str]:
    """Persist ``plan`` and return ``(hydrated, topology_id)``.

    The function returns a 2-tuple, not a bare dict: callers unpack it as
    ``h, _ = await _hydrate_plan(...)``. The return annotation reflects that.
    """
    tid = await persist(repo, plan)
    hydrated = await hydrate(repo, tid)
    return hydrated, tid
# --------------------------------------------------------------------- rules
@pytest.mark.anyio
async def test_valid_topology_has_no_errors(repo):
    """An untouched generated topology yields no validation errors."""
    hydrated, _tid = await _hydrate_plan(repo, generate(_cfg()))
    issues = validate(hydrated)
    assert errors(issues) == []
@pytest.mark.anyio
async def test_dmz_missing(repo):
    """Clearing ``is_dmz`` on every LAN produces a ``DMZ_MISSING`` error."""
    hydrated, _tid = await _hydrate_plan(repo, generate(_cfg()))
    for lan in hydrated["lans"]:
        lan["is_dmz"] = False

    error_codes = [
        issue.code for issue in validate(hydrated) if issue.severity == "error"
    ]
    # Other codes (e.g. cascaded DMZ_ORPHAN) may appear as well; only the
    # presence of DMZ_MISSING is required.
    assert "DMZ_MISSING" in error_codes
@pytest.mark.anyio
async def test_dmz_multiple(repo):
    """Flagging every LAN as DMZ is reported as ``DMZ_MULTIPLE``."""
    hydrated, _tid = await _hydrate_plan(repo, generate(_cfg()))
    for lan in hydrated["lans"]:
        lan["is_dmz"] = True

    codes = [issue.code for issue in validate(hydrated)]
    assert "DMZ_MULTIPLE" in codes
@pytest.mark.anyio
async def test_orphan_decky(repo):
    """A decky with all of its edges removed is reported as ``DECKY_ORPHAN``."""
    hydrated, _tid = await _hydrate_plan(repo, generate(_cfg()))

    orphan_uuid = hydrated["deckies"][0]["uuid"]
    hydrated["edges"] = [
        edge for edge in hydrated["edges"] if edge["decky_uuid"] != orphan_uuid
    ]

    codes = [issue.code for issue in validate(hydrated)]
    assert "DECKY_ORPHAN" in codes
@pytest.mark.anyio
async def test_ip_collision(repo):
    """Two deckies given the same IP in one LAN trigger ``IP_COLLISION``."""
    plan = generate(_cfg(deckies_per_lan_max=2, deckies_per_lan_min=2))
    hydrated, _tid = await _hydrate_plan(repo, plan)

    # Collect the deckies that have an edge into the first LAN.
    first_lan = hydrated["lans"][0]
    member_uuids = {
        edge["decky_uuid"]
        for edge in hydrated["edges"]
        if edge["lan_id"] == first_lan["id"]
    }
    members = [d for d in hydrated["deckies"] if d["uuid"] in member_uuids]
    assert len(members) >= 2

    # Copy the first member's first recorded IP onto the second member's
    # entry for that LAN.
    donor_ips = members[0]["decky_config"]["ips_by_lan"]
    shared_ip = next(iter(donor_ips.values()))
    members[1]["decky_config"]["ips_by_lan"][first_lan["name"]] = shared_ip

    codes = [issue.code for issue in validate(hydrated)]
    assert "IP_COLLISION" in codes
@pytest.mark.anyio
async def test_ip_out_of_subnet(repo):
    """Overwriting a decky's LAN IP with 10.99.99.99 yields ``IP_OUT_OF_SUBNET``."""
    hydrated, _tid = await _hydrate_plan(repo, generate(_cfg()))

    ips_by_lan = hydrated["deckies"][0]["decky_config"]["ips_by_lan"]
    first_lan_name = next(iter(ips_by_lan))
    ips_by_lan[first_lan_name] = "10.99.99.99"

    codes = [issue.code for issue in validate(hydrated)]
    assert "IP_OUT_OF_SUBNET" in codes
@pytest.mark.anyio
async def test_subnet_overlap(repo):
    """Overlapping LAN subnets are reported as ``SUBNET_OVERLAP``."""
    hydrated, _tid = await _hydrate_plan(repo, generate(_cfg()))
    lans = hydrated["lans"]

    # Widen the first LAN to a /16 and, when a second LAN exists, place it
    # on a /24 that falls inside that /16.
    lans[0]["subnet"] = "10.0.0.0/16"
    if len(lans) > 1:
        lans[1]["subnet"] = "10.0.5.0/24"

    codes = [issue.code for issue in validate(hydrated)]
    assert "SUBNET_OVERLAP" in codes
@pytest.mark.anyio
async def test_unknown_service(repo):
    """A made-up service name on a decky is reported as ``UNKNOWN_SERVICE``."""
    hydrated, _tid = await _hydrate_plan(repo, generate(_cfg()))

    hydrated["deckies"][0]["services"].append("teleporter-xyz")

    codes = [issue.code for issue in validate(hydrated)]
    assert "UNKNOWN_SERVICE" in codes
@pytest.mark.anyio
async def test_service_config_undeclared(repo):
    """Config for a service the decky does not run is flagged."""
    hydrated, _tid = await _hydrate_plan(repo, generate(_cfg()))

    # The config only declares "ssh", so an "rdp" entry has no matching service.
    first_decky = hydrated["deckies"][0]
    first_decky["decky_config"]["service_config"] = {"rdp": {"password": "no"}}

    codes = [issue.code for issue in validate(hydrated)]
    assert "SERVICE_CFG_UNDECLARED" in codes
# --------------------------------------------------------------------- deployer hook
@pytest.mark.anyio
async def test_deploy_aborts_on_validation_error(repo, tmp_path, monkeypatch):
    """An invalid topology is rejected before any Docker call is made."""
    monkeypatch.chdir(tmp_path)
    tid = await persist(repo, generate(_cfg()))

    # Break the stored topology by clearing the DMZ flag on its first LAN.
    # This goes through a raw session: load the row, mutate it, commit.
    from sqlmodel import select

    from decnet.web.db.models import LAN

    first_lan = (await repo.list_lans_for_topology(tid))[0]
    async with repo._session() as session:
        result = await session.execute(select(LAN).where(LAN.id == first_lan["id"]))
        lan_row = result.scalar_one()
        lan_row.is_dmz = False
        session.add(lan_row)
        await session.commit()

    class _ShouldNotCall:
        """Stand-in for the docker module that fails the test if it is used."""

        def from_env(self):  # noqa: D401
            raise AssertionError("docker must not be called on a rejected topology")

    docker_guard = patch("decnet.engine.deployer.docker", _ShouldNotCall())
    with docker_guard, pytest.raises(ValidationError):
        await deploy_topology(repo, tid)

    # The rejected deploy must leave the topology in PENDING.
    topo = await repo.get_topology(tid)
    assert topo["status"] == TopologyStatus.PENDING