merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
0
tests/topology/__init__.py
Normal file
0
tests/topology/__init__.py
Normal file
198
tests/topology/test_allocator.py
Normal file
198
tests/topology/test_allocator.py
Normal file
@@ -0,0 +1,198 @@
|
||||
"""Allocator unit + integration tests."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.topology.allocator import (
|
||||
AllocatorExhausted,
|
||||
IPAllocator,
|
||||
SubnetAllocator,
|
||||
reserved_subnets,
|
||||
)
|
||||
from decnet.topology.config import TopologyConfig
|
||||
from decnet.topology.generator import generate
|
||||
from decnet.topology.persistence import persist, transition_status
|
||||
from decnet.topology.status import TopologyStatus
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- IPAllocator
|
||||
|
||||
|
||||
def test_ip_allocator_sequential_skips_gateway():
    """Five sequential allocations from a /29 yield .2 through .6, never .1."""
    allocator = IPAllocator("10.0.0.0/29")  # hosts: .1 .. .6; .1 is gateway
    expected = [
        "10.0.0.2",
        "10.0.0.3",
        "10.0.0.4",
        "10.0.0.5",
        "10.0.0.6",
    ]
    handed_out = [allocator.next_free() for _ in range(len(expected))]
    assert handed_out == expected
|
||||
|
||||
|
||||
def test_ip_allocator_reserve_release_roundtrip():
    """A reserved address reports as not free; releasing it makes it free again."""
    address = "10.0.0.3"
    allocator = IPAllocator("10.0.0.0/29")

    allocator.reserve(address)
    assert not allocator.is_free(address)

    allocator.release(address)
    assert allocator.is_free(address)
|
||||
|
||||
|
||||
def test_ip_allocator_reserve_rejects_gateway():
    """Reserving 10.0.0.1 (the gateway, per the test name) raises ValueError."""
    gateway = "10.0.0.1"
    allocator = IPAllocator("10.0.0.0/29")
    with pytest.raises(ValueError):
        allocator.reserve(gateway)
|
||||
|
||||
|
||||
def test_ip_allocator_reserve_rejects_out_of_subnet():
    """Reserving an address outside 10.0.0.0/29 raises ValueError."""
    outsider = "10.0.0.100"
    allocator = IPAllocator("10.0.0.0/29")
    with pytest.raises(ValueError):
        allocator.reserve(outsider)
|
||||
|
||||
|
||||
def test_ip_allocator_next_free_after_reserve_skips():
    """With .2 explicitly reserved, the next sequential allocation is .3."""
    allocator = IPAllocator("10.0.0.0/29")
    allocator.reserve("10.0.0.2")
    following = allocator.next_free()
    assert following == "10.0.0.3"
|
||||
|
||||
|
||||
def test_ip_allocator_exhaustion_raises():
    """A /30 yields exactly one address; the second request raises AllocatorExhausted."""
    # hosts: .1 .. .2; .1 gateway → only .2 usable
    allocator = IPAllocator("10.0.0.0/30")
    only_address = allocator.next_free()
    assert only_address == "10.0.0.2"
    with pytest.raises(AllocatorExhausted):
        allocator.next_free()
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- SubnetAllocator
|
||||
|
||||
|
||||
def test_subnet_allocator_sequential():
    """A two-octet base hands out consecutive /24s starting at x.y.0.0/24."""
    allocator = SubnetAllocator("172.20")
    for expected in ("172.20.0.0/24", "172.20.1.0/24", "172.20.2.0/24"):
        assert allocator.next_free() == expected
|
||||
|
||||
|
||||
def test_subnet_allocator_skips_reserved():
    """Subnets passed via ``reserved=`` are never returned by next_free()."""
    already_taken = {"172.20.0.0/24", "172.20.1.0/24"}
    allocator = SubnetAllocator("172.20", reserved=already_taken)
    assert allocator.next_free() == "172.20.2.0/24"
|
||||
|
||||
|
||||
def test_subnet_allocator_reserve_is_idempotent():
    """Reserving the same subnet twice must behave like reserving it once."""
    s = SubnetAllocator("172.20")
    s.reserve("172.20.0.0/24")
    # The repeat call is the actual idempotence check: it must not raise,
    # and the next free subnet must be the same as after a single reserve.
    s.reserve("172.20.0.0/24")
    assert s.next_free() == "172.20.1.0/24"
|
||||
|
||||
|
||||
def test_subnet_allocator_exhaustion_raises():
    """With all 256 /24s of 10.0 reserved, next_free() raises AllocatorExhausted."""
    everything = set()
    for third_octet in range(256):
        everything.add(f"10.0.{third_octet}.0/24")
    allocator = SubnetAllocator("10.0", reserved=everything)
    with pytest.raises(AllocatorExhausted):
        allocator.next_free()
|
||||
|
||||
|
||||
def test_subnet_allocator_accepts_cidr_base():
    """Full-CIDR base form is equivalent to the legacy two-octet form."""
    allocator = SubnetAllocator("172.20.0.0/16")
    first = allocator.next_free()
    second = allocator.next_free()
    assert first == "172.20.0.0/24"
    assert second == "172.20.1.0/24"
|
||||
|
||||
|
||||
def test_subnet_allocator_slash12_yields_more_than_256_slots():
    """The whole point of widening: a /12 base must outlast a single /16."""
    allocator = SubnetAllocator("172.16.0.0/12")
    # Consume the first 256 /24s. A /16 base would be exhausted at this
    # point; a /12 base should carry on into 172.17.x.x without raising.
    consumed = 0
    while consumed < 256:
        allocator.next_free()
        consumed += 1
    overflow = allocator.next_free()
    assert overflow.startswith("172.17.")
    assert overflow.endswith(".0/24")
|
||||
|
||||
|
||||
def test_subnet_allocator_slash12_total_capacity_is_4096():
    """Draining a /12 base yields exactly 4096 subnets before exhaustion."""
    allocator = SubnetAllocator("172.16.0.0/12")
    allocated = 0
    while True:
        try:
            allocator.next_free()
        except AllocatorExhausted:
            break
        allocated += 1
    assert allocated == 4096
|
||||
|
||||
|
||||
def test_subnet_allocator_rejects_narrower_than_slash24():
    """Constructing with a /25 base raises ValueError mentioning 'narrower than /24'."""
    too_narrow = "192.168.1.0/25"
    with pytest.raises(ValueError, match="narrower than /24"):
        SubnetAllocator(too_narrow)
|
||||
|
||||
|
||||
def test_subnet_allocator_exhausted_message_uses_parent_cidr():
    """The exhaustion error message contains the base CIDR given at construction."""
    parent = "172.20.0.0/24"  # exactly one slot
    allocator = SubnetAllocator(parent)
    allocator.next_free()
    with pytest.raises(AllocatorExhausted, match="172.20.0.0/24"):
        allocator.next_free()
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- reserved_subnets
|
||||
|
||||
|
||||
def _cfg(**kw) -> TopologyConfig:
    """Build a ``TopologyConfig`` from fixed test defaults.

    Any keyword argument overrides (or adds to) the defaults below.
    """
    defaults = {
        "name": "alloc",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 3,
    }
    return TopologyConfig(**{**defaults, **kw})
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path):
    """Yield-free fixture: an initialized repository backed by a per-test DB file."""
    db_file = tmp_path / "alloc.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_reserved_subnets_includes_pending_and_active(repo):
    """LAN subnets of a just-persisted and of an ACTIVE topology are both claimed."""
    pending_plan = generate(_cfg(name="a"))
    await persist(repo, pending_plan)  # pending

    active_plan = generate(_cfg(name="b", subnet_base_prefix="172.21"))
    active_tid = await persist(repo, active_plan)
    # Walk the second topology through DEPLOYING → ACTIVE.
    for status in (TopologyStatus.DEPLOYING, TopologyStatus.ACTIVE):
        await transition_status(repo, active_tid, status)

    claimed = await reserved_subnets(repo)
    for plan in (pending_plan, active_plan):
        for lan in plan.lans:
            assert lan.subnet in claimed
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_reserved_subnets_excludes_torn_down(repo):
    """Once a topology is TORN_DOWN none of its LAN subnets remain claimed."""
    plan = generate(_cfg(name="gone"))
    topology_id = await persist(repo, plan)
    # pending → torn_down is legal
    await transition_status(repo, topology_id, TopologyStatus.TORN_DOWN)

    claimed = await reserved_subnets(repo)
    still_claimed = [lan.subnet for lan in plan.lans if lan.subnet in claimed]
    assert not still_claimed
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_generate_respects_reserved(repo):
    """A plan generated with ``reserved_subnets`` shares no subnet with the first plan."""
    first_plan = generate(_cfg(name="a"))
    await persist(repo, first_plan)
    claimed = await reserved_subnets(repo)

    # Same base as the first topology, but told what is already taken:
    # the generator has to choose subnets outside the first plan's set.
    second_plan = generate(_cfg(name="b"), reserved_subnets=claimed)

    first_subnets = {lan.subnet for lan in first_plan.lans}
    second_subnets = {lan.subnet for lan in second_plan.lans}
    assert second_subnets.isdisjoint(first_subnets)
|
||||
135
tests/topology/test_compose.py
Normal file
135
tests/topology/test_compose.py
Normal file
@@ -0,0 +1,135 @@
|
||||
"""MazeNET compose-generator + teardown-order tests."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.engine.deployer import _teardown_order
|
||||
from decnet.topology.compose import (
|
||||
_container_name,
|
||||
_network_name,
|
||||
generate_topology_compose,
|
||||
)
|
||||
from decnet.topology.config import TopologyConfig
|
||||
from decnet.topology.generator import generate
|
||||
from decnet.topology.persistence import hydrate, persist
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
|
||||
def _cfg(**kw) -> TopologyConfig:
    """Build a ``TopologyConfig`` from fixed test defaults.

    Any keyword argument overrides (or adds to) the defaults below.
    """
    defaults = {
        "name": "cmp",
        "depth": 2,
        "branching_factor": 2,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 9,
    }
    return TopologyConfig(**{**defaults, **kw})
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path):
    """Return an initialized repository backed by a per-test DB file."""
    db_file = tmp_path / "compose.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_compose_has_one_network_per_lan(repo):
    """Compose ``networks`` has exactly one entry per planned LAN, each external."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)
    hydrated = await hydrate(repo, tid)

    compose = generate_topology_compose(hydrated)
    networks = compose["networks"]

    expected_names = {_network_name(tid, lan.name) for lan in plan.lans}
    assert set(networks.keys()) == expected_names
    for definition in networks.values():
        assert definition["external"] is True
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_compose_multi_home_bridge_decky(repo):
    """Each decky's base service is attached to every LAN in its ``ips_by_lan``.

    For every hydrated decky the base service must carry the expected
    container name, one network entry per LAN, and the planned IPv4
    address on each of those networks.
    """
    plan = generate(_cfg())
    tid = await persist(repo, plan)
    hydrated = await hydrate(repo, tid)
    services = generate_topology_compose(hydrated)["services"]

    for decky in hydrated["deckies"]:
        decky_cfg = decky["decky_config"]
        decky_name = decky_cfg["name"]
        ips_by_lan = decky_cfg["ips_by_lan"]
        base = services[decky_name]

        assert base["container_name"] == _container_name(tid, decky_name)
        assert len(base["networks"]) == len(ips_by_lan)
        for lan_name, ip in ips_by_lan.items():
            attachment = base["networks"][_network_name(tid, lan_name)]
            assert attachment["ipv4_address"] == ip
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_compose_forwards_l3_sets_sysctl(repo):
    """Deckies flagged ``forwards_l3`` get ip_forward=1 and NET_ADMIN on their base."""
    # bridge_forward_probability=1.0 so that forwarding deckies are present.
    plan = generate(_cfg(bridge_forward_probability=1.0))
    tid = await persist(repo, plan)
    hydrated = await hydrate(repo, tid)
    services = generate_topology_compose(hydrated)["services"]

    forwarder_names = []
    for decky in hydrated["deckies"]:
        if decky["decky_config"].get("forwards_l3"):
            forwarder_names.append(decky["decky_config"]["name"])
    assert forwarder_names, "expected at least one forwarding bridge decky"

    for name in forwarder_names:
        base = services[name]
        assert base["sysctls"]["net.ipv4.ip_forward"] == 1
        assert "NET_ADMIN" in base["cap_add"]
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_compose_labels_service_containers_for_collector(repo):
    """Service fragments must carry ``decnet.topology.service=true`` so
    the host-side collector picks up their log streams — the old fleet
    state file never mentions topology containers."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)
    hydrated = await hydrate(repo, tid)
    data = generate_topology_compose(hydrated)

    # Names of the per-decky base containers. Built once and reused for
    # both the service filter and the base-label checks below, instead of
    # being recomputed for every candidate key.
    base_keys = {d["decky_config"]["name"] for d in hydrated["deckies"]}

    # A service container is any compose key containing "-" that is not
    # itself a base container name.
    service_keys = [
        k for k in data["services"]
        if "-" in k and k not in base_keys
    ]
    assert service_keys, "expected at least one service container"
    for k in service_keys:
        labels = data["services"][k].get("labels") or {}
        assert labels.get("decnet.topology.service") == "true", (
            f"service {k!r} missing collector-discovery label: {labels}"
        )
        assert labels.get("decnet.topology.id") == tid
        assert "decnet.topology.decky" in labels
        assert "decnet.topology.service_name" in labels

    # Base containers get their own label (role=base) but MUST NOT carry
    # the service marker — otherwise the collector double-attaches.
    for k in base_keys:
        labels = data["services"][k].get("labels") or {}
        assert labels.get("decnet.topology.role") == "base"
        assert labels.get("decnet.topology.service") != "true"
|
||||
|
||||
|
||||
def test_teardown_order_is_leaf_first():
    """``_teardown_order`` returns LAN names reversed, ending with LAN-00."""
    lan_names = ("LAN-00", "LAN-01", "LAN-02", "LAN-03")
    order = _teardown_order([{"name": name} for name in lan_names])
    assert order == ["LAN-03", "LAN-02", "LAN-01", "LAN-00"]
    # DMZ is last — nothing should be torn down after LAN-00.
    assert order[-1] == "LAN-00"
|
||||
118
tests/topology/test_concurrency.py
Normal file
118
tests/topology/test_concurrency.py
Normal file
@@ -0,0 +1,118 @@
|
||||
"""Optimistic-concurrency (version) checks on topology child mutations."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.topology.config import TopologyConfig
|
||||
from decnet.topology.generator import generate
|
||||
from decnet.topology.persistence import persist
|
||||
from decnet.topology.status import VersionConflict
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
|
||||
def _cfg(**kw) -> TopologyConfig:
    """Build a ``TopologyConfig`` from fixed test defaults.

    Any keyword argument overrides (or adds to) the defaults below.
    """
    defaults = {
        "name": "ver",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 2,
    }
    return TopologyConfig(**{**defaults, **kw})
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path):
    """Return an initialized repository backed by a per-test DB file."""
    db_file = tmp_path / "ver.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_version_starts_at_one_after_persist(repo):
    """A freshly persisted topology reports ``version == 1``."""
    # persist() writes its LANs/deckies/edges without an expected_version,
    # which is why the version token is still 1 afterwards.
    tid = await persist(repo, generate(_cfg()))
    stored = await repo.get_topology(tid)
    assert stored["version"] == 1
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_happy_path_two_sequential_writes(repo):
    """Two add_lan calls with the current version each bump it by one (1→2→3)."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)

    writes = [("LAN-A", "10.9.0.0/24"), ("LAN-B", "10.9.1.0/24")]
    for version_before, (lan_name, subnet) in enumerate(writes, start=1):
        await repo.add_lan(
            {"topology_id": tid, "name": lan_name, "subnet": subnet, "is_dmz": False},
            expected_version=version_before,
        )
        current = (await repo.get_topology(tid))["version"]
        assert current == version_before + 1
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_stale_expected_version_raises(repo):
    """Re-using an already-consumed version raises VersionConflict with both values."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)

    first_lan = {"topology_id": tid, "name": "LAN-A", "subnet": "10.8.0.0/24", "is_dmz": False}
    second_lan = {"topology_id": tid, "name": "LAN-B", "subnet": "10.8.1.0/24", "is_dmz": False}

    await repo.add_lan(first_lan, expected_version=1)
    with pytest.raises(VersionConflict) as ei:
        # stale: the write above already moved the version past 1
        await repo.add_lan(second_lan, expected_version=1)

    conflict = ei.value
    assert conflict.current == 2
    assert conflict.expected == 1
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_no_expected_version_skips_check(repo):
    """add_lan without ``expected_version`` succeeds and leaves the version as is.

    This is the calling style of pre-existing callers such as persist().
    """
    plan = generate(_cfg())
    tid = await persist(repo, plan)

    async def _current_version():
        return (await repo.get_topology(tid))["version"]

    before = await _current_version()
    await repo.add_lan(
        {"topology_id": tid, "name": "LAN-X", "subnet": "10.7.0.0/24", "is_dmz": False}
    )
    after = await _current_version()
    assert before == after  # no bump when version not asserted
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_update_topology_decky_bumps_version(repo):
    """update_topology_decky with ``expected_version=1`` moves the version to 2."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)
    decky = (await repo.list_topology_deckies(tid))[0]

    new_config = {
        "name": decky["name"],
        "services": ["ssh"],
        "ips_by_lan": decky["decky_config"]["ips_by_lan"],
        "forwards_l3": False,
        "service_config": {"ssh": {"password": "x"}},
    }
    await repo.update_topology_decky(
        decky["uuid"],
        {"decky_config": new_config},
        expected_version=1,
    )

    stored = await repo.get_topology(tid)
    assert stored["version"] == 2
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_update_lan_bumps_version(repo):
    """update_lan with ``expected_version=1`` moves the version to 2."""
    tid = await persist(repo, generate(_cfg()))
    lans = await repo.list_lans_for_topology(tid)
    first_lan = lans[0]
    await repo.update_lan(first_lan["id"], {"name": "LAN-RENAMED"}, expected_version=1)
    stored = await repo.get_topology(tid)
    assert stored["version"] == 2
|
||||
236
tests/topology/test_deploy.py
Normal file
236
tests/topology/test_deploy.py
Normal file
@@ -0,0 +1,236 @@
|
||||
"""Deploy/teardown integration tests for MazeNET topologies.
|
||||
|
||||
Docker-touching paths live behind ``@pytest.mark.live`` per
|
||||
feedback_skip_heavy_tests.md. The non-live path here exercises dry-run
|
||||
deploy (compose file is written, repo status is left untouched) and the
|
||||
state-machine around failure/teardown using a stub repo.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.engine.deployer import (
|
||||
_teardown_order,
|
||||
_topology_compose_path,
|
||||
deploy_topology,
|
||||
teardown_topology,
|
||||
)
|
||||
from decnet.topology.config import TopologyConfig
|
||||
from decnet.topology.generator import generate
|
||||
from decnet.topology.persistence import persist
|
||||
from decnet.topology.status import TopologyStatus
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
|
||||
def _cfg(**kw) -> TopologyConfig:
    """Build a ``TopologyConfig`` from fixed test defaults.

    Any keyword argument overrides (or adds to) the defaults below.
    """
    defaults = {
        "name": "dep",
        "depth": 2,
        "branching_factor": 2,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 11,
    }
    return TopologyConfig(**{**defaults, **kw})
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path):
    """Return an initialized repository backed by a per-test DB file."""
    db_file = tmp_path / "dep.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_dry_run_writes_compose_and_preserves_pending(repo, tmp_path, monkeypatch):
    """A dry-run deploy writes the compose file and leaves the status PENDING."""
    monkeypatch.chdir(tmp_path)
    tid = await persist(repo, generate(_cfg()))

    await deploy_topology(repo, tid, dry_run=True)

    assert _topology_compose_path(tid).exists(), "dry run must emit a compose file"

    status_after = (await repo.get_topology(tid))["status"]
    assert status_after == TopologyStatus.PENDING, (
        "dry run must not transition status"
    )
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_deploy_failure_transitions_to_failed(repo, tmp_path, monkeypatch):
    """If compose-up fails, status lands at FAILED with the reason logged."""
    monkeypatch.chdir(tmp_path)
    tid = await persist(repo, generate(_cfg()))

    class _BoomClient:
        """Docker client stand-in whose network creation always raises."""

        def __init__(self):
            # The client doubles as its own ``networks`` collection.
            self.networks = self

        def list(self, names=None, filters=None):  # noqa: ARG002
            return []

        def create(self, *a, **kw):  # noqa: ARG002
            raise RuntimeError("boom: docker daemon unreachable")

    docker_patch = patch(
        "decnet.engine.deployer.docker.from_env", return_value=_BoomClient()
    )
    with docker_patch, pytest.raises(RuntimeError, match="boom"):
        await deploy_topology(repo, tid)

    stored = await repo.get_topology(tid)
    assert stored["status"] == TopologyStatus.FAILED

    # Events are returned newest-first.
    newest = (await repo.list_topology_status_events(tid))[0]
    assert newest["to_status"] == TopologyStatus.FAILED
    assert "boom" in (newest["reason"] or "")
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_deploy_failure_rolls_back_created_networks(repo, tmp_path, monkeypatch):
    """Networks created before the failing op must be removed on rollback.

    Reproduces the ``Pool overlaps`` regression: a failed deploy left
    partial networks alive and the next deploy hit an IPAM conflict."""
    monkeypatch.chdir(tmp_path)
    tid = await persist(repo, generate(_cfg()))

    class _FakeNet:
        """Network handle that reports its own removal back to the client."""

        def __init__(self, name, client):
            self.name = name
            self.id = f"id-{name}"
            self.attrs = {"Containers": {}}
            self._client = client

        def remove(self):
            self._client.removed.append(self.name)
            self._client._created_objs.pop(self.name, None)

    class _PartialClient:
        """Docker client stand-in: first create succeeds, later ones raise."""

        def __init__(self):
            # The client doubles as its own ``networks`` collection.
            self.networks = self
            self.created: list[str] = []
            self.removed: list[str] = []
            self._call = 0
            self._created_objs: dict[str, _FakeNet] = {}

        def list(self, names=None, filters=None):  # noqa: ARG002
            known = self._created_objs
            return [known[n] for n in (names or ()) if n in known]

        def create(self, name, *a, **kw):  # noqa: ARG002
            self._call += 1
            # Only the very first create succeeds; the second call (and any
            # after it) fails.
            if self._call >= 2:
                raise RuntimeError("boom: pool overlap")
            net = _FakeNet(name, self)
            self.created.append(name)
            self._created_objs[name] = net
            return net

    fake = _PartialClient()
    with patch("decnet.engine.deployer.docker.from_env", return_value=fake), \
            patch("decnet.engine.deployer._compose") as mock_down:
        with pytest.raises(RuntimeError, match="boom"):
            await deploy_topology(repo, tid)
        # compose down is invoked only when compose was actually started
        # OR a partial compose file exists; create_bridge_network failed
        # before write_topology_compose, so _compose should not have run.
        mock_down.assert_not_called()

    # Every network created this attempt must have been removed on rollback.
    assert set(fake.removed) == set(fake.created)

    stored = await repo.get_topology(tid)
    assert stored["status"] == TopologyStatus.FAILED
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_teardown_from_failed_marks_torn_down(repo, tmp_path, monkeypatch):
    """Tearing down a FAILED topology leaves it in TORN_DOWN."""
    from decnet.topology.persistence import transition_status

    monkeypatch.chdir(tmp_path)
    tid = await persist(repo, generate(_cfg()))

    # Reach FAILED through the legal path: pending → deploying → failed.
    await transition_status(repo, tid, TopologyStatus.DEPLOYING)
    await transition_status(repo, tid, TopologyStatus.FAILED, reason="test")

    class _StubClient:
        """Docker client stand-in that reports no networks at all."""

        def __init__(self):
            # The client doubles as its own ``networks`` collection.
            self.networks = self

        def list(self, names=None, filters=None):  # noqa: ARG002
            return []

    stub = _StubClient()
    with patch("decnet.engine.deployer.docker.from_env", return_value=stub):
        await teardown_topology(repo, tid)

    stored = await repo.get_topology(tid)
    assert stored["status"] == TopologyStatus.TORN_DOWN
|
||||
|
||||
|
||||
def test_teardown_order_is_stable():
    """Five LANs named LAN-00..LAN-04 come back in reverse name order."""
    lans = []
    for index in range(5):
        lans.append({"name": f"LAN-{index:02d}"})
    expected = ["LAN-04", "LAN-03", "LAN-02", "LAN-01", "LAN-00"]
    assert _teardown_order(lans) == expected
|
||||
|
||||
|
||||
@pytest.mark.live
@pytest.mark.anyio
async def test_deploy_and_teardown_against_real_docker(repo, tmp_path, monkeypatch):
    """End-to-end: create real Docker bridge networks, verify, tear down.

    Skipped on CI; run locally with ``pytest -m live tests/topology``.
    Does NOT run ``docker compose up`` — that's exercised by the flat
    fleet tests. This test covers the topology-specific paths only
    (LAN network creation, multi-home bridge wiring, teardown order).
    """
    monkeypatch.chdir(tmp_path)
    docker = pytest.importorskip("docker")
    try:
        client = docker.from_env()
        client.ping()
    except Exception as exc:  # pragma: no cover - environment-specific
        pytest.skip(f"docker daemon not reachable: {exc}")

    plan = generate(_cfg(depth=1, branching_factor=1))
    tid = await persist(repo, plan)

    from decnet.topology.compose import _network_name
    # Imported before the ``try`` so the ``finally`` cleanup can always
    # resolve ``remove_bridge_network``. When this import lived inside the
    # ``try`` after deploy_topology(), a failure there made the cleanup
    # raise UnboundLocalError and hide the real error.
    from decnet.network import create_bridge_network, remove_bridge_network

    try:
        await deploy_topology(repo, tid, dry_run=True)
        # Dry run doesn't create networks. Now exercise the real path by
        # creating just the networks (no compose up) and tearing down.
        for lan in plan.lans:
            create_bridge_network(
                client,
                _network_name(tid, lan.name),
                lan.subnet,
                internal=not lan.is_dmz,
            )
        existing = {n.name for n in client.networks.list()}
        for lan in plan.lans:
            assert _network_name(tid, lan.name) in existing
    finally:
        for lan in plan.lans:
            remove_bridge_network(client, _network_name(tid, lan.name))

    remaining = {n.name for n in client.networks.list()}
    for lan in plan.lans:
        assert _network_name(tid, lan.name) not in remaining

    # Compose artifact cleanup
    p = _topology_compose_path(tid)
    if p.exists():
        p.unlink()
    # Sanity: Path roundtrip still resolvable
    assert isinstance(Path(str(p)), Path)
|
||||
168
tests/topology/test_deploy_agent_branch.py
Normal file
168
tests/topology/test_deploy_agent_branch.py
Normal file
@@ -0,0 +1,168 @@
|
||||
"""Agent-branch routing inside deploy_topology / teardown_topology.
|
||||
|
||||
Exercises the target_host_uuid branch added in Step 6. We never hit a
|
||||
real agent — AgentClient is swapped out for a recording fake so we
|
||||
assert the right hydrated blob + version hash are forwarded and the
|
||||
master's status machine advances as expected.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.engine import deployer as _deployer
|
||||
from decnet.topology.config import TopologyConfig
|
||||
from decnet.topology.generator import generate
|
||||
from decnet.topology.hashing import canonical_hash
|
||||
from decnet.topology.persistence import persist
|
||||
from decnet.topology.status import TopologyStatus
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
|
||||
def _cfg(**kw) -> TopologyConfig:
    """Build a ``TopologyConfig`` (``mode="agent"``) from fixed test defaults.

    Any keyword argument overrides (or adds to) the defaults below.
    """
    defaults = {
        "name": "agent-branch",
        "mode": "agent",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 7,
    }
    return TopologyConfig(**{**defaults, **kw})
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path):
    """Return an initialized repository backed by a per-test DB file."""
    db_file = tmp_path / "agent-branch.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
async def _seed_host(repo, uuid_: str = "h-1") -> None:
    """Register one swarm host row via ``repo.add_swarm_host``.

    ``uuid_`` becomes the row's ``uuid`` and is embedded in its ``name``;
    every other field is a fixed placeholder value.
    """
    host_row = {
        "uuid": uuid_,
        "name": f"host-{uuid_}",
        "address": "10.9.9.9",
        "agent_port": 8765,
        "status": "active",
        "client_cert_fingerprint": "a" * 64,
        "cert_bundle_path": "/tmp/ignored",
    }
    await repo.add_swarm_host(host_row)
|
||||
|
||||
|
||||
class _FakeAgentClient:
    """Recording fake: logs each call in memory and never touches the network.

    Every constructed instance is appended to the class-level ``instances``
    list. Each call is logged on the instance's ``calls`` list as a
    ``(verb, positional_args, keyword_args)`` tuple.
    """

    # Shared across all instances (and subclasses that do not override it).
    instances: list["_FakeAgentClient"] = []

    def __init__(self, *, host: dict[str, Any]) -> None:
        self.calls: list[tuple[str, tuple, dict]] = []
        self.host = host
        _FakeAgentClient.instances.append(self)

    async def __aenter__(self) -> "_FakeAgentClient":
        return self

    async def __aexit__(self, *_exc) -> None:
        return None

    async def apply_topology(self, hydrated, version_hash):
        """Log an ``apply`` call and echo ``version_hash`` back as applied."""
        entry = ("apply", (hydrated, version_hash), {})
        self.calls.append(entry)
        return {"status": "applied", "version_hash": version_hash}

    async def teardown_topology(self, topology_id):
        """Log a ``teardown`` call and echo ``topology_id`` back as torn down."""
        entry = ("teardown", (topology_id,), {})
        self.calls.append(entry)
        return {"status": "torn_down", "topology_id": topology_id}
|
||||
|
||||
|
||||
@pytest.fixture
def fake_agent(monkeypatch: pytest.MonkeyPatch):
    """Replace ``decnet.swarm.client.AgentClient`` with ``_FakeAgentClient``.

    Clears the fake's shared ``instances`` list first and returns the fake
    class so tests can inspect what was constructed.
    """
    _FakeAgentClient.instances.clear()
    # The attribute is patched on the defining module because the code
    # under test does `from decnet.swarm.client import AgentClient` at
    # call time, inside its functions.
    import decnet.swarm.client as swarm_client_module

    monkeypatch.setattr(swarm_client_module, "AgentClient", _FakeAgentClient)
    return _FakeAgentClient
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_deploy_on_agent_routes_via_agent_client(repo, fake_agent) -> None:
    """Deploying a host-targeted topology makes one ``apply`` call and ends ACTIVE."""
    host_uuid = "h-deploy"
    await _seed_host(repo, host_uuid)
    tid = await persist(repo, generate(_cfg()), target_host_uuid=host_uuid)

    await _deployer.deploy_topology(repo, tid)

    # Exactly one AgentClient, one apply call.
    assert len(fake_agent.instances) == 1
    client = fake_agent.instances[0]
    assert client.host["uuid"] == "h-deploy"
    assert len(client.calls) == 1

    verb, positional, _ = client.calls[0]
    hydrated, version_hash = positional
    assert verb == "apply"
    assert hydrated["topology"]["id"] == tid
    assert version_hash == canonical_hash(hydrated)

    stored = await repo.get_topology(tid)
    assert stored["status"] == TopologyStatus.ACTIVE
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_deploy_on_agent_failure_marks_failed(repo, monkeypatch) -> None:
    """When the agent's apply raises, the error propagates and status is FAILED."""
    import decnet.swarm.client as swarm_client_module

    host_uuid = "h-fail"
    await _seed_host(repo, host_uuid)
    plan = generate(_cfg(name="agent-fail"))
    tid = await persist(repo, plan, target_host_uuid=host_uuid)

    class _BoomClient(_FakeAgentClient):
        """Fake agent client whose apply_topology always raises."""

        async def apply_topology(self, hydrated, version_hash):
            raise RuntimeError("agent refused")

    monkeypatch.setattr(swarm_client_module, "AgentClient", _BoomClient)

    with pytest.raises(RuntimeError, match="agent refused"):
        await _deployer.deploy_topology(repo, tid)

    stored = await repo.get_topology(tid)
    assert stored["status"] == TopologyStatus.FAILED
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_deploy_on_agent_unknown_host_raises(repo, fake_agent) -> None:
    """Deploying to a host uuid that was never seeded raises ValueError."""
    missing_host = "nope"
    plan = generate(_cfg(name="agent-missing"))
    tid = await persist(repo, plan, target_host_uuid=missing_host)

    with pytest.raises(ValueError, match="unknown swarm host"):
        await _deployer.deploy_topology(repo, tid)

    # No AgentClient should ever be constructed for a nonexistent host.
    assert fake_agent.instances == []
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_teardown_on_agent_routes_via_agent_client(repo, fake_agent) -> None:
    """Tearing down a host-targeted ACTIVE topology makes one ``teardown`` call."""
    from decnet.topology.persistence import transition_status

    host_uuid = "h-teardown"
    await _seed_host(repo, host_uuid)
    plan = generate(_cfg(name="agent-down"))
    tid = await persist(repo, plan, target_host_uuid=host_uuid)

    # Put the topology into ACTIVE, a state the teardown will accept.
    for status in (TopologyStatus.DEPLOYING, TopologyStatus.ACTIVE):
        await transition_status(repo, tid, status)

    await _deployer.teardown_topology(repo, tid)

    latest_client = fake_agent.instances[-1]
    assert latest_client.host["uuid"] == "h-teardown"
    assert latest_client.calls == [("teardown", (tid,), {})]

    stored = await repo.get_topology(tid)
    assert stored["status"] == TopologyStatus.TORN_DOWN
|
||||
132
tests/topology/test_editing.py
Normal file
132
tests/topology/test_editing.py
Normal file
@@ -0,0 +1,132 @@
|
||||
"""Pre-deploy mutation repo methods: pending-only, version-aware."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.topology.config import TopologyConfig
|
||||
from decnet.topology.generator import generate
|
||||
from decnet.topology.persistence import persist, transition_status
|
||||
from decnet.topology.status import TopologyNotEditable, TopologyStatus
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
|
||||
def _cfg(**kw) -> TopologyConfig:
    """Return the editing-test ``TopologyConfig``; *kw* overrides any default."""
    defaults = {
        "name": "edit",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 2,
        "deckies_per_lan_max": 2,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 6,
    }
    return TopologyConfig(**{**defaults, **kw})
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path):
    """Provide an initialized repository whose database lives at ``tmp_path/edit.db``."""
    db_file = tmp_path / "edit.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_add_lan_to_pending_bumps_version(repo):
    """Adding a LAN to a freshly persisted topology moves its version from 1 to 2."""
    topology_id = await persist(repo, generate(_cfg()))
    new_lan = {
        "topology_id": topology_id,
        "name": "LAN-NEW",
        "subnet": "10.55.0.0/24",
        "is_dmz": False,
    }
    await repo.add_lan(new_lan, expected_version=1)

    refreshed = await repo.get_topology(topology_id)
    assert refreshed["version"] == 2

    lan_rows = await repo.list_lans_for_topology(topology_id)
    assert "LAN-NEW" in {row["name"] for row in lan_rows}
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_update_decky_roundtrips_service_config(repo):
    """A ``service_config`` written via ``update_topology_decky`` is read back intact."""
    topology_id = await persist(repo, generate(_cfg()))
    target = (await repo.list_topology_deckies(topology_id))[0]

    # Shallow-copy the stored config and overlay the new service settings.
    new_config = {
        **target["decky_config"],
        "service_config": {"ssh": {"password": "megapassword"}},
    }
    await repo.update_topology_decky(
        target["uuid"],
        {"decky_config": new_config},
        expected_version=1,
    )

    rows = await repo.list_topology_deckies(topology_id)
    stored = next(row for row in rows if row["uuid"] == target["uuid"])
    assert stored["decky_config"]["service_config"]["ssh"]["password"] == "megapassword"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_update_decky_rejected_on_active_topology(repo):
    """With ``enforce_pending=True`` a decky update on an ACTIVE topology is refused.

    The raised ``TopologyNotEditable`` must carry the offending status.
    """
    topology_id = await persist(repo, generate(_cfg()))
    target = (await repo.list_topology_deckies(topology_id))[0]

    # pending → deploying → active
    for status in (TopologyStatus.DEPLOYING, TopologyStatus.ACTIVE):
        await transition_status(repo, topology_id, status)

    unchanged = {"decky_config": target["decky_config"]}
    with pytest.raises(TopologyNotEditable) as excinfo:
        await repo.update_topology_decky(
            target["uuid"], unchanged, enforce_pending=True
        )
    assert excinfo.value.status == TopologyStatus.ACTIVE
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_delete_lan_with_home_decky_refused(repo):
    """Deleting a LAN is refused when it would leave a decky without any edge."""
    single_decky_cfg = _cfg(
        depth=1,
        branching_factor=1,
        deckies_per_lan_max=1,
        deckies_per_lan_min=1,
    )
    topology_id = await persist(repo, generate(single_decky_cfg))

    first_lan = (await repo.list_lans_for_topology(topology_id))[0]
    with pytest.raises(ValueError, match="orphaned"):
        await repo.delete_lan(first_lan["id"])
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_delete_edge_leaves_decky_intact(repo):
    """Removing a single bridge edge deletes that edge only, never a decky."""
    # depth=1 branching=1 gives DMZ(LAN-00) + LAN-01 with a bridge decky.
    topology_id = await persist(repo, generate(_cfg()))

    async def _decky_uuids() -> set:
        rows = await repo.list_topology_deckies(topology_id)
        return {row["uuid"] for row in rows}

    all_edges = await repo.list_topology_edges(topology_id)
    victim = next((e for e in all_edges if e["is_bridge"]), None)
    assert victim is not None, "generator should produce at least one bridge edge"

    # Delete exactly one — the bridge decky should keep at least one edge.
    uuids_before = await _decky_uuids()
    await repo.delete_topology_edge(victim["id"])
    uuids_after = await _decky_uuids()
    assert uuids_before == uuids_after

    surviving_ids = {e["id"] for e in await repo.list_topology_edges(topology_id)}
    assert victim["id"] not in surviving_ids
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_delete_decky_cascades_edges(repo):
    """Deleting a decky also removes every edge that referenced it."""
    topology_id = await persist(repo, generate(_cfg()))
    doomed = (await repo.list_topology_deckies(topology_id))[0]

    await repo.delete_topology_decky(doomed["uuid"])

    # No surviving edge may still point at the removed decky.
    surviving_edges = await repo.list_topology_edges(topology_id)
    referenced = {edge["decky_uuid"] for edge in surviving_edges}
    assert doomed["uuid"] not in referenced
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_delete_edge_rejected_on_active(repo):
    """Edge deletion raises ``TopologyNotEditable`` once the topology is ACTIVE."""
    topology_id = await persist(repo, generate(_cfg()))
    # Capture the edges while the topology is still pending.
    edge_rows = await repo.list_topology_edges(topology_id)

    for status in (TopologyStatus.DEPLOYING, TopologyStatus.ACTIVE):
        await transition_status(repo, topology_id, status)

    first_edge_id = edge_rows[0]["id"]
    with pytest.raises(TopologyNotEditable):
        await repo.delete_topology_edge(first_edge_id)
|
||||
137
tests/topology/test_generator.py
Normal file
137
tests/topology/test_generator.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""MazeNET generator determinism + DAG shape tests."""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import Counter
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.topology.config import TopologyConfig
|
||||
from decnet.topology.generator import generate
|
||||
|
||||
|
||||
def _cfg(**kw) -> TopologyConfig:
    """Return the generator-test ``TopologyConfig``; *kw* overrides any default."""
    settings = {
        "name": "test",
        "depth": 3,
        "branching_factor": 2,
        "deckies_per_lan_min": 2,
        "deckies_per_lan_max": 2,
        "bridge_forward_probability": 1.0,
        "cross_edge_probability": 0.0,
        "randomize_services": True,
        "seed": 42,
    }
    settings.update(kw)
    return TopologyConfig(**settings)
|
||||
|
||||
|
||||
def test_seed_is_deterministic():
    """Two runs with the same config yield the same LANs, deckies, services and edges."""

    def _lan_names(topo):
        return [lan.name for lan in topo.lans]

    def _decky_names(topo):
        return [d.name for d in topo.deckies]

    def _decky_services(topo):
        return [(d.name, sorted(d.services)) for d in topo.deckies]

    def _edge_pairs(topo):
        return sorted((e.decky_name, e.lan_name) for e in topo.edges)

    first = generate(_cfg())
    second = generate(_cfg())

    # Same structure: same LAN names, same decky names, same edge set.
    assert _lan_names(first) == _lan_names(second)
    assert _decky_names(first) == _decky_names(second)
    assert _decky_services(first) == _decky_services(second)
    assert _edge_pairs(first) == _edge_pairs(second)
|
||||
|
||||
|
||||
def test_different_seed_yields_different_structure():
    """Seeds 1 and 2 must not produce an identical topology signature."""

    def _signature(topo):
        return (
            [lan.name for lan in topo.lans],
            [(d.name, sorted(d.services)) for d in topo.deckies],
            sorted((e.decky_name, e.lan_name) for e in topo.edges),
        )

    # With modest depth/branching, at least one of structure, service
    # assignment, or edge count will differ — fail only if everything is
    # identical, which would indicate the seed is being ignored.
    sig_one = _signature(generate(_cfg(seed=1)))
    sig_two = _signature(generate(_cfg(seed=2)))
    assert sig_one != sig_two
|
||||
|
||||
|
||||
def test_dmz_is_exactly_one_lan():
    """Exactly one LAN is flagged DMZ; it has no parent and is named ``LAN-00``."""
    topo = generate(_cfg())
    dmz_lans = [lan for lan in topo.lans if lan.is_dmz]
    assert len(dmz_lans) == 1

    (root,) = dmz_lans
    assert root.parent is None
    assert root.name == "LAN-00"
|
||||
|
||||
|
||||
def test_every_non_dmz_lan_has_exactly_one_bridge_into_parent():
    """Every non-DMZ LAN shares at least one multi-homed decky with its parent.

    NOTE(review): the test name says "exactly one", but the assertion only
    enforces a lower bound (``>= 1``) — confirm which is intended.
    """
    t = generate(_cfg(branching_factor=2, depth=3))
    for lan in (candidate for candidate in t.lans if not candidate.is_dmz):
        # A bridge decky has a membership in both this LAN and its parent.
        bridges_to_parent = [
            d
            for d in t.deckies
            if all(name in d.ips_by_lan for name in (lan.name, lan.parent))
        ]
        assert len(bridges_to_parent) >= 1, (
            f"{lan.name} has no bridge into parent {lan.parent}"
        )
|
||||
|
||||
|
||||
def test_cross_edge_probability_zero_yields_tree():
    """With ``cross_edge_probability=0`` multi-homed deckies only bridge upward.

    For every decky with more than one LAN membership, each membership after
    the first (treated as the home LAN) must be the home LAN's parent —
    never a sibling or cousin.
    """
    t = generate(_cfg(cross_edge_probability=0.0))
    lans_by_name = {lan.name: lan for lan in t.lans}
    for d in t.deckies:
        memberships = list(d.ips_by_lan)
        if len(memberships) <= 1:
            continue
        # Home LAN = first membership; the rest must all be its parent.
        home, *others = memberships
        parent = lans_by_name[home].parent
        assert all(o == parent for o in others), (
            f"tree mode but decky {d.name} bridges {home}→{others} (parent={parent})"
        )
|
||||
|
||||
|
||||
def test_cross_edge_probability_one_produces_cross_edges_over_runs():
    """With ``cross_edge_probability=1`` at least one cross-edge must appear.

    A cross-edge is counted as any membership of a multi-homed decky, beyond
    its first (home) LAN, that is not the home LAN's parent.
    """
    t = generate(_cfg(cross_edge_probability=1.0, depth=3, branching_factor=3))
    lans_by_name = {lan.name: lan for lan in t.lans}

    def _cross_edge_count(decky) -> int:
        home, *others = decky.ips_by_lan
        parent = lans_by_name[home].parent
        return sum(1 for other in others if other != parent)

    cross_edges = sum(
        _cross_edge_count(d) for d in t.deckies if len(d.ips_by_lan) >= 2
    )
    assert cross_edges >= 1
|
||||
|
||||
|
||||
def test_every_decky_has_at_least_one_edge():
    """No generated decky may be missing from the edge list."""
    topo = generate(_cfg())
    edges_per_decky = Counter(edge.decky_name for edge in topo.edges)
    # Counter returns 0 for absent keys, so unattached deckies surface here.
    unattached = [d.name for d in topo.deckies if edges_per_decky[d.name] < 1]
    assert not unattached
|
||||
|
||||
|
||||
def test_dmz_has_exactly_one_decky():
    """``LAN-00`` has exactly one non-bridge edge even with 5 deckies per LAN."""
    topo = generate(_cfg(deckies_per_lan_min=5, deckies_per_lan_max=5))
    # LAN-00 may also carry bridge edges from deckies homed in child LANs;
    # only the non-bridge ("home") edges are counted.
    home_edges = [
        edge
        for edge in topo.edges
        if edge.lan_name == "LAN-00" and not edge.is_bridge
    ]
    assert len(home_edges) == 1
|
||||
80
tests/topology/test_hashing.py
Normal file
80
tests/topology/test_hashing.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""Tests for :mod:`decnet.topology.hashing`."""
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
|
||||
from decnet.topology.hashing import canonical_hash
|
||||
|
||||
|
||||
def _sample() -> dict:
    """Return a fresh one-LAN, one-decky, one-edge topology dict for hashing tests."""
    topology = {
        "id": "t1",
        "name": "n",
        "mode": "agent",
        "target_host_uuid": "h1",
        "status": "deploying",
        "version": 3,
        "created_at": "2026-04-21T00:00:00+00:00",
    }
    lan = {
        "id": "l1",
        "name": "dmz",
        "subnet": "10.0.0.0/24",
        "is_dmz": True,
        "x": 40,
        "y": 40,
    }
    decky = {
        "uuid": "d1",
        "name": "gw",
        "services": ["ssh"],
        "decky_config": {"archetype": "deaddeck", "forwards_l3": True},
        "state": "pending",
        "x": 10,
        "y": 20,
    }
    edge = {
        "id": "e1",
        "decky_uuid": "d1",
        "lan_id": "l1",
        "is_bridge": True,
        "forwards_l3": True,
    }
    return {
        "topology": topology,
        "lans": [lan],
        "deckies": [decky],
        "edges": [edge],
    }
|
||||
|
||||
|
||||
def test_hash_is_stable() -> None:
    """Two separately built but equal samples hash to the same value."""
    first_hash = canonical_hash(_sample())
    second_hash = canonical_hash(_sample())
    assert first_hash == second_hash
|
||||
|
||||
|
||||
def test_key_order_does_not_matter() -> None:
    """Reordering the top-level keys leaves the hash unchanged."""
    original = _sample()
    # Same values, inserted in the reverse of the sample's key order.
    reordered = {
        key: original[key] for key in ("edges", "deckies", "lans", "topology")
    }
    assert canonical_hash(original) == canonical_hash(reordered)
|
||||
|
||||
|
||||
def test_volatile_fields_ignored() -> None:
    """Status, version, timestamps, errors and layout coordinates do not affect the hash."""
    baseline = _sample()
    mutated = copy.deepcopy(baseline)

    mutated["topology"].update(
        {
            "status": "active",
            "version": 99,
            "status_changed_at": "2099-01-01T00:00:00+00:00",
        }
    )
    mutated["deckies"][0].update({"last_error": "transient", "x": 9999})
    mutated["lans"][0]["y"] = 12345

    assert canonical_hash(baseline) == canonical_hash(mutated)
|
||||
|
||||
|
||||
def test_behavioural_change_flips_hash() -> None:
    """Changing a decky's service list must change the hash."""
    baseline = _sample()
    altered = copy.deepcopy(baseline)
    altered["deckies"][0]["services"] = ["ssh", "http"]

    baseline_hash = canonical_hash(baseline)
    altered_hash = canonical_hash(altered)
    assert baseline_hash != altered_hash
|
||||
|
||||
|
||||
def test_input_is_not_mutated() -> None:
    """``canonical_hash`` leaves its argument equal to a pre-call deep copy."""
    payload = _sample()
    before = copy.deepcopy(payload)
    canonical_hash(payload)
    assert payload == before
|
||||
58
tests/topology/test_layout.py
Normal file
58
tests/topology/test_layout.py
Normal file
@@ -0,0 +1,58 @@
|
||||
"""Layout coordinate roundtrips for LAN and TopologyDecky."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.topology.config import TopologyConfig
|
||||
from decnet.topology.generator import generate
|
||||
from decnet.topology.persistence import hydrate, persist
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
|
||||
def _cfg(**kw) -> TopologyConfig:
    """Return the layout-test ``TopologyConfig``; *kw* overrides any default."""
    defaults = {
        "name": "layout",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 4,
    }
    merged = {**defaults, **kw}
    return TopologyConfig(**merged)
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path):
    """Provide an initialized repository whose database lives at ``tmp_path/layout.db``."""
    db_file = tmp_path / "layout.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_coords_roundtrip_when_set(repo):
    """x/y set on a planned LAN and decky come back unchanged after persist + hydrate."""
    plan = generate(_cfg())
    planned_lan, planned_decky = plan.lans[0], plan.deckies[0]
    planned_lan.x, planned_lan.y = 10.5, -3.25
    planned_decky.x, planned_decky.y = 42.0, 7.5

    topology_id = await persist(repo, plan)
    hydrated = await hydrate(repo, topology_id)

    lan_row = next(
        row for row in hydrated["lans"] if row["name"] == planned_lan.name
    )
    assert (lan_row["x"], lan_row["y"]) == (10.5, -3.25)

    decky_row = next(
        row for row in hydrated["deckies"] if row["name"] == planned_decky.name
    )
    assert (decky_row["x"], decky_row["y"]) == (42.0, 7.5)
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_coords_default_to_none(repo):
    """Without explicit coordinates every hydrated LAN and decky has ``x``/``y`` of None."""
    topology_id = await persist(repo, generate(_cfg()))
    hydrated = await hydrate(repo, topology_id)

    for node in [*hydrated["lans"], *hydrated["deckies"]]:
        assert node["x"] is None
        assert node["y"] is None
|
||||
452
tests/topology/test_mutator.py
Normal file
452
tests/topology/test_mutator.py
Normal file
@@ -0,0 +1,452 @@
|
||||
"""Step 7 — topology_mutations queue + mutator reconciler branch."""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.bus import topics as _topics
|
||||
from decnet.bus.fake import FakeBus
|
||||
from decnet.mutator import engine as _engine
|
||||
from decnet.mutator.ops import (
|
||||
MutationError,
|
||||
apply_add_decky,
|
||||
apply_add_lan,
|
||||
apply_update_decky,
|
||||
)
|
||||
from decnet.topology.config import TopologyConfig
|
||||
from decnet.topology.generator import generate
|
||||
from decnet.topology.persistence import persist, transition_status
|
||||
from decnet.topology.status import TopologyStatus, VersionConflict
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
|
||||
def _cfg(**kw) -> TopologyConfig:
    """Return the mutator-test ``TopologyConfig``; *kw* overrides any default."""
    settings = {
        "name": "mut",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 2,
        "deckies_per_lan_max": 2,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 9,
    }
    settings.update(kw)
    return TopologyConfig(**settings)
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path):
    """Provide an initialized repository whose database lives at ``tmp_path/mut.db``."""
    db_file = tmp_path / "mut.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
async def _make_active(repo) -> str:
    """Persist a default topology, move it to ACTIVE, and return its id."""
    topology_id = await persist(repo, generate(_cfg()))
    # ACTIVE is reached by way of DEPLOYING.
    for status in (TopologyStatus.DEPLOYING, TopologyStatus.ACTIVE):
        await transition_status(repo, topology_id, status)
    return topology_id
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- queue
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_enqueue_bumps_topology_version(repo):
    """Enqueueing a mutation increments the version and stores a ``pending`` row."""
    topology_id = await _make_active(repo)
    version_before = (await repo.get_topology(topology_id))["version"]

    payload = {"name": "LAN-X", "subnet": "172.20.77.0/24"}
    mutation_id = await repo.enqueue_topology_mutation(
        topology_id, "add_lan", payload, expected_version=version_before
    )

    version_after = (await repo.get_topology(topology_id))["version"]
    assert version_after == version_before + 1

    first_row = (await repo.list_topology_mutations(topology_id))[0]
    assert first_row["id"] == mutation_id
    assert first_row["state"] == "pending"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_enqueue_version_conflict(repo):
    """Reusing an already-consumed ``expected_version`` raises ``VersionConflict``."""
    tid = await _make_active(repo)
    # Read the live version rather than hard-coding it, so the test does not
    # depend on how many version bumps the setup helper happens to perform.
    current = (await repo.get_topology(tid))["version"]
    await repo.enqueue_topology_mutation(
        tid, "add_lan", {"name": "LAN-X", "subnet": "172.20.77.0/24"},
        expected_version=current,
    )
    with pytest.raises(VersionConflict):
        await repo.enqueue_topology_mutation(
            tid, "add_lan", {"name": "LAN-Y", "subnet": "172.20.78.0/24"},
            expected_version=current,  # stale — the first enqueue bumped it
        )
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_claim_next_mutation_is_atomic_single_winner(repo):
    """Of two back-to-back claims on one queued mutation, only the first wins."""
    topology_id = await _make_active(repo)
    await repo.enqueue_topology_mutation(topology_id, "add_lan", {"name": "LAN-X"})

    # The two "watch loops" are simulated sequentially: the second claim is
    # expected to see the row already in state 'applying' and return None
    # rather than re-claiming it.
    winner = await repo.claim_next_mutation(topology_id)
    loser = await repo.claim_next_mutation(topology_id)

    assert winner is not None
    assert loser is None
    assert winner["state"] == "applying"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_claim_none_when_empty(repo):
    """Claiming from a topology with no queued mutations returns ``None``."""
    topology_id = await _make_active(repo)
    claimed = await repo.claim_next_mutation(topology_id)
    assert claimed is None
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_mark_applied_and_failed(repo):
    """``mark_mutation_applied`` / ``mark_mutation_failed`` record state and reason."""
    topology_id = await _make_active(repo)
    applied_id = await repo.enqueue_topology_mutation(
        topology_id, "add_lan", {"name": "A"}
    )
    failed_id = await repo.enqueue_topology_mutation(
        topology_id, "add_lan", {"name": "B"}
    )

    # Claim, then resolve, each mutation in turn.
    await repo.claim_next_mutation(topology_id)
    await repo.mark_mutation_applied(applied_id)
    await repo.claim_next_mutation(topology_id)
    await repo.mark_mutation_failed(failed_id, "boom")

    rows = await repo.list_topology_mutations(topology_id)
    rows_by_id = {row["id"]: row for row in rows}
    assert rows_by_id[applied_id]["state"] == "applied"
    assert rows_by_id[failed_id]["state"] == "failed"
    assert rows_by_id[failed_id]["reason"] == "boom"
|
||||
|
||||
|
||||
# --------------------------------------------------------------- guard query
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_guard_false_without_pending_or_live(repo):
    """The guard is False with no topologies, and stays False for a non-live one."""
    # No topologies at all.
    guard_when_empty = await repo.has_pending_topology_mutation()
    assert guard_when_empty is False

    # A topology that was only persisted (never deployed) gets a queued
    # mutation; enqueueing is allowed, but it must not trip the guard.
    topology_id = await persist(repo, generate(_cfg()))
    await repo.enqueue_topology_mutation(topology_id, "add_lan", {"name": "Z"})

    guard_with_pending_topology = await repo.has_pending_topology_mutation()
    assert guard_with_pending_topology is False
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_guard_true_with_live_pending(repo):
    """The guard is True while an ACTIVE topology has a pending mutation, False once claimed."""
    topology_id = await _make_active(repo)
    await repo.enqueue_topology_mutation(topology_id, "add_lan", {"name": "Z"})

    guard_before_claim = await repo.has_pending_topology_mutation()
    assert guard_before_claim is True

    # After claiming, the pending row becomes applying — guard drops.
    await repo.claim_next_mutation(topology_id)
    guard_after_claim = await repo.has_pending_topology_mutation()
    assert guard_after_claim is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- ops
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_apply_add_lan_persists(repo):
    """``apply_add_lan`` makes the new LAN visible through the repository."""
    topology_id = await _make_active(repo)
    payload = {"name": "LAN-MUT", "subnet": "172.20.55.0/24"}

    await apply_add_lan(repo, topology_id, payload)

    lan_rows = await repo.list_lans_for_topology(topology_id)
    assert "LAN-MUT" in {row["name"] for row in lan_rows}
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_apply_add_decky_creates_and_attaches(repo):
    """``apply_add_decky`` stores the new decky and an edge to its home LAN."""
    topology_id = await _make_active(repo)
    home_lan = (await repo.list_lans_for_topology(topology_id))[0]

    payload = {
        "name": "new-decky-mut",
        "lan": home_lan["name"],
        "services": ["ssh"],
        "archetype": "deaddeck",
    }
    await apply_add_decky(repo, topology_id, payload)

    # Locate the freshly created decky by the name stored in its config.
    created = None
    for row in await repo.list_topology_deckies(topology_id):
        if row["decky_config"]["name"] == "new-decky-mut":
            created = row
            break
    assert created is not None
    assert created["services"] == ["ssh"]
    assert created["decky_config"]["archetype"] == "deaddeck"
    assert home_lan["name"] in created["decky_config"]["ips_by_lan"]

    edge_rows = await repo.list_topology_edges(topology_id)
    attached = [
        edge
        for edge in edge_rows
        if edge["decky_uuid"] == created["uuid"] and edge["lan_id"] == home_lan["id"]
    ]
    assert attached
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_apply_add_decky_rejects_duplicate_name(repo):
    """Adding a decky under a name that is already taken raises ``MutationError``."""
    topology_id = await _make_active(repo)
    lan_rows = await repo.list_lans_for_topology(topology_id)
    taken = (await repo.list_topology_deckies(topology_id))[0]

    payload = {"name": taken["decky_config"]["name"], "lan": lan_rows[0]["name"]}
    with pytest.raises(MutationError, match="already exists"):
        await apply_add_decky(repo, topology_id, payload)
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_apply_add_decky_rejects_missing_lan(repo):
    """Adding a decky to a LAN name that does not exist raises ``MutationError``."""
    topology_id = await _make_active(repo)
    payload = {"name": "orphan-decky", "lan": "nonexistent-lan"}
    with pytest.raises(MutationError, match="not found"):
        await apply_add_decky(repo, topology_id, payload)
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_apply_update_decky_replaces_services(repo):
    """A top-level ``services`` key in the payload replaces the decky's service list."""
    topology_id = await _make_active(repo)
    target = (await repo.list_topology_deckies(topology_id))[0]

    payload = {
        "decky": target["decky_config"]["name"],
        "services": ["ssh", "http"],
    }
    await apply_update_decky(repo, topology_id, payload)

    rows = await repo.list_topology_deckies(topology_id)
    refreshed = next(row for row in rows if row["uuid"] == target["uuid"])
    # Order is not asserted, only the resulting set of services.
    assert sorted(refreshed["services"]) == ["http", "ssh"]
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_apply_rejected_on_validator_error(repo):
    """A ``service_config`` patch for an undeclared service raises ``MutationError``."""
    topology_id = await _make_active(repo)
    target = (await repo.list_topology_deckies(topology_id))[0]

    # service_config for an undeclared service trips
    # SERVICE_CFG_UNDECLARED in the post-apply invariants.
    bad_patch = {"service_config": {"telnet": {"banner": "x"}}}
    payload = {"decky": target["decky_config"]["name"], "patch": bad_patch}

    with pytest.raises(MutationError):
        await apply_update_decky(repo, topology_id, payload)
|
||||
|
||||
|
||||
# ----------------------------------------------------------- reconciler flow
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_reconcile_applies_pending_mutation(repo):
    """One reconcile pass applies the queued ``add_lan`` and marks it ``applied``."""
    topology_id = await _make_active(repo)
    payload = {"name": "LAN-RECON", "subnet": "172.20.44.0/24"}
    await repo.enqueue_topology_mutation(topology_id, "add_lan", payload)

    applied_count = await _engine.reconcile_topologies(repo)
    assert applied_count == 1

    lan_rows = await repo.list_lans_for_topology(topology_id)
    assert "LAN-RECON" in {row["name"] for row in lan_rows}

    # Mutation row is now applied.
    mutation_rows = await repo.list_topology_mutations(topology_id)
    assert {row["state"] for row in mutation_rows} == {"applied"}
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_reconcile_failed_mutation_degrades_topology(repo):
    """A mutation that fails during reconcile is marked ``failed`` and degrades the topology."""
    topology_id = await _make_active(repo)
    taken_name = (await repo.list_lans_for_topology(topology_id))[0]["name"]

    # Validator will reject duplicate LAN name → failure path.
    payload = {"name": taken_name, "subnet": "172.20.88.0/24"}
    await repo.enqueue_topology_mutation(topology_id, "add_lan", payload)

    applied_count = await _engine.reconcile_topologies(repo)
    assert applied_count == 0

    first_mutation = (await repo.list_topology_mutations(topology_id))[0]
    assert first_mutation["state"] == "failed"

    stored = await repo.get_topology(topology_id)
    assert stored["status"] == TopologyStatus.DEGRADED
|
||||
|
||||
|
||||
# ----------------------------------------------------- watch-loop guard isolation
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_watch_loop_guard_skips_reconciler_when_idle(
    repo, monkeypatch
):
    """With nothing queued, one hand-driven tick calls ``mutate_all`` but not the reconciler.

    The loop body is reproduced inline here rather than invoking the real
    watch loop; both engine entry points are replaced by recording fakes.
    """
    mutate_calls: list = []
    reconcile_calls: list = []

    async def _fake_mutate_all(force=False, repo=None):
        mutate_calls.append((force, repo))

    async def _fake_reconcile(r):
        reconcile_calls.append(r)
        return 0

    monkeypatch.setattr(_engine, "mutate_all", _fake_mutate_all)
    monkeypatch.setattr(_engine, "reconcile_topologies", _fake_reconcile)

    # Manually drive one iteration of the loop body.
    await _engine.mutate_all(force=False, repo=repo)
    work_pending = await repo.has_pending_topology_mutation()
    if work_pending:
        await _engine.reconcile_topologies(repo)

    assert len(mutate_calls) == 1
    assert reconcile_calls == []
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_watch_loop_guard_fires_reconciler_when_work_exists(
    repo, monkeypatch
):
    """With a pending mutation on an ACTIVE topology the guard lets the reconciler run once."""
    topology_id = await _make_active(repo)
    await repo.enqueue_topology_mutation(topology_id, "add_lan", {"name": "X"})

    reconcile_calls: list = []

    async def _fake_reconcile(r):
        reconcile_calls.append(r)
        return 0

    monkeypatch.setattr(_engine, "reconcile_topologies", _fake_reconcile)

    # Same guard-then-reconcile step as the loop body, driven by hand.
    work_pending = await repo.has_pending_topology_mutation()
    if work_pending:
        await _engine.reconcile_topologies(repo)

    assert len(reconcile_calls) == 1
|
||||
|
||||
|
||||
def test_ops_payload_shape_docstring_present():
    """Smoke check: ``DISPATCH`` exposes exactly the eight expected op names."""
    from decnet.mutator.ops import DISPATCH

    expected_ops = {
        "add_lan",
        "remove_lan",
        "update_lan",
        "add_decky",
        "attach_decky",
        "detach_decky",
        "remove_decky",
        "update_decky",
    }
    assert set(DISPATCH) == expected_ops
|
||||
|
||||
|
||||
def _payload_json(d: dict) -> str:
    """Serialize *d* to a JSON string using ``json.dumps`` defaults."""
    encoded = json.dumps(d)
    return encoded
|
||||
|
||||
|
||||
# ---------------------------------------------------- bus publishing (DEBT-030)
|
||||
|
||||
|
||||
async def _drain(sub, expected: int, timeout: float = 2.0) -> list:
    """Return the next *expected* events from *sub*, in arrival order.

    Each individual event is awaited with its own *timeout* (seconds) via
    ``asyncio.wait_for``, so a subscription that stops producing makes the
    call raise instead of hanging the test.
    """
    event_iter = sub.__aiter__()
    return [
        await asyncio.wait_for(event_iter.__anext__(), timeout=timeout)
        for _ in range(expected)
    ]
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_reconcile_publishes_applying_and_applied(repo):
    """A successful reconcile publishes ``MUTATION_APPLYING`` then ``MUTATION_APPLIED``."""
    tid = await _make_active(repo)
    payload = {"name": "LAN-PUB", "subnet": "172.20.45.0/24"}
    await repo.enqueue_topology_mutation(tid, "add_lan", payload)

    bus = FakeBus()
    await bus.connect()
    # Subscribe before reconciling so no publish for this topology is missed.
    sub = bus.subscribe(f"{_topics.TOPOLOGY}.{tid}.>")
    try:
        async with sub:
            applied_count = await _engine.reconcile_topologies(repo, bus=bus)
            assert applied_count == 1
            received = await _drain(sub, expected=2)
    finally:
        await bus.close()

    expected_types = [_topics.MUTATION_APPLYING, _topics.MUTATION_APPLIED]
    assert [event.type for event in received] == expected_types
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_reconcile_publishes_failed_and_status(repo):
    """A failing reconcile publishes applying, failed, then a DEGRADED status event."""
    tid = await _make_active(repo)
    taken_name = (await repo.list_lans_for_topology(tid))[0]["name"]
    # Re-using an existing LAN name is what makes this mutation fail.
    payload = {"name": taken_name, "subnet": "172.20.89.0/24"}
    await repo.enqueue_topology_mutation(tid, "add_lan", payload)

    bus = FakeBus()
    await bus.connect()
    sub = bus.subscribe(f"{_topics.TOPOLOGY}.{tid}.>")
    try:
        async with sub:
            await _engine.reconcile_topologies(repo, bus=bus)
            # applying + failed + status(degraded)
            received = await _drain(sub, expected=3)
    finally:
        await bus.close()

    expected_types = [
        _topics.MUTATION_APPLYING,
        _topics.MUTATION_FAILED,
        _topics.TOPOLOGY_STATUS,
    ]
    assert [event.type for event in received] == expected_types
    assert received[-1].payload["state"] == TopologyStatus.DEGRADED
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_reconcile_with_null_bus_is_safe(repo):
    """Reconciling with ``bus=None`` still applies the queued mutation."""
    topology_id = await _make_active(repo)
    payload = {"name": "LAN-NULL", "subnet": "172.20.46.0/24"}
    await repo.enqueue_topology_mutation(topology_id, "add_lan", payload)

    applied_count = await _engine.reconcile_topologies(repo, bus=None)
    assert applied_count == 1
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_wake_on_enqueue_sets_event(repo):
    """Publishing a ``MUTATION_ENQUEUED`` event makes ``_wake_on_enqueue`` set the Event."""
    bus = FakeBus()
    await bus.connect()
    wake = asyncio.Event()
    listener = asyncio.create_task(_engine._wake_on_enqueue(bus, wake))
    try:
        # Yield once so the listener task gets a chance to subscribe.
        await asyncio.sleep(0)
        subject = _topics.topology_mutation("abc", _topics.MUTATION_ENQUEUED)
        body = {"mutation_id": "m1", "op": "add_lan"}
        await bus.publish(subject, body, event_type=_topics.MUTATION_ENQUEUED)
        await asyncio.wait_for(wake.wait(), timeout=1.0)
        assert wake.is_set()
    finally:
        # Best-effort shutdown: whatever the cancelled listener raises is
        # deliberately ignored so the bus always gets closed.
        listener.cancel()
        try:
            await listener
        except (asyncio.CancelledError, Exception):
            pass
        await bus.close()
|
||||
91
tests/topology/test_persistence.py
Normal file
91
tests/topology/test_persistence.py
Normal file
@@ -0,0 +1,91 @@
|
||||
"""MazeNET persistence-layer tests: generator → repo → hydrate roundtrip."""
|
||||
import pytest
|
||||
|
||||
from decnet.topology.config import TopologyConfig
|
||||
from decnet.topology.generator import generate
|
||||
from decnet.topology.persistence import (
|
||||
hydrate,
|
||||
persist,
|
||||
transition_status,
|
||||
)
|
||||
from decnet.topology.status import TopologyStatus, TopologyStatusError
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path):
    """Return a repository backed by ``persist.db`` under ``tmp_path``.

    ``initialize()`` is awaited before the repository is handed to the test.
    """
    db_file = tmp_path / "persist.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
def _config(**kw) -> TopologyConfig:
    """Build a ``TopologyConfig`` from test defaults, overridden by ``kw``."""
    defaults = {
        "name": "roundtrip",
        "depth": 2,
        "branching_factor": 2,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 2,
        "cross_edge_probability": 0.0,
        "randomize_services": True,
        "seed": 7,
    }
    # Caller-supplied keys win over the defaults.
    return TopologyConfig(**{**defaults, **kw})
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_persist_then_hydrate(repo):
    """A generated plan survives a persist → hydrate round trip."""
    generated = generate(_config())
    topology_id = await persist(repo, generated)

    snapshot = await hydrate(repo, topology_id)
    assert snapshot is not None

    topology_row = snapshot["topology"]
    assert topology_row["name"] == "roundtrip"
    assert topology_row["status"] == TopologyStatus.PENDING

    # Collection sizes must match the plan one-for-one.
    assert len(snapshot["lans"]) == len(generated.lans)
    assert len(snapshot["deckies"]) == len(generated.deckies)
    assert len(snapshot["edges"]) == len(generated.edges)

    # LANs round-trip with their DMZ flag and subnet.
    lans_by_name = {row["name"]: row for row in snapshot["lans"]}
    for planned_lan in generated.lans:
        stored_lan = lans_by_name[planned_lan.name]
        assert stored_lan["subnet"] == planned_lan.subnet
        assert stored_lan["is_dmz"] == planned_lan.is_dmz

    # Deckies round-trip their services as a list, not a string.
    assert all(isinstance(decky["services"], list) for decky in snapshot["deckies"])
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_transition_status_enforces_legality(repo):
    """``transition_status`` applies legal moves and rejects the others."""
    topology_id = await persist(repo, generate(_config()))

    # Move to deploying, then to active, and confirm the stored status.
    await transition_status(
        repo, topology_id, TopologyStatus.DEPLOYING, reason="go"
    )
    await transition_status(repo, topology_id, TopologyStatus.ACTIVE)
    stored = await repo.get_topology(topology_id)
    assert stored["status"] == TopologyStatus.ACTIVE

    # Can't go from active directly back to pending.
    with pytest.raises(TopologyStatusError):
        await transition_status(repo, topology_id, TopologyStatus.PENDING)

    # Unknown topology raises ValueError, not silent no-op.
    with pytest.raises(ValueError):
        await transition_status(repo, "does-not-exist", TopologyStatus.ACTIVE)
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_hydrate_missing_topology(repo):
    """Hydrating an id that was never persisted returns ``None``."""
    missing = await hydrate(repo, "no-such-id")
    assert missing is None
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_config_snapshot_preserves_seed(repo):
    """The stored ``config_snapshot`` keeps the config's seed and depth."""
    topology_id = await persist(repo, generate(_config(seed=12345)))
    stored = await repo.get_topology(topology_id)
    snapshot = stored["config_snapshot"]
    assert snapshot["seed"] == 12345
    assert snapshot["depth"] == 2
|
||||
228
tests/topology/test_reaper.py
Normal file
228
tests/topology/test_reaper.py
Normal file
@@ -0,0 +1,228 @@
|
||||
"""Tests for the orphan topology-resource reaper."""
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.engine.reaper import (
|
||||
ReapReport,
|
||||
_orphan_prefixes,
|
||||
_prefix_of,
|
||||
reap_orphan_topology_resources,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- pure helpers
|
||||
|
||||
|
||||
def test_prefix_of_matches_decnet_convention():
    """``_prefix_of`` returns ``abcd1234`` for ``decnet_t_abcd1234_*`` names."""
    conventional_names = (
        "decnet_t_abcd1234_dmz",
        "decnet_t_abcd1234_subnet-01",
        "decnet_t_abcd1234_decky-631b",
    )
    for resource_name in conventional_names:
        assert _prefix_of(resource_name) == "abcd1234"
|
||||
|
||||
|
||||
def test_prefix_of_rejects_non_decnet_names():
    """Names that do not follow the DECNET naming scheme yield ``None``."""
    for foreign_name in ("bridge", "host", "development_default"):
        assert _prefix_of(foreign_name) is None
    # Prefix must be 8 hex chars exactly.
    assert _prefix_of("decnet_t_abcd_dmz") is None
    # Trailing edge: nothing after the final underscore is still accepted.
    assert _prefix_of("decnet_t_abcd1234_") == "abcd1234"
|
||||
|
||||
|
||||
def test_orphan_prefixes_flags_only_unknowns():
    """Only prefixes absent from the live set are reported as orphans."""
    live_prefixes = {"aaaa1111", "bbbb2222"}
    container_names = [
        "decnet_t_aaaa1111_decky-01",  # live
        "decnet_t_cccc3333_dmz-gateway",  # orphan
        "bridge",  # not DECNET
    ]
    network_names = [
        "decnet_t_bbbb2222_subnet-01",  # live
        "decnet_t_cccc3333_dmz",  # orphan
        "decnet_t_dddd4444_subnet-01",  # orphan
    ]

    orphans, decnet_containers, decnet_networks = _orphan_prefixes(
        container_names, network_names, live_prefixes
    )

    assert orphans == {"cccc3333", "dddd4444"}
    assert "bridge" not in decnet_containers
    assert len(decnet_networks) == 3
|
||||
|
||||
|
||||
def test_orphan_prefixes_empty_when_all_live():
    """No orphans are reported when every resource prefix is live."""
    orphan_set, *_unused = _orphan_prefixes(
        ["decnet_t_aaaa1111_decky"],
        ["decnet_t_aaaa1111_dmz"],
        {"aaaa1111"},
    )
    assert orphan_set == set()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- integration
|
||||
|
||||
|
||||
class _FakeContainer:
|
||||
def __init__(self, name, remove_raises=None):
|
||||
self.name = name
|
||||
self._raises = remove_raises
|
||||
self.removed = False
|
||||
def remove(self, force=False): # noqa: ARG002
|
||||
if self._raises:
|
||||
raise self._raises
|
||||
self.removed = True
|
||||
|
||||
|
||||
class _FakeNetwork:
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
self.id = f"id-{name}"
|
||||
self.attrs = {"Containers": {}}
|
||||
self.removed = False
|
||||
def remove(self):
|
||||
self.removed = True
|
||||
def disconnect(self, cid, force=False): # pragma: no cover
|
||||
pass
|
||||
|
||||
|
||||
class _FakeClient:
|
||||
def __init__(self, containers, networks):
|
||||
self._cs = containers
|
||||
self._ns = networks
|
||||
self.containers = SimpleNamespace(list=lambda all=False: list(self._cs))
|
||||
self.networks = self
|
||||
|
||||
def list(self, names=None, filters=None): # noqa: ARG002
|
||||
if names is None:
|
||||
return list(self._ns)
|
||||
return [n for n in self._ns if n.name in set(names)]
|
||||
|
||||
|
||||
class _StubRepo:
|
||||
def __init__(self, topology_ids):
|
||||
self._ids = topology_ids
|
||||
async def list_topologies(self):
|
||||
return [{"id": tid} for tid in self._ids]
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_reap_removes_only_orphans():
    """Only orphan-prefixed resources are reported removed; live ones stay."""
    live_topology_id = "aaaa1111-1111-1111-1111-111111111111"
    stub_repo = _StubRepo([live_topology_id])

    fake_containers = [
        _FakeContainer("decnet_t_aaaa1111_decky"),  # live — keep
        _FakeContainer("decnet_t_dead0000_dmz-gateway"),  # orphan
        _FakeContainer("decnet_t_dead0000_decky-1"),  # orphan
        _FakeContainer("bridge"),  # non-DECNET
    ]
    fake_networks = [
        _FakeNetwork("decnet_t_aaaa1111_dmz"),  # live — keep
        _FakeNetwork("decnet_t_dead0000_dmz"),  # orphan
        _FakeNetwork("decnet_t_dead0000_subnet-01"),  # orphan
        _FakeNetwork("host"),  # non-DECNET
    ]
    docker = _FakeClient(fake_containers, fake_networks)

    report = await reap_orphan_topology_resources(stub_repo, client=docker)

    assert report.live_prefixes == ["aaaa1111"]
    assert report.orphan_prefixes == ["dead0000"]

    expected_containers = {
        "decnet_t_dead0000_dmz-gateway",
        "decnet_t_dead0000_decky-1",
    }
    expected_networks = {
        "decnet_t_dead0000_dmz",
        "decnet_t_dead0000_subnet-01",
    }
    assert set(report.containers_removed) == expected_containers
    assert set(report.networks_removed) == expected_networks
    assert report.errors == []

    # Live resources must survive.
    assert all(
        container.removed is False
        for container in fake_containers
        if "aaaa1111" in container.name
    )
    assert all(
        network.removed is False
        for network in fake_networks
        if "aaaa1111" in network.name
    )
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_reap_is_noop_when_no_orphans():
    """With only live-prefixed resources present, nothing is reported removed."""
    stub_repo = _StubRepo(["aaaa1111-xxx"])
    docker = _FakeClient(
        [_FakeContainer("decnet_t_aaaa1111_d")],
        [_FakeNetwork("decnet_t_aaaa1111_net")],
    )

    report = await reap_orphan_topology_resources(stub_repo, client=docker)

    assert report.orphan_prefixes == []
    assert report.containers_removed == []
    assert report.networks_removed == []
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_reap_captures_per_resource_errors_without_aborting():
    """One failing removal is recorded as an error while the rest proceed."""
    stub_repo = _StubRepo([])
    stuck = _FakeContainer(
        "decnet_t_dead0000_c1", remove_raises=RuntimeError("stuck")
    )
    healthy = _FakeContainer("decnet_t_dead0000_c2")
    orphan_net = _FakeNetwork("decnet_t_dead0000_net")
    docker = _FakeClient([stuck, healthy], [orphan_net])

    report = await reap_orphan_topology_resources(stub_repo, client=docker)

    # The failing container is reported; the next one still gets removed.
    assert any("c1" in message for message in report.errors)
    assert "decnet_t_dead0000_c2" in report.containers_removed
    assert "decnet_t_dead0000_net" in report.networks_removed
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_reap_handles_docker_list_failure():
    """A failing container listing is reported and nothing is removed."""
    stub_repo = _StubRepo(["aaaa1111"])
    broken_client = MagicMock()
    broken_client.containers.list.side_effect = RuntimeError("docker down")
    broken_client.networks.list.return_value = []

    report = await reap_orphan_topology_resources(stub_repo, client=broken_client)

    assert any("docker list failed" in message for message in report.errors)
    assert report.containers_removed == []
    assert report.networks_removed == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- report
|
||||
|
||||
|
||||
def test_reap_report_to_dict_is_serialisable():
    """``ReapReport.to_dict`` returns a plain dict mirroring the report fields."""
    report = ReapReport(
        live_prefixes=["aa"],
        orphan_prefixes=["bb"],
        containers_removed=["c"],
        networks_removed=["n"],
        errors=[],
    )
    expected = {
        "live_prefixes": ["aa"],
        "orphan_prefixes": ["bb"],
        "containers_removed": ["c"],
        "networks_removed": ["n"],
        "errors": [],
    }
    assert report.to_dict() == expected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- API
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_api_reap_orphans_requires_admin(monkeypatch):
    """POST /topologies/reap-orphans returns the report dict.

    The reaper is patched out and ``_admin`` is passed in directly, so only
    the shape of the returned payload is checked here.
    """
    from decnet.web.router.topology.api_reap_orphans import api_reap_orphans

    canned_report = ReapReport(
        live_prefixes=["aaaa1111"],
        orphan_prefixes=["dead0000"],
        containers_removed=["decnet_t_dead0000_c"],
        networks_removed=["decnet_t_dead0000_n"],
    )
    reap_target = (
        "decnet.web.router.topology.api_reap_orphans.reap_orphan_topology_resources"
    )
    with patch(reap_target) as mock_reap:
        mock_reap.return_value = canned_report
        response = await api_reap_orphans(_admin={"role": "admin"})

    assert response["orphan_prefixes"] == ["dead0000"]
    assert response["containers_removed"] == ["decnet_t_dead0000_c"]
    assert response["networks_removed"] == ["decnet_t_dead0000_n"]
    assert response["errors"] == []
|
||||
167
tests/topology/test_repo.py
Normal file
167
tests/topology/test_repo.py
Normal file
@@ -0,0 +1,167 @@
|
||||
"""Direct async tests for MazeNET topology persistence.
|
||||
|
||||
Exercises the repository layer without going through the HTTP stack or
|
||||
the in-memory generator. The synthetic topology here is hand-built so
|
||||
the test remains meaningful even if generator.py regresses.
|
||||
"""
|
||||
import pytest
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path):
    """Return a repository backed by ``mazenet.db`` under ``tmp_path``.

    ``initialize()`` is awaited before the repository is handed to the test.
    """
    db_file = tmp_path / "mazenet.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_topology_roundtrip(repo):
    """A created topology can be read back with its fields intact."""
    new_topology = {
        "name": "alpha",
        "mode": "unihost",
        "config_snapshot": {"depth": 3, "seed": 42},
    }
    topology_id = await repo.create_topology(new_topology)
    assert topology_id

    stored = await repo.get_topology(topology_id)
    assert stored is not None
    assert stored["name"] == "alpha"
    assert stored["status"] == "pending"
    # JSON field round-trips as a dict, not a string
    assert stored["config_snapshot"] == {"depth": 3, "seed": 42}
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_lan_add_update_list(repo):
    """LANs can be added, updated by id, and listed per topology."""
    topology_id = await repo.create_topology(
        {"name": "beta", "mode": "unihost", "config_snapshot": {}}
    )
    dmz_row = {
        "topology_id": topology_id,
        "name": "DMZ",
        "subnet": "172.20.0.0/24",
        "is_dmz": True,
    }
    plain_row = {
        "topology_id": topology_id,
        "name": "LAN-A",
        "subnet": "172.20.1.0/24",
    }
    dmz_id = await repo.add_lan(dmz_row)
    await repo.add_lan(plain_row)
    await repo.update_lan(dmz_id, {"docker_network_id": "abc123"})

    listed = await repo.list_lans_for_topology(topology_id)
    assert len(listed) == 2

    lans_by_name = {row["name"]: row for row in listed}
    assert lans_by_name["DMZ"]["docker_network_id"] == "abc123"
    assert lans_by_name["DMZ"]["is_dmz"] is True
    # ``is_dmz`` was omitted for LAN-A and reads back as False.
    assert lans_by_name["LAN-A"]["is_dmz"] is False
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_topology_decky_json_roundtrip(repo):
    """Decky JSON fields read back as Python objects and updates persist."""
    topology_id = await repo.create_topology(
        {"name": "gamma", "mode": "unihost", "config_snapshot": {}}
    )
    decky_row = {
        "topology_id": topology_id,
        "name": "decky-01",
        "services": ["ssh", "http"],
        "decky_config": {"hostname": "bastion"},
        "ip": "172.20.0.10",
    }
    decky_uuid = await repo.add_topology_decky(decky_row)
    assert decky_uuid

    listed = await repo.list_topology_deckies(topology_id)
    assert len(listed) == 1
    created = listed[0]
    assert created["services"] == ["ssh", "http"]
    assert created["decky_config"] == {"hostname": "bastion"}
    assert created["state"] == "pending"

    # Apply an update and re-list to confirm it was stored.
    await repo.update_topology_decky(
        decky_uuid, {"state": "running", "ip": "172.20.0.11"}
    )
    listed = await repo.list_topology_deckies(topology_id)
    updated = listed[0]
    assert updated["state"] == "running"
    assert updated["ip"] == "172.20.0.11"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_topology_decky_name_unique_within_topology(repo):
    """Same decky name is legal across topologies, forbidden within one."""
    first_topology = await repo.create_topology(
        {"name": "one", "mode": "unihost", "config_snapshot": {}}
    )
    second_topology = await repo.create_topology(
        {"name": "two", "mode": "unihost", "config_snapshot": {}}
    )

    await repo.add_topology_decky(
        {"topology_id": first_topology, "name": "decky-01", "services": []}
    )
    # Same name, different topology — must succeed.
    await repo.add_topology_decky(
        {"topology_id": second_topology, "name": "decky-01", "services": []}
    )
    # Same name, same topology — must fail at the DB level.
    duplicate_row = {
        "topology_id": first_topology,
        "name": "decky-01",
        "services": [],
    }
    with pytest.raises(Exception):
        await repo.add_topology_decky(duplicate_row)
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_status_transition_writes_event(repo):
    """Each status update is recorded as an event, listed latest first."""
    topology_id = await repo.create_topology(
        {"name": "delta", "mode": "unihost", "config_snapshot": {}}
    )
    await repo.update_topology_status(topology_id, "deploying", reason="kickoff")
    await repo.update_topology_status(topology_id, "active")

    stored = await repo.get_topology(topology_id)
    assert stored["status"] == "active"

    history = await repo.list_topology_status_events(topology_id)
    assert len(history) == 2
    # Ordered desc by at — latest first
    latest, earliest = history[0], history[1]
    assert latest["to_status"] == "active"
    assert latest["from_status"] == "deploying"
    assert earliest["to_status"] == "deploying"
    assert earliest["from_status"] == "pending"
    assert earliest["reason"] == "kickoff"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_cascade_delete_clears_all_children(repo):
    """Cascade delete removes the topology together with its child rows."""
    topology_id = await repo.create_topology(
        {"name": "eps", "mode": "unihost", "config_snapshot": {}}
    )
    # Populate the child collections, a status event and a mutation.
    lan_id = await repo.add_lan(
        {"topology_id": topology_id, "name": "L", "subnet": "10.0.0.0/24"}
    )
    decky_uuid = await repo.add_topology_decky(
        {"topology_id": topology_id, "name": "d", "services": []}
    )
    await repo.add_topology_edge(
        {"topology_id": topology_id, "decky_uuid": decky_uuid, "lan_id": lan_id}
    )
    await repo.update_topology_status(topology_id, "deploying")
    await repo.enqueue_topology_mutation(topology_id, "noop", {"x": 1})

    deleted = await repo.delete_topology_cascade(topology_id)
    assert deleted is True

    assert await repo.get_topology(topology_id) is None
    assert await repo.list_lans_for_topology(topology_id) == []
    assert await repo.list_topology_deckies(topology_id) == []
    assert await repo.list_topology_edges(topology_id) == []
    assert await repo.list_topology_status_events(topology_id) == []

    # Second delete on a missing row returns False, no raise
    deleted_again = await repo.delete_topology_cascade(topology_id)
    assert deleted_again is False
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_list_topologies_filters_by_status(repo):
    """``list_topologies`` filters by status and lists all when unfiltered."""
    pending_id = await repo.create_topology(
        {"name": "a", "mode": "unihost", "config_snapshot": {}}
    )
    deploying_id = await repo.create_topology(
        {"name": "b", "mode": "unihost", "config_snapshot": {}}
    )
    await repo.update_topology_status(deploying_id, "deploying")

    def ids_of(rows):
        return {row["id"] for row in rows}

    pending_rows = await repo.list_topologies(status="pending")
    assert ids_of(pending_rows) == {pending_id}

    deploying_rows = await repo.list_topologies(status="deploying")
    assert ids_of(deploying_rows) == {deploying_id}

    all_rows = await repo.list_topologies()
    assert ids_of(all_rows) == {pending_id, deploying_id}
|
||||
168
tests/topology/test_resync_reconcile.py
Normal file
168
tests/topology/test_resync_reconcile.py
Normal file
@@ -0,0 +1,168 @@
|
||||
"""Mutator reconcile loop + deployer.resync_agent_topology.
|
||||
|
||||
Covers the last mile of Step 7: once the heartbeat handler flags a
|
||||
topology as ``needs_resync``, the mutator's ``reconcile_agent_resyncs``
|
||||
pass must pick it up, re-push via AgentClient, and clear the flag.
|
||||
Failures must leave the flag set so the next tick retries.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.engine import deployer as _deployer
|
||||
from decnet.mutator import engine as _mut_engine
|
||||
from decnet.topology.config import TopologyConfig
|
||||
from decnet.topology.generator import generate
|
||||
from decnet.topology.hashing import canonical_hash
|
||||
from decnet.topology.persistence import hydrate, persist, transition_status
|
||||
from decnet.topology.status import TopologyStatus
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
|
||||
def _cfg(**kw) -> TopologyConfig:
    """Build a ``TopologyConfig`` from test defaults, overridden by ``kw``."""
    defaults = {
        "name": "resync",
        "mode": "agent",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 9,
    }
    # Caller-supplied keys win over the defaults.
    return TopologyConfig(**{**defaults, **kw})
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path):
    """Return a repository backed by ``resync.db`` under ``tmp_path``.

    ``initialize()`` is awaited before the repository is handed to the test.
    """
    db_file = tmp_path / "resync.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
async def _seed_host(repo, uuid_: str) -> None:
    """Insert an ``active`` swarm-host row identified by ``uuid_``."""
    host_row = {
        "uuid": uuid_,
        "name": f"host-{uuid_}",
        "address": "10.9.9.9",
        "agent_port": 8765,
        "status": "active",
        "client_cert_fingerprint": "a" * 64,
        "cert_bundle_path": "/tmp/ignored",
    }
    await repo.add_swarm_host(host_row)
|
||||
|
||||
|
||||
class _FakeAgentClient:
|
||||
instances: list["_FakeAgentClient"] = []
|
||||
|
||||
def __init__(self, *, host: dict[str, Any]) -> None:
|
||||
self.host = host
|
||||
self.calls: list[tuple[str, tuple]] = []
|
||||
_FakeAgentClient.instances.append(self)
|
||||
|
||||
async def __aenter__(self) -> "_FakeAgentClient":
|
||||
return self
|
||||
|
||||
async def __aexit__(self, *_exc) -> None:
|
||||
return None
|
||||
|
||||
async def apply_topology(self, hydrated, version_hash):
|
||||
self.calls.append(("apply", (hydrated, version_hash)))
|
||||
return {"status": "applied", "version_hash": version_hash}
|
||||
|
||||
|
||||
@pytest.fixture
def fake_agent(monkeypatch: pytest.MonkeyPatch):
    """Swap ``decnet.swarm.client.AgentClient`` for ``_FakeAgentClient``.

    The recorded-instances list is emptied first; the fake class itself is
    returned so tests can read ``instances``.
    """
    _FakeAgentClient.instances.clear()
    import decnet.swarm.client as swarm_client_module

    monkeypatch.setattr(swarm_client_module, "AgentClient", _FakeAgentClient)
    return _FakeAgentClient
|
||||
|
||||
|
||||
async def _active_topology(repo, host_uuid: str) -> tuple[str, str]:
    """Persist a topology targeting ``host_uuid`` and move it to ACTIVE.

    Returns:
        ``(topology_id, canonical_hash)`` where the hash is computed from
        the hydrated topology after both transitions.
    """
    topology_id = await persist(
        repo, generate(_cfg()), target_host_uuid=host_uuid
    )
    for next_status in (TopologyStatus.DEPLOYING, TopologyStatus.ACTIVE):
        await transition_status(repo, topology_id, next_status)
    snapshot = await hydrate(repo, topology_id)
    return topology_id, canonical_hash(snapshot)
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_resync_agent_topology_pushes_current_hash(repo, fake_agent) -> None:
    """A resync pushes the hydrated topology with its current hash."""
    await _seed_host(repo, "h-sync")
    topology_id, expected_hash = await _active_topology(repo, "h-sync")

    await _deployer.resync_agent_topology(repo, topology_id)

    assert len(fake_agent.instances) == 1
    client = fake_agent.instances[0]
    first_call = client.calls[0]
    assert first_call[0] == "apply"
    _, (pushed, pushed_hash) = first_call
    assert pushed_hash == expected_hash
    assert pushed["topology"]["id"] == topology_id

    stored = await repo.get_topology(topology_id)
    assert stored["status"] == TopologyStatus.ACTIVE  # unchanged
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_resync_rejects_master_local_topology(repo) -> None:
    """Resyncing a topology without a target host raises ``ValueError``."""
    local_plan = generate(_cfg(mode="unihost"))
    topology_id = await persist(repo, local_plan, target_host_uuid=None)
    for next_status in (TopologyStatus.DEPLOYING, TopologyStatus.ACTIVE):
        await transition_status(repo, topology_id, next_status)

    with pytest.raises(ValueError, match="no target_host_uuid"):
        await _deployer.resync_agent_topology(repo, topology_id)
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_reconcile_agent_resyncs_drains_flag(repo, fake_agent) -> None:
    """A flagged topology is pushed once and its ``needs_resync`` flag cleared."""
    await _seed_host(repo, "h-drain")
    topology_id, _unused_hash = await _active_topology(repo, "h-drain")
    await repo.set_topology_resync(topology_id, True)

    drained_count = await _mut_engine.reconcile_agent_resyncs(repo)
    assert drained_count == 1

    stored = await repo.get_topology(topology_id)
    assert stored["needs_resync"] is False
    assert len(fake_agent.instances) == 1
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_reconcile_retains_flag_on_push_failure(repo, monkeypatch) -> None:
    """A failed push drains nothing and leaves ``needs_resync`` set."""
    await _seed_host(repo, "h-boom")
    topology_id, _unused_hash = await _active_topology(repo, "h-boom")
    await repo.set_topology_resync(topology_id, True)

    class _Boom:
        """Agent client whose ``apply_topology`` always raises."""

        def __init__(self, *, host):
            ...

        async def __aenter__(self):
            return self

        async def __aexit__(self, *_):
            return None

        async def apply_topology(self, *_a, **_k):
            raise RuntimeError("agent unreachable")

    import decnet.swarm.client as swarm_client_module

    monkeypatch.setattr(swarm_client_module, "AgentClient", _Boom)

    drained_count = await _mut_engine.reconcile_agent_resyncs(repo)
    assert drained_count == 0

    stored = await repo.get_topology(topology_id)
    assert stored["needs_resync"] is True  # still flagged — next tick retries
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_reconcile_noop_when_nothing_flagged(repo, fake_agent) -> None:
    """With no topology flagged, nothing is drained and no client is built."""
    await _seed_host(repo, "h-idle")
    await _active_topology(repo, "h-idle")

    drained_count = await _mut_engine.reconcile_agent_resyncs(repo)

    assert drained_count == 0
    assert fake_agent.instances == []
|
||||
112
tests/topology/test_service_config.py
Normal file
112
tests/topology/test_service_config.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""Per-decky, per-service config roundtrips through persist + compose."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from decnet.topology.compose import generate_topology_compose
|
||||
from decnet.topology.config import TopologyConfig
|
||||
from decnet.topology.generator import generate
|
||||
from decnet.topology.persistence import hydrate, persist
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
|
||||
def _cfg(**kw) -> TopologyConfig:
    """Build a ``TopologyConfig`` from test defaults, overridden by ``kw``."""
    defaults = {
        "name": "svc",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 5,
    }
    # Caller-supplied keys win over the defaults.
    return TopologyConfig(**{**defaults, **kw})
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path):
    """Return a repository backed by ``svc.db`` under ``tmp_path``.

    ``initialize()`` is awaited before the repository is handed to the test.
    """
    db_file = tmp_path / "svc.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_service_config_roundtrips(repo):
    """A per-decky ``service_config`` override survives persist + hydrate."""
    plan = generate(_cfg())
    # Operator-style override, as the web editor would write it.
    plan.deckies[0].service_config = {"ssh": {"password": "megapassword"}}
    topology_id = await persist(repo, plan)

    snapshot = await hydrate(repo, topology_id)
    wanted_name = plan.deckies[0].name
    stored = next(row for row in snapshot["deckies"] if row["name"] == wanted_name)

    expected = {"ssh": {"password": "megapassword"}}
    assert stored["decky_config"]["service_config"] == expected
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_service_config_reaches_compose_fragment(repo):
    """The ssh password override appears as ``SSH_ROOT_PASSWORD`` in compose."""
    plan = generate(_cfg())
    plan.deckies[0].service_config = {"ssh": {"password": "megapassword"}}
    topology_id = await persist(repo, plan)

    snapshot = await hydrate(repo, topology_id)
    compose_doc = generate_topology_compose(snapshot)

    # The ssh fragment keys are "<decky>-ssh" (see compose.py:107).
    fragment_key = f"{plan.deckies[0].name}-ssh"
    fragment = compose_doc["services"][fragment_key]
    environment = fragment.get("environment", {})
    assert environment.get("SSH_ROOT_PASSWORD") == "megapassword"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_missing_service_config_defaults_work(repo):
    """No service_config override → service falls back to its default."""
    plan = generate(_cfg())
    topology_id = await persist(repo, plan)
    snapshot = await hydrate(repo, topology_id)

    compose_doc = generate_topology_compose(snapshot)
    fragment_key = f"{plan.deckies[0].name}-ssh"
    environment = compose_doc["services"][fragment_key]["environment"]
    assert environment["SSH_ROOT_PASSWORD"] == "admin"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_unknown_nested_key_passes_through(repo):
    """Forward-compat: unknown keys under a service reach the fragment
    untouched (current services ignore them; future services may read)."""
    plan = generate(_cfg())
    plan.deckies[0].service_config = {
        "ssh": {"password": "x", "future_flag": "hi"},
    }
    topology_id = await persist(repo, plan)

    snapshot = await hydrate(repo, topology_id)
    wanted_name = plan.deckies[0].name
    stored = next(row for row in snapshot["deckies"] if row["name"] == wanted_name)

    ssh_config = stored["decky_config"]["service_config"]["ssh"]
    assert ssh_config["future_flag"] == "hi"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_compose_file_yaml_is_loadable(repo):
    """Regression: the compose dict roundtrips through yaml cleanly."""
    plan = generate(_cfg())
    plan.deckies[0].service_config = {"ssh": {"password": "roundtrip"}}
    topology_id = await persist(repo, plan)
    snapshot = await hydrate(repo, topology_id)
    compose_doc = generate_topology_compose(snapshot)

    # Dump and reload, then read the password back from the reloaded copy.
    yaml_text = yaml.dump(compose_doc, sort_keys=False)
    reloaded = yaml.safe_load(yaml_text)

    fragment_key = f"{plan.deckies[0].name}-ssh"
    environment = reloaded["services"][fragment_key]["environment"]
    assert environment["SSH_ROOT_PASSWORD"] == "roundtrip"
|
||||
55
tests/topology/test_status.py
Normal file
55
tests/topology/test_status.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""MazeNET status state-machine tests.
|
||||
|
||||
Every legal transition declared in the plan is permitted; every other
|
||||
pair (including self-loops and unknowns) must raise.
|
||||
"""
|
||||
import pytest
|
||||
from decnet.topology.status import (
|
||||
TopologyStatus,
|
||||
TopologyStatusError,
|
||||
assert_transition,
|
||||
legal_next,
|
||||
)
|
||||
|
||||
# Every (current, next) status pair the tests treat as a legal transition.
LEGAL = {
    # from PENDING
    (TopologyStatus.PENDING, TopologyStatus.DEPLOYING),
    (TopologyStatus.PENDING, TopologyStatus.TORN_DOWN),
    # from DEPLOYING
    (TopologyStatus.DEPLOYING, TopologyStatus.ACTIVE),
    (TopologyStatus.DEPLOYING, TopologyStatus.FAILED),
    (TopologyStatus.DEPLOYING, TopologyStatus.DEGRADED),
    (TopologyStatus.DEPLOYING, TopologyStatus.TEARING_DOWN),
    # from ACTIVE
    (TopologyStatus.ACTIVE, TopologyStatus.DEGRADED),
    (TopologyStatus.ACTIVE, TopologyStatus.TEARING_DOWN),
    # from DEGRADED
    (TopologyStatus.DEGRADED, TopologyStatus.ACTIVE),
    (TopologyStatus.DEGRADED, TopologyStatus.TEARING_DOWN),
    # from FAILED
    (TopologyStatus.FAILED, TopologyStatus.TEARING_DOWN),
    # from TEARING_DOWN
    (TopologyStatus.TEARING_DOWN, TopologyStatus.TORN_DOWN),
    (TopologyStatus.TEARING_DOWN, TopologyStatus.DEGRADED),
}
|
||||
|
||||
|
||||
def test_every_legal_transition_permitted():
    """``assert_transition`` accepts every pair listed in ``LEGAL``."""
    for current, target in LEGAL:
        # Passing means the call simply returns without raising.
        assert_transition(current, target)
|
||||
|
||||
|
||||
def test_every_illegal_transition_raises():
    """Every status pair outside ``LEGAL`` raises ``TopologyStatusError``."""
    illegal_pairs = [
        (current, target)
        for current in TopologyStatus.ALL
        for target in TopologyStatus.ALL
        if (current, target) not in LEGAL
    ]
    for current, target in illegal_pairs:
        with pytest.raises(TopologyStatusError):
            assert_transition(current, target)
|
||||
|
||||
|
||||
def test_torn_down_is_terminal():
    """``TORN_DOWN`` has no legal successor states."""
    successors = legal_next(TopologyStatus.TORN_DOWN)
    assert successors == frozenset()
|
||||
|
||||
|
||||
def test_unknown_status_raises():
    """An unknown status is rejected whether it is the source or the target."""
    # Unknown target, then unknown source.
    for current, target in (("pending", "bogus"), ("bogus", "active")):
        with pytest.raises(TopologyStatusError):
            assert_transition(current, target)
    # ``legal_next`` rejects an unknown status as well.
    with pytest.raises(TopologyStatusError):
        legal_next("bogus")
|
||||
178
tests/topology/test_validate.py
Normal file
178
tests/topology/test_validate.py
Normal file
@@ -0,0 +1,178 @@
|
||||
"""Validator-rule unit tests + deployer precondition integration."""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.engine.deployer import deploy_topology
|
||||
from decnet.topology.config import TopologyConfig
|
||||
from decnet.topology.generator import generate
|
||||
from decnet.topology.persistence import hydrate, persist
|
||||
from decnet.topology.status import TopologyStatus
|
||||
from decnet.topology.validate import (
|
||||
ValidationError,
|
||||
errors,
|
||||
validate,
|
||||
)
|
||||
from decnet.web.db.factory import get_repository
|
||||
|
||||
|
||||
def _cfg(**kw) -> TopologyConfig:
    """Build a ``TopologyConfig`` from test defaults, overridden by ``kw``."""
    defaults = {
        "name": "val",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 9,
    }
    # Caller-supplied keys win over the defaults.
    return TopologyConfig(**{**defaults, **kw})
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path):
    """Return a repository backed by ``val.db`` under ``tmp_path``.

    ``initialize()`` is awaited before the repository is handed to the test.
    """
    db_file = tmp_path / "val.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||
|
||||
|
||||
async def _hydrate_plan(repo, plan) -> tuple[dict, str]:
    """Persist *plan* and return its hydrated form together with its id.

    Args:
        repo: Repository the plan is persisted to and hydrated from.
        plan: Generated topology plan to store.

    Returns:
        A ``(hydrated, topology_id)`` pair. Callers unpack both values, so
        the annotation is a tuple rather than a bare ``dict``.
    """
    tid = await persist(repo, plan)
    return await hydrate(repo, tid), tid
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- rules
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_valid_topology_has_no_errors(repo):
    """An unmodified generated topology yields no error-level issues."""
    hydrated, _ = await _hydrate_plan(repo, generate(_cfg()))
    issues = validate(hydrated)
    assert errors(issues) == []
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_dmz_missing(repo):
    """Clearing ``is_dmz`` on every LAN must report a ``DMZ_MISSING`` error."""
    hydrated, _ = await _hydrate_plan(repo, generate(_cfg()))
    for lan in hydrated["lans"]:
        lan["is_dmz"] = False

    error_codes = [
        issue.code for issue in validate(hydrated) if issue.severity == "error"
    ]
    # Cascaded DMZ_ORPHAN findings may accompany DMZ_MISSING; only the
    # specific rule is required to fire.
    assert "DMZ_MISSING" in error_codes
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_dmz_multiple(repo):
    """Flagging every LAN as DMZ must report ``DMZ_MULTIPLE``."""
    hydrated, _ = await _hydrate_plan(repo, generate(_cfg()))
    for lan in hydrated["lans"]:
        lan["is_dmz"] = True

    reported = [issue.code for issue in validate(hydrated)]
    assert "DMZ_MULTIPLE" in reported
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_orphan_decky(repo):
    """A decky with no remaining edges must report ``DECKY_ORPHAN``."""
    plan = generate(_cfg())
    h, _ = await _hydrate_plan(repo, plan)

    # Look the victim up once rather than re-indexing it for every edge.
    orphan_uuid = h["deckies"][0]["uuid"]
    h["edges"] = [e for e in h["edges"] if e["decky_uuid"] != orphan_uuid]

    assert "DECKY_ORPHAN" in [i.code for i in validate(h)]
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_ip_collision(repo):
    """Two deckies on one LAN claiming the same IP must report ``IP_COLLISION``."""
    plan = generate(_cfg(deckies_per_lan_max=2, deckies_per_lan_min=2))
    h, _ = await _hydrate_plan(repo, plan)

    # Resolve the target LAN once, then collect the deckies attached to it
    # with a single pass over the edges instead of a nested scan.
    lan = h["lans"][0]
    attached = {e["decky_uuid"] for e in h["edges"] if e["lan_id"] == lan["id"]}
    deckies = [d for d in h["deckies"] if d["uuid"] in attached]
    assert len(deckies) >= 2

    # Force two deckies in the same LAN to claim the same IP.
    shared_ip = next(iter(deckies[0]["decky_config"]["ips_by_lan"].values()))
    deckies[1]["decky_config"]["ips_by_lan"][lan["name"]] = shared_ip

    assert "IP_COLLISION" in [i.code for i in validate(h)]
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_ip_out_of_subnet(repo):
    """Rewriting a decky's IP to 10.99.99.99 must report ``IP_OUT_OF_SUBNET``."""
    hydrated, _ = await _hydrate_plan(repo, generate(_cfg()))

    # Overwrite the first decky's first per-LAN address.
    ips_by_lan = hydrated["deckies"][0]["decky_config"]["ips_by_lan"]
    first_lan = next(iter(ips_by_lan))
    ips_by_lan[first_lan] = "10.99.99.99"

    reported = [issue.code for issue in validate(hydrated)]
    assert "IP_OUT_OF_SUBNET" in reported
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_subnet_overlap(repo):
    """Overlapping LAN subnets must report ``SUBNET_OVERLAP``."""
    hydrated, _ = await _hydrate_plan(repo, generate(_cfg()))
    lans = hydrated["lans"]

    # Widen the first LAN to a /16 and, when a second LAN exists, place it
    # on a /24 that falls inside that /16.
    lans[0]["subnet"] = "10.0.0.0/16"
    if len(lans) > 1:
        lans[1]["subnet"] = "10.0.5.0/24"

    reported = [issue.code for issue in validate(hydrated)]
    assert "SUBNET_OVERLAP" in reported
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_unknown_service(repo):
    """Adding the made-up service name to a decky must report ``UNKNOWN_SERVICE``."""
    hydrated, _ = await _hydrate_plan(repo, generate(_cfg()))

    first_decky = hydrated["deckies"][0]
    first_decky["services"].append("teleporter-xyz")

    reported = [issue.code for issue in validate(hydrated)]
    assert "UNKNOWN_SERVICE" in reported
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_service_config_undeclared(repo):
    """Config for a service the decky does not list must be reported."""
    hydrated, _ = await _hydrate_plan(repo, generate(_cfg()))

    # The test config requests only "ssh", so an "rdp" entry is
    # configuration for a service that was never declared.
    decky_config = hydrated["deckies"][0]["decky_config"]
    decky_config["service_config"] = {"rdp": {"password": "no"}}

    reported = [issue.code for issue in validate(hydrated)]
    assert "SERVICE_CFG_UNDECLARED" in reported
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- deployer hook
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_deploy_aborts_on_validation_error(repo, tmp_path, monkeypatch):
    """Broken topology must be rejected before any Docker call."""
    monkeypatch.chdir(tmp_path)
    tid = await persist(repo, generate(_cfg()))

    # Corrupt the persisted state: strip the DMZ flag from the first LAN.
    lans = await repo.list_lans_for_topology(tid)
    target_lan = lans[0]

    # Update the row directly through a repository session.
    from sqlmodel import select
    from decnet.web.db.models import LAN

    async with repo._session() as session:
        result = await session.execute(select(LAN).where(LAN.id == target_lan["id"]))
        lan_row = result.scalar_one()
        lan_row.is_dmz = False
        session.add(lan_row)
        await session.commit()

    class _ShouldNotCall:
        """Stand-in for the docker module that fails if a client is requested."""

        def from_env(self):  # noqa: D401
            raise AssertionError("docker must not be called on a rejected topology")

    docker_stub = _ShouldNotCall()
    with patch("decnet.engine.deployer.docker", docker_stub):
        with pytest.raises(ValidationError):
            await deploy_topology(repo, tid)

    # The rejected topology must still be in its pending state.
    topology = await repo.get_topology(tid)
    assert topology["status"] == TopologyStatus.PENDING
|
||||
Reference in New Issue
Block a user