From 448fcd122723a327348949c72773e792653b45d2 Mon Sep 17 00:00:00 2001
From: anti
Date: Sat, 25 Apr 2026 03:06:53 -0400
Subject: [PATCH] feat(mazenet): host resolution + cross-host bridge guard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds resolve_lan_host(lan, topology) and partition_lans_by_host(hydrated)
in topology.persistence — the single source of truth every per-host
caller (deployer, mutator, validator) consults to decide where a LAN
belongs. Resolution: lan.host_uuid → topology.target_host_uuid → None
(master).

Adds validator rule BRIDGE_HOST_SPLIT: a multi-homed (bridge) decky
attached to LANs that resolve to different hosts is rejected at
deploy-time. A bridge decky is one container with NICs into multiple
LANs; under the co-locate constraint (no overlay network), all those
LANs must share a host.
---
Review notes (below the "---" marker, so not part of the commit message):

* Line breaks and indentation were rebuilt from a copy in which all
  whitespace had been collapsed. Indentation of the added lines follows
  Python structure; indentation of unchanged context lines is inferred,
  so use `git apply --ignore-whitespace` if a hunk is rejected.
* resolve_lan_host(): a falsy topology.target_host_uuid (e.g. "") now
  resolves to None, mirroring how a falsy lan.host_uuid is already
  treated. One assertion covering this was added to
  tests/topology/test_persistence.py (hunk and diffstat counts updated).
* The blob ids on the "index" lines predate these edits.
* The last hunk of tests/topology/test_validate.py declares 6 pre-image
  lines but the copy ends after its first trailing context line; the
  remaining context appears truncated and is not reconstructed here.

 decnet/topology/persistence.py     | 43 +++++++++++++++++++++++-
 decnet/topology/validate.py        | 52 ++++++++++++++++++++++++++++++
 tests/topology/test_persistence.py | 53 ++++++++++++++++++++++++++++++
 tests/topology/test_validate.py    | 49 ++++++++++++++++++++++++++++
 4 files changed, 196 insertions(+), 1 deletion(-)

diff --git a/decnet/topology/persistence.py b/decnet/topology/persistence.py
index d70e8f4f..621ef7cb 100644
--- a/decnet/topology/persistence.py
+++ b/decnet/topology/persistence.py
@@ -213,6 +213,47 @@ def _backfill_decky_configs(
         decky["decky_config"] = cfg
 
 
+def resolve_lan_host(
+    lan: dict[str, Any], topology: dict[str, Any]
+) -> str | None:
+    """Effective swarm host for a LAN.
+
+    A LAN is one Docker bridge — bridges don't span hosts — so this is
+    the single source of truth callers (deployer, mutator, validator)
+    consult before issuing per-host work.
+
+    Resolution order::
+
+        lan.host_uuid → topology.target_host_uuid → None (= master-local)
+    """
+    h = lan.get("host_uuid") if lan else None
+    if h:
+        return h
+    return (topology or {}).get("target_host_uuid") or None
+
+
+def partition_lans_by_host(
+    hydrated: dict[str, Any],
+) -> dict[str | None, list[dict[str, Any]]]:
+    """Group LANs by their effective host.
+
+    Keys are host UUIDs; ``None`` means master-local. Order of LANs
+    within each bucket follows the input order.
+    """
+    out: dict[str | None, list[dict[str, Any]]] = {}
+    topology = hydrated.get("topology") or {}
+    for lan in hydrated.get("lans", []):
+        out.setdefault(resolve_lan_host(lan, topology), []).append(lan)
+    return out
+
+
 # Re-export the status constants so callers can ``from decnet.topology.persistence
 # import TopologyStatus`` without chasing modules.
-__all__ = ["persist", "transition_status", "hydrate", "TopologyStatus"]
+__all__ = [
+    "persist",
+    "transition_status",
+    "hydrate",
+    "resolve_lan_host",
+    "partition_lans_by_host",
+    "TopologyStatus",
+]
diff --git a/decnet/topology/validate.py b/decnet/topology/validate.py
index 338e55e9..4cb717bd 100644
--- a/decnet/topology/validate.py
+++ b/decnet/topology/validate.py
@@ -329,6 +329,57 @@ def check_no_host_port_collision(h: dict[str, Any]) -> list[ValidationIssue]:
     return issues
 
 
+def check_bridge_decky_same_host(
+    h: dict[str, Any],
+) -> list[ValidationIssue]:
+    """A multi-homed (bridge) decky is one container — its LANs must
+    therefore resolve to the same swarm host.
+
+    Without this, the deployer would have to either silently pick a
+    host for the bridge container (orphaning IPs on the other host's
+    LAN) or implement a cross-host overlay. The co-locate decision
+    rules out the overlay, so we reject the topology up front.
+    """
+    from decnet.topology.persistence import resolve_lan_host
+
+    topology = h.get("topology") or {}
+    lans_by_id = {lan["id"]: lan for lan in h.get("lans", [])}
+    deckies_by_uuid = {d["uuid"]: d for d in h.get("deckies", [])}
+    decky_lans: dict[str, list[str]] = {}
+    for edge in h.get("edges", []):
+        decky_lans.setdefault(edge["decky_uuid"], []).append(edge["lan_id"])
+
+    issues: list[ValidationIssue] = []
+    for decky_uuid, lan_ids in decky_lans.items():
+        if len(lan_ids) < 2:
+            continue
+        hosts = {
+            resolve_lan_host(lans_by_id[lid], topology)
+            for lid in lan_ids
+            if lid in lans_by_id
+        }
+        if len(hosts) > 1:
+            decky = deckies_by_uuid.get(decky_uuid, {})
+            issues.append(
+                ValidationIssue(
+                    "error",
+                    "BRIDGE_HOST_SPLIT",
+                    f"bridge decky {decky.get('name', decky_uuid)!r} is "
+                    "attached to LANs assigned to different swarm hosts; "
+                    "a single container cannot span hosts",
+                    target={
+                        "decky": decky.get("name"),
+                        "lans": [
+                            lans_by_id[lid].get("name")
+                            for lid in lan_ids
+                            if lid in lans_by_id
+                        ],
+                    },
+                )
+            )
+    return issues
+
+
 # Pure-data rules. Host-state rules (like PORT_COLLISION) are
 # *not* listed here — they're called separately by the live deployer
 # so that unit tests exercising validate() stay hermetic.
@@ -341,6 +392,7 @@ _RULES: list[Callable[[dict[str, Any]], list[ValidationIssue]]] = [
     check_no_subnet_overlap,
     check_services_known,
     check_service_config_shape,
+    check_bridge_decky_same_host,
 ]
 
 
diff --git a/tests/topology/test_persistence.py b/tests/topology/test_persistence.py
index 34fbcd2b..34c79893 100644
--- a/tests/topology/test_persistence.py
+++ b/tests/topology/test_persistence.py
@@ -5,7 +5,9 @@ from decnet.topology.config import TopologyConfig
 from decnet.topology.generator import generate
 from decnet.topology.persistence import (
     hydrate,
+    partition_lans_by_host,
     persist,
+    resolve_lan_host,
     transition_status,
 )
 from decnet.topology.status import TopologyStatus, TopologyStatusError
@@ -89,3 +91,54 @@ async def test_config_snapshot_preserves_seed(repo):
     topo = await repo.get_topology(tid)
     assert topo["config_snapshot"]["seed"] == 12345
     assert topo["config_snapshot"]["depth"] == 2
+
+
+# --- per-LAN host resolution ---
+
+
+def test_resolve_lan_host_prefers_lan_pin():
+    topology = {"target_host_uuid": "topo-host"}
+    lan = {"host_uuid": "lan-host"}
+    assert resolve_lan_host(lan, topology) == "lan-host"
+
+
+def test_resolve_lan_host_falls_back_to_topology_target():
+    topology = {"target_host_uuid": "topo-host"}
+    lan = {"host_uuid": None}
+    assert resolve_lan_host(lan, topology) == "topo-host"
+
+
+def test_resolve_lan_host_returns_none_for_master():
+    assert resolve_lan_host({"host_uuid": None}, {"target_host_uuid": None}) is None
+    assert resolve_lan_host({}, {}) is None
+    assert resolve_lan_host({}, {"target_host_uuid": ""}) is None
+
+
+def test_partition_lans_by_host_groups_correctly():
+    hydrated = {
+        "topology": {"target_host_uuid": None},
+        "lans": [
+            {"id": "1", "host_uuid": None},
+            {"id": "2", "host_uuid": "A"},
+            {"id": "3", "host_uuid": "A"},
+            {"id": "4", "host_uuid": "B"},
+        ],
+    }
+    out = partition_lans_by_host(hydrated)
+    assert set(out.keys()) == {None, "A", "B"}
+    assert [lan["id"] for lan in out["A"]] == ["2", "3"]
+    assert [lan["id"] for lan in out["B"]] == ["4"]
+    assert [lan["id"] for lan in out[None]] == ["1"]
+
+
+def test_partition_lans_uses_topology_default_when_lan_unset():
+    hydrated = {
+        "topology": {"target_host_uuid": "default-host"},
+        "lans": [
+            {"id": "1", "host_uuid": None},
+            {"id": "2", "host_uuid": "explicit"},
+        ],
+    }
+    out = partition_lans_by_host(hydrated)
+    assert set(out.keys()) == {"default-host", "explicit"}
+    assert [lan["id"] for lan in out["default-host"]] == ["1"]
diff --git a/tests/topology/test_validate.py b/tests/topology/test_validate.py
index 8863507f..05bf6fe4 100644
--- a/tests/topology/test_validate.py
+++ b/tests/topology/test_validate.py
@@ -145,6 +145,55 @@ async def test_service_config_undeclared(repo):
     assert "SERVICE_CFG_UNDECLARED" in [i.code for i in validate(h)]
 
 
+# --------------------------------------------------------------------- per-LAN host
+
+@pytest.mark.anyio
+async def test_bridge_decky_same_host_passes_when_colocated(repo):
+    """A bridge decky whose LANs share a host must not flag."""
+    plan = generate(
+        _cfg(
+            depth=2,
+            branching_factor=1,
+            deckies_per_lan_min=1,
+            deckies_per_lan_max=1,
+            cross_edge_probability=0.0,
+        )
+    )
+    h, _ = await _hydrate_plan(repo, plan)
+    for lan in h["lans"]:
+        lan["host_uuid"] = "host-A"
+    assert "BRIDGE_HOST_SPLIT" not in [i.code for i in validate(h)]
+
+
+@pytest.mark.anyio
+async def test_bridge_decky_split_across_hosts_fails(repo):
+    plan = generate(
+        _cfg(
+            depth=2,
+            branching_factor=1,
+            deckies_per_lan_min=1,
+            deckies_per_lan_max=1,
+            cross_edge_probability=0.0,
+        )
+    )
+    h, _ = await _hydrate_plan(repo, plan)
+    # Find a bridge decky (one connected to ≥2 LANs).
+    decky_lans: dict[str, list[str]] = {}
+    for e in h["edges"]:
+        decky_lans.setdefault(e["decky_uuid"], []).append(e["lan_id"])
+    bridge_lan_ids = next(
+        (lids for lids in decky_lans.values() if len(lids) >= 2), None
+    )
+    assert bridge_lan_ids, "test setup expected ≥1 bridge decky"
+    # Pin its two LANs to different hosts.
+    lans_by_id = {lan["id"]: lan for lan in h["lans"]}
+    lans_by_id[bridge_lan_ids[0]]["host_uuid"] = "host-A"
+    lans_by_id[bridge_lan_ids[1]]["host_uuid"] = "host-B"
+
+    codes = [i.code for i in validate(h) if i.severity == "error"]
+    assert "BRIDGE_HOST_SPLIT" in codes
+
+
 # --------------------------------------------------------------------- deployer hook