feat(mazenet): host resolution + cross-host bridge guard
Adds resolve_lan_host(lan, topology) and partition_lans_by_host(hydrated) in topology.persistence — the single source of truth that every per-host caller (deployer, mutator, validator) consults to decide which host a LAN belongs to. Resolution order: lan.host_uuid → topology.target_host_uuid → None (master-local). Adds validator rule BRIDGE_HOST_SPLIT: a multi-homed (bridge) decky attached to LANs that resolve to different hosts is rejected at deploy time. A bridge decky is one container with NICs in multiple LANs; under the co-locate constraint (no overlay network), all of those LANs must share a host.
This commit is contained in:
@@ -213,6 +213,47 @@ def _backfill_decky_configs(
|
|||||||
decky["decky_config"] = cfg
|
decky["decky_config"] = cfg
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_lan_host(
|
||||||
|
lan: dict[str, Any], topology: dict[str, Any]
|
||||||
|
) -> str | None:
|
||||||
|
"""Effective swarm host for a LAN.
|
||||||
|
|
||||||
|
A LAN is one Docker bridge — bridges don't span hosts — so this is
|
||||||
|
the single source of truth callers (deployer, mutator, validator)
|
||||||
|
consult before issuing per-host work.
|
||||||
|
|
||||||
|
Resolution order::
|
||||||
|
|
||||||
|
lan.host_uuid → topology.target_host_uuid → None (= master-local)
|
||||||
|
"""
|
||||||
|
h = lan.get("host_uuid") if lan else None
|
||||||
|
if h:
|
||||||
|
return h
|
||||||
|
return (topology or {}).get("target_host_uuid")
|
||||||
|
|
||||||
|
|
||||||
|
def partition_lans_by_host(
    hydrated: dict[str, Any],
) -> dict[str | None, list[dict[str, Any]]]:
    """Group LANs by their effective host.

    Keys are host UUIDs; ``None`` means master-local. Order of LANs
    within each bucket follows the input order.

    Args:
        hydrated: Mapping read for two keys: ``"topology"`` (passed to
            ``resolve_lan_host`` as the fallback source) and ``"lans"``
            (the LAN records to bucket). Either may be missing or
            ``None``.

    Returns:
        A dict mapping each resolved host to the list of LAN records
        that resolve to it. Hosts with no LANs do not appear.
    """
    topology = hydrated.get("topology") or {}
    buckets: dict[str | None, list[dict[str, Any]]] = {}
    # ``or []`` mirrors the topology lookup above: an explicit
    # ``"lans": None`` means "no LANs" rather than a TypeError.
    for lan in hydrated.get("lans") or []:
        buckets.setdefault(resolve_lan_host(lan, topology), []).append(lan)
    return buckets
|
||||||
|
|
||||||
|
|
||||||
# Re-export the status constants so callers can ``from decnet.topology.persistence
|
# Re-export the status constants so callers can ``from decnet.topology.persistence
|
||||||
# import TopologyStatus`` without chasing modules.
|
# import TopologyStatus`` without chasing modules.
|
||||||
__all__ = ["persist", "transition_status", "hydrate", "TopologyStatus"]
|
__all__ = [
|
||||||
|
"persist",
|
||||||
|
"transition_status",
|
||||||
|
"hydrate",
|
||||||
|
"resolve_lan_host",
|
||||||
|
"partition_lans_by_host",
|
||||||
|
"TopologyStatus",
|
||||||
|
]
|
||||||
|
|||||||
@@ -329,6 +329,57 @@ def check_no_host_port_collision(h: dict[str, Any]) -> list[ValidationIssue]:
|
|||||||
return issues
|
return issues
|
||||||
|
|
||||||
|
|
||||||
|
def check_bridge_decky_same_host(
    h: dict[str, Any],
) -> list[ValidationIssue]:
    """A multi-homed (bridge) decky is one container — its LANs must
    therefore resolve to the same swarm host.

    Without this, the deployer would have to either silently pick a
    host for the bridge container (orphaning IPs on the other host's
    LAN) or implement a cross-host overlay. The co-locate decision
    rules out the overlay, so we reject the topology up front.

    Args:
        h: Mapping read for ``"topology"``, ``"lans"`` (records with
            ``"id"``), ``"deckies"`` (records with ``"uuid"``) and
            ``"edges"`` (records with ``"decky_uuid"`` and ``"lan_id"``).

    Returns:
        One ``BRIDGE_HOST_SPLIT`` error per decky that has two or more
        edges whose known LANs resolve to more than one host; an empty
        list otherwise. Edges pointing at LAN ids absent from
        ``h["lans"]`` are ignored by this rule.
    """
    # Function-scope import kept from the original; presumably avoids an
    # import cycle with the persistence module — confirm before hoisting.
    from decnet.topology.persistence import resolve_lan_host

    topology = h.get("topology") or {}
    lans_by_id = {lan["id"]: lan for lan in h.get("lans", [])}
    deckies_by_uuid = {d["uuid"]: d for d in h.get("deckies", [])}

    # decky uuid -> LAN ids it is attached to, in edge order.
    decky_lans: dict[str, list[str]] = {}
    for edge in h.get("edges", []):
        decky_lans.setdefault(edge["decky_uuid"], []).append(edge["lan_id"])

    issues: list[ValidationIssue] = []
    for decky_uuid, lan_ids in decky_lans.items():
        if len(lan_ids) < 2:
            continue  # single-homed: nothing to split
        # Resolve the known LANs once; reused for hosts and for the report.
        attached = [lans_by_id[lid] for lid in lan_ids if lid in lans_by_id]
        hosts = {resolve_lan_host(lan, topology) for lan in attached}
        if len(hosts) <= 1:
            continue
        # One label for both message and target, so an edge that references
        # a decky missing from ``deckies`` is still identifiable (by UUID)
        # in the structured target instead of showing up as ``None``.
        label = deckies_by_uuid.get(decky_uuid, {}).get("name", decky_uuid)
        issues.append(
            ValidationIssue(
                "error",
                "BRIDGE_HOST_SPLIT",
                f"bridge decky {label!r} is "
                "attached to LANs assigned to different swarm hosts; "
                "a single container cannot span hosts",
                target={
                    "decky": label,
                    "lans": [lan.get("name") for lan in attached],
                },
            )
        )
    return issues
|
||||||
|
|
||||||
|
|
||||||
# Pure-data rules. Host-state rules (like PORT_COLLISION) are
|
# Pure-data rules. Host-state rules (like PORT_COLLISION) are
|
||||||
# *not* listed here — they're called separately by the live deployer
|
# *not* listed here — they're called separately by the live deployer
|
||||||
# so that unit tests exercising validate() stay hermetic.
|
# so that unit tests exercising validate() stay hermetic.
|
||||||
@@ -341,6 +392,7 @@ _RULES: list[Callable[[dict[str, Any]], list[ValidationIssue]]] = [
|
|||||||
check_no_subnet_overlap,
|
check_no_subnet_overlap,
|
||||||
check_services_known,
|
check_services_known,
|
||||||
check_service_config_shape,
|
check_service_config_shape,
|
||||||
|
check_bridge_decky_same_host,
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,9 @@ from decnet.topology.config import TopologyConfig
|
|||||||
from decnet.topology.generator import generate
|
from decnet.topology.generator import generate
|
||||||
from decnet.topology.persistence import (
|
from decnet.topology.persistence import (
|
||||||
hydrate,
|
hydrate,
|
||||||
|
partition_lans_by_host,
|
||||||
persist,
|
persist,
|
||||||
|
resolve_lan_host,
|
||||||
transition_status,
|
transition_status,
|
||||||
)
|
)
|
||||||
from decnet.topology.status import TopologyStatus, TopologyStatusError
|
from decnet.topology.status import TopologyStatus, TopologyStatusError
|
||||||
@@ -89,3 +91,53 @@ async def test_config_snapshot_preserves_seed(repo):
|
|||||||
topo = await repo.get_topology(tid)
|
topo = await repo.get_topology(tid)
|
||||||
assert topo["config_snapshot"]["seed"] == 12345
|
assert topo["config_snapshot"]["seed"] == 12345
|
||||||
assert topo["config_snapshot"]["depth"] == 2
|
assert topo["config_snapshot"]["depth"] == 2
|
||||||
|
|
||||||
|
|
||||||
|
# --- per-LAN host resolution ---
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_lan_host_prefers_lan_pin():
    """A LAN-level ``host_uuid`` wins over the topology-wide target."""
    resolved = resolve_lan_host(
        {"host_uuid": "lan-host"},
        {"target_host_uuid": "topo-host"},
    )
    assert resolved == "lan-host"
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_lan_host_falls_back_to_topology_target():
    """An unpinned LAN (``host_uuid`` is ``None``) inherits the topology target."""
    resolved = resolve_lan_host(
        {"host_uuid": None},
        {"target_host_uuid": "topo-host"},
    )
    assert resolved == "topo-host"
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_lan_host_returns_none_for_master():
    """With no host named at either level the result is ``None`` (master-local)."""
    unpinned_cases = [
        ({"host_uuid": None}, {"target_host_uuid": None}),
        ({}, {}),
    ]
    for lan, topology in unpinned_cases:
        assert resolve_lan_host(lan, topology) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_partition_lans_by_host_groups_correctly():
    """LANs land in one bucket per resolved host, in input order."""
    pins = [("1", None), ("2", "A"), ("3", "A"), ("4", "B")]
    hydrated = {
        "topology": {"target_host_uuid": None},
        "lans": [{"id": lan_id, "host_uuid": host} for lan_id, host in pins],
    }

    buckets = partition_lans_by_host(hydrated)

    # Comparing the whole mapping checks the key set and every bucket at once.
    ids_by_host = {
        host: [lan["id"] for lan in lans] for host, lans in buckets.items()
    }
    assert ids_by_host == {None: ["1"], "A": ["2", "3"], "B": ["4"]}
|
||||||
|
|
||||||
|
|
||||||
|
def test_partition_lans_uses_topology_default_when_lan_unset():
    """Unpinned LANs fall into the topology default's bucket; pinned LANs
    keep their own bucket."""
    hydrated = {
        "topology": {"target_host_uuid": "default-host"},
        "lans": [
            {"id": "1", "host_uuid": None},
            {"id": "2", "host_uuid": "explicit"},
        ],
    }
    out = partition_lans_by_host(hydrated)
    assert set(out.keys()) == {"default-host", "explicit"}
    assert [lan["id"] for lan in out["default-host"]] == ["1"]
    # Also pin the explicitly assigned LAN, so a regression that drops or
    # mis-buckets it cannot hide behind the key-set check alone.
    assert [lan["id"] for lan in out["explicit"]] == ["2"]
|
||||||
|
|||||||
@@ -145,6 +145,55 @@ async def test_service_config_undeclared(repo):
|
|||||||
assert "SERVICE_CFG_UNDECLARED" in [i.code for i in validate(h)]
|
assert "SERVICE_CFG_UNDECLARED" in [i.code for i in validate(h)]
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------- per-LAN host
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_bridge_decky_same_host_passes_when_colocated(repo):
    """A bridge decky whose LANs share a host must not flag."""
    plan = generate(
        _cfg(
            depth=2,
            branching_factor=1,
            deckies_per_lan_min=1,
            deckies_per_lan_max=1,
            cross_edge_probability=0.0,
        )
    )
    h, _ = await _hydrate_plan(repo, plan)

    # Guard against a vacuous pass: the rule only inspects deckies with
    # two or more edges, so make sure the plan actually contains one.
    edges_per_decky: dict[str, int] = {}
    for e in h["edges"]:
        edges_per_decky[e["decky_uuid"]] = edges_per_decky.get(e["decky_uuid"], 0) + 1
    assert any(n >= 2 for n in edges_per_decky.values()), (
        "test setup expected ≥1 bridge decky"
    )

    for lan in h["lans"]:
        lan["host_uuid"] = "host-A"
    assert "BRIDGE_HOST_SPLIT" not in [i.code for i in validate(h)]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_bridge_decky_split_across_hosts_fails(repo):
    """Pinning a bridge decky's LANs to different hosts yields a
    ``BRIDGE_HOST_SPLIT`` error from ``validate``."""
    cfg = _cfg(
        depth=2,
        branching_factor=1,
        deckies_per_lan_min=1,
        deckies_per_lan_max=1,
        cross_edge_probability=0.0,
    )
    h, _ = await _hydrate_plan(repo, generate(cfg))

    # Locate a bridge decky: any decky with edges into at least two LANs.
    lan_ids_by_decky: dict[str, list[str]] = {}
    for edge in h["edges"]:
        lan_ids_by_decky.setdefault(edge["decky_uuid"], []).append(edge["lan_id"])
    bridge_lan_ids = next(
        (ids for ids in lan_ids_by_decky.values() if len(ids) >= 2),
        None,
    )
    assert bridge_lan_ids, "test setup expected ≥1 bridge decky"

    # Put its first two LANs on different hosts.
    lan_for = {lan["id"]: lan for lan in h["lans"]}
    for lan_id, host in zip(bridge_lan_ids, ("host-A", "host-B")):
        lan_for[lan_id]["host_uuid"] = host

    error_codes = [
        issue.code for issue in validate(h) if issue.severity == "error"
    ]
    assert "BRIDGE_HOST_SPLIT" in error_codes
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------- deployer hook
|
# --------------------------------------------------------------------- deployer hook
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user