merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
23
decnet/topology/__init__.py
Normal file
23
decnet/topology/__init__.py
Normal file
@@ -0,0 +1,23 @@
|
||||
"""MazeNET — nested deception topologies.
|
||||
|
||||
A topology is an arbitrary-depth DAG of LANs, connected by multi-homed
|
||||
"bridge deckies" that optionally forward L3 between segments. One LAN
|
||||
is marked as the DMZ (Internet-facing). Persisted via the repo pattern;
|
||||
deployed via :mod:`decnet.engine.deployer`.
|
||||
"""
|
||||
from decnet.topology.config import TopologyConfig, GeneratedTopology
|
||||
from decnet.topology.generator import generate
|
||||
from decnet.topology.status import (
|
||||
TopologyStatus,
|
||||
assert_transition,
|
||||
TopologyStatusError,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"TopologyConfig",
|
||||
"GeneratedTopology",
|
||||
"generate",
|
||||
"TopologyStatus",
|
||||
"assert_transition",
|
||||
"TopologyStatusError",
|
||||
]
|
||||
160
decnet/topology/allocator.py
Normal file
160
decnet/topology/allocator.py
Normal file
@@ -0,0 +1,160 @@
|
||||
"""IP and subnet allocators for MazeNET topologies.
|
||||
|
||||
Extracted from :mod:`decnet.topology.generator` so the same primitives
|
||||
can be reused by the generator, the pre-deploy editor (REST), and the
|
||||
mutator reconciler. The allocators are pure — persistence lives in the
|
||||
repo; these objects hold in-memory state for a single planning pass.
|
||||
|
||||
``reserved_subnets`` queries the repo for every subnet currently claimed
|
||||
by a non-``torn_down`` topology so a new draft cannot collide with an
|
||||
open one.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from ipaddress import IPv4Network
|
||||
from typing import Any, Iterable
|
||||
|
||||
from decnet.topology.status import TopologyStatus
|
||||
|
||||
|
||||
class AllocatorExhausted(RuntimeError):
    """Signal that an allocator has no further values to hand out."""
|
||||
|
||||
|
||||
class IPAllocator:
    """Hands out host IPs within a single LAN subnet.

    The first usable host address of the subnet (``.1`` in a ``/24``) is
    treated as the gateway and is never handed out. Callers may pre-seed
    taken IPs via :meth:`reserve` before requesting :meth:`next_free`.
    All state lives in memory on the instance.
    """

    def __init__(self, subnet: str) -> None:
        """Build the allocatable pool for *subnet*.

        :param subnet: CIDR string; host bits are tolerated
            (``strict=False``).
        """
        self._net = IPv4Network(subnet, strict=False)
        self._gateway = str(next(self._net.hosts()))
        self._pool: list[str] = [
            str(ip) for ip in self._net.hosts() if str(ip) != self._gateway
        ]
        # Membership set built once so reserve()/is_free() are O(1)
        # instead of re-enumerating every host of the subnet per call.
        self._pool_set: frozenset[str] = frozenset(self._pool)
        self._taken: set[str] = set()
        self._cursor = 0

    def next_free(self) -> str:
        """Return the next unallocated IP and mark it taken.

        :raises AllocatorExhausted: when every pool address is taken.
        """
        while self._cursor < len(self._pool):
            ip = self._pool[self._cursor]
            self._cursor += 1
            if ip not in self._taken:
                self._taken.add(ip)
                return ip
        # Cursor past the end — fall back to a linear scan in case
        # releases opened up earlier slots.
        for ip in self._pool:
            if ip not in self._taken:
                self._taken.add(ip)
                return ip
        raise AllocatorExhausted(
            f"no free IPs left in {self._net.with_prefixlen}"
        )

    def reserve(self, ip: str) -> None:
        """Mark *ip* as taken without handing it out.

        :raises ValueError: if *ip* is the gateway or is not a usable
            host address of this subnet.
        """
        if ip == self._gateway:
            raise ValueError(f"{ip} is the gateway of {self._net.with_prefixlen}")
        if ip not in self._pool_set:
            raise ValueError(f"{ip} not in {self._net.with_prefixlen}")
        self._taken.add(ip)

    def release(self, ip: str) -> None:
        """Return *ip* to the pool; unknown or already-free IPs are ignored."""
        self._taken.discard(ip)

    def is_free(self, ip: str) -> bool:
        """Return True if *ip* is an allocatable host address not yet taken."""
        return ip in self._pool_set and ip not in self._taken
|
||||
|
||||
|
||||
class SubnetAllocator:
    """Sequential allocator of ``/24`` networks carved from one parent.

    ``base_prefix`` may be given as:

    * a full CIDR, e.g. ``"172.16.0.0/12"`` (4096 ``/24`` slots), or
    * the legacy two-octet shorthand, e.g. ``"172.20"``, which is lifted
      to ``"172.20.0.0/16"`` (256 slots) for configs written before
      mass-scale topologies existed.

    The parent's prefix length must be ≤ 24. A ``/24`` parent offers a
    single slot; shorter prefixes offer more.

    NOTE(review): reservations are matched by exact string against the
    canonical ``a.b.c.0/24`` form, so a reserved entry spelled any other
    way (host bits set, different prefix length) is not detected as a
    collision — confirm that callers only pass canonical ``/24`` strings.
    """

    def __init__(
        self,
        base_prefix: str,
        reserved: Iterable[str] = (),
    ) -> None:
        """Parse the parent network and record pre-existing reservations.

        :raises ValueError: if the parent is narrower than ``/24``.
        """
        parent = _parse_base(base_prefix)
        if parent.prefixlen > 24:
            raise ValueError(
                f"subnet base {parent.with_prefixlen} is narrower than /24; "
                "cannot carve /24 children out of it"
            )
        self._parent = parent
        # ipaddress yields the /24 children in ascending order, which keeps
        # the legacy sequential-third-octet behaviour for /16 bases and
        # sweeps second.third octet for /12 bases.
        if parent.prefixlen == 24:
            candidates = iter([parent])
        else:
            candidates = parent.subnets(new_prefix=24)
        self._iter = candidates
        self._reserved: set[str] = set(reserved)

    def next_free(self) -> str:
        """Return the next unreserved ``/24`` and mark it reserved.

        :raises AllocatorExhausted: when the parent has no ``/24`` left.
        """
        for candidate in self._iter:
            cidr = candidate.with_prefixlen
            if cidr in self._reserved:
                continue
            self._reserved.add(cidr)
            return cidr
        raise AllocatorExhausted(
            f"no free /24s left under {self._parent.with_prefixlen}"
        )

    def reserve(self, subnet: str) -> None:
        """Record *subnet* as claimed so :meth:`next_free` skips it."""
        self._reserved.add(subnet)

    def is_free(self, subnet: str) -> bool:
        """Return True if *subnet* has not been reserved on this allocator."""
        return not (subnet in self._reserved)
|
||||
|
||||
|
||||
def _parse_base(base_prefix: str) -> IPv4Network:
|
||||
"""Accept either ``'a.b.c.d/n'`` or legacy ``'a.b'`` shorthand."""
|
||||
stripped = base_prefix.strip().rstrip(".")
|
||||
if "/" in stripped:
|
||||
return IPv4Network(stripped, strict=False)
|
||||
octets = stripped.split(".")
|
||||
if len(octets) == 2:
|
||||
return IPv4Network(f"{stripped}.0.0/16", strict=False)
|
||||
if len(octets) == 4:
|
||||
return IPv4Network(f"{stripped}/24", strict=False)
|
||||
raise ValueError(
|
||||
f"unrecognised subnet base {base_prefix!r}; expected 'x.y' or CIDR"
|
||||
)
|
||||
|
||||
|
||||
# Statuses under which a topology's LANs still hold on to their subnets.
# torn_down is deliberately absent: it is the only state that hands the
# networks back to the pool.
_SUBNET_CLAIMING_STATES: frozenset[str] = frozenset(
    (
        TopologyStatus.PENDING,
        TopologyStatus.DEPLOYING,
        TopologyStatus.ACTIVE,
        TopologyStatus.DEGRADED,
        TopologyStatus.FAILED,
        TopologyStatus.TEARING_DOWN,
    )
)
|
||||
|
||||
|
||||
async def reserved_subnets(repo: Any) -> set[str]:
    """Collect every LAN subnet held by a topology in a claiming status.

    For each status in ``_SUBNET_CLAIMING_STATES`` the repo is asked for
    its topologies, then for each topology's LANs; every truthy
    ``subnet`` value is gathered into the returned set.
    """
    claimed: set[str] = set()
    for status in _SUBNET_CLAIMING_STATES:
        topologies = await repo.list_topologies(status=status)
        for topo in topologies:
            lans = await repo.list_lans_for_topology(topo["id"])
            # LANs without a subnet (missing or empty) claim nothing.
            claimed.update(
                value
                for value in (lan.get("subnet") for lan in lans)
                if value
            )
    return claimed
|
||||
165
decnet/topology/compose.py
Normal file
165
decnet/topology/compose.py
Normal file
@@ -0,0 +1,165 @@
|
||||
"""Compose-file generator for a MazeNET topology.
|
||||
|
||||
Produces a ``docker-compose.yml`` dict given a hydrated topology
|
||||
(the output of :func:`decnet.topology.persistence.hydrate`). The
|
||||
compose file references each LAN as an ``external: true`` network —
|
||||
the deployer creates the Docker bridge networks via the SDK before
|
||||
invoking ``docker compose up``.
|
||||
|
||||
Layout:
|
||||
* Each decky has a "base" container holding the LAN IPs. Multi-homed
|
||||
(bridge) deckies list every LAN they belong to under ``networks``
|
||||
with the per-LAN ``ipv4_address``.
|
||||
* Bridge deckies with ``forwards_l3=True`` get ``net.ipv4.ip_forward=1``
|
||||
baked in via compose ``sysctls`` plus ``NET_ADMIN`` in ``cap_add``.
|
||||
* Service containers share the base namespace via
|
||||
``network_mode: service:<base>``, matching the flat composer.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
from decnet.services.registry import get_service
|
||||
|
||||
_DEFAULT_BASE_IMAGE = "debian:bookworm-slim"
|
||||
|
||||
_DOCKER_LOGGING = {
|
||||
"driver": "json-file",
|
||||
"options": {"max-size": "10m", "max-file": "5"},
|
||||
}
|
||||
|
||||
|
||||
def _network_name(topology_id: str, lan_name: str) -> str:
|
||||
"""Docker network name for a given (topology, LAN) pair."""
|
||||
return f"decnet_t_{topology_id[:8]}_{lan_name.lower()}"
|
||||
|
||||
|
||||
def _container_name(topology_id: str, decky_name: str) -> str:
|
||||
"""Container name for a decky base in a topology."""
|
||||
return f"decnet_t_{topology_id[:8]}_{decky_name}"
|
||||
|
||||
|
||||
def generate_topology_compose(hydrated: dict[str, Any]) -> dict:
    """Build the compose dict for a hydrated topology.

    ``hydrated`` is the shape returned by
    :func:`decnet.topology.persistence.hydrate`. This function reads
    ``hydrated["topology"]["id"]``, the ``name`` of every entry in
    ``hydrated["lans"]``, and ``decky_config`` / ``services`` of every
    entry in ``hydrated["deckies"]``.

    :returns: a dict with ``version``, ``services`` and ``networks`` keys.
        Every LAN appears as an ``external`` network; every decky yields
        one base container plus one container per resolvable,
        non-fleet-singleton service.
    :raises ValueError: if a decky's ``ips_by_lan`` names a LAN that is
        not part of the topology.
    """
    topology_id = hydrated["topology"]["id"]
    lans = hydrated["lans"]
    deckies = hydrated["deckies"]

    known_lans = {lan["name"] for lan in lans}

    services: dict[str, dict] = {}

    for decky in deckies:
        cfg = decky["decky_config"]
        name = cfg["name"]
        ips_by_lan: dict[str, str] = cfg["ips_by_lan"]
        forwards_l3: bool = cfg.get("forwards_l3", False)
        service_config: dict[str, dict] = cfg.get("service_config", {}) or {}
        svc_names: list[str] = decky["services"]

        base_key = name
        nets: dict[str, dict] = {}
        for lan_name, ip in ips_by_lan.items():
            if lan_name not in known_lans:
                raise ValueError(
                    f"decky {name!r} references unknown LAN {lan_name!r}"
                )
            nets[_network_name(topology_id, lan_name)] = {"ipv4_address": ip}

        # Resolve every service exactly once; unknown services and fleet
        # singletons get neither published ports nor a container.
        resolved = []
        for svc_name in svc_names:
            svc = get_service(svc_name)
            if svc is None or svc.fleet_singleton:
                continue
            resolved.append((svc_name, svc))

        base: dict = {
            "image": _DEFAULT_BASE_IMAGE,
            "container_name": _container_name(topology_id, name),
            "hostname": name,
            "command": ["sleep", "infinity"],
            "restart": "unless-stopped",
            "networks": nets,
            "cap_add": ["NET_ADMIN"],
            "logging": _DOCKER_LOGGING,
            # Labels let the host collector discover topology containers
            # without consulting decnet-state.json (which only knows about
            # legacy fleet deckies). See decnet/collector/worker.py.
            "labels": {
                "decnet.topology.id": topology_id,
                "decnet.topology.decky": name,
                "decnet.topology.role": "base",
            },
        }
        if forwards_l3:
            base["sysctls"] = {"net.ipv4.ip_forward": 1}
            # Gateway decky — publish its service ports on the host so
            # attackers can reach the DMZ via the host's public IP.
            # Service containers share this base's namespace (see below),
            # so ports declared here expose every service's listener.
            #
            # dict.fromkeys de-duplicates while keeping first-seen order:
            # two services declaring the same port (or a service listed
            # twice) would otherwise produce repeated entries, which
            # Compose rejects as non-unique.
            #
            # NOTE(review): every forwarding decky publishes on the host,
            # so two of them offering the same port will request the same
            # host port — confirm only one gateway is expected per host.
            published: list[str] = list(
                dict.fromkeys(
                    f"{port}:{port}" for _, svc in resolved for port in svc.ports
                )
            )
            if published:
                base["ports"] = published

        services[base_key] = base

        for svc_name, svc in resolved:
            fragment = svc.compose_fragment(
                name, service_cfg=service_config.get(svc_name, {})
            )
            if "build" in fragment:
                fragment["build"].setdefault("args", {}).setdefault(
                    "BASE_IMAGE", _DEFAULT_BASE_IMAGE
                )
            fragment.setdefault("environment", {})
            fragment["environment"]["HOSTNAME"] = name
            # The service joins the base container's network namespace, so
            # its own hostname/networks settings must not be present.
            fragment["network_mode"] = f"service:{base_key}"
            fragment["depends_on"] = [base_key]
            fragment.pop("hostname", None)
            fragment.pop("networks", None)
            fragment["logging"] = _DOCKER_LOGGING
            # ``decnet.topology.service=true`` is the marker the collector
            # filters on — without it, log streams for this container are
            # never attached.
            labels = dict(fragment.get("labels") or {})
            labels.update({
                "decnet.topology.id": topology_id,
                "decnet.topology.decky": name,
                "decnet.topology.service_name": svc_name,
                "decnet.topology.service": "true",
            })
            fragment["labels"] = labels
            services[f"{name}-{svc_name}"] = fragment

    networks: dict[str, dict] = {
        _network_name(topology_id, lan["name"]): {
            "external": True,
            "name": _network_name(topology_id, lan["name"]),
        }
        for lan in lans
    }

    return {
        "version": "3.8",
        "services": services,
        "networks": networks,
    }
|
||||
|
||||
|
||||
def write_topology_compose(hydrated: dict[str, Any], output_path: Path) -> Path:
    """Render the topology's compose dict as YAML into *output_path*.

    Keys are emitted in insertion order and in block style. The same
    *output_path* is returned.
    """
    compose = generate_topology_compose(hydrated)
    rendered = yaml.dump(compose, default_flow_style=False, sort_keys=False)
    output_path.write_text(rendered)
    return output_path
|
||||
113
decnet/topology/config.py
Normal file
113
decnet/topology/config.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""MazeNET topology config + in-memory generation output."""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
|
||||
|
||||
class TopologyConfig(BaseModel):
    """Parameters driving :func:`decnet.topology.generator.generate`."""

    name: str = Field(..., min_length=1, max_length=64)
    mode: str = Field(default="unihost", pattern=r"^(unihost|agent)$")

    # Topology shape
    depth: int = Field(..., ge=1, le=16, description="Max depth from DMZ")
    branching_factor: int = Field(..., ge=1, le=8, description="Max child LANs per LAN")
    deckies_per_lan_min: int = Field(default=1, ge=0, le=32)
    deckies_per_lan_max: int = Field(default=3, ge=1, le=32)

    # Probability a given non-DMZ LAN's connection to its parent uses a
    # bridge decky that forwards L3 (enables attacker pivot). Bridge
    # existence between parent/child is implicit — every non-DMZ LAN
    # has exactly one parent bridge. This controls *forwarding*, not
    # the existence of the bridge.
    bridge_forward_probability: float = Field(default=1.0, ge=0.0, le=1.0)

    # Probability of injecting a DAG cross-edge: a decky also bridged
    # from its LAN to a non-parent, non-child LAN. 0.0 yields a tree.
    cross_edge_probability: float = Field(default=0.0, ge=0.0, le=1.0)

    # IP allocation base. LANs get sequential /24s carved out of this
    # network. Accepts either a full CIDR (e.g. ``172.16.0.0/12`` for
    # 4096 slots) or the legacy two-octet shorthand ``172.20`` which
    # auto-lifts to ``172.20.0.0/16`` (256 slots). Default is a /12
    # so mass-scale topologies (depth/branching trees with >256 LANs)
    # don't exhaust the pool on first generation. The pattern only
    # checks the textual shape; the numeric ranges and the /24 limit
    # are enforced in the model validator below.
    subnet_base_prefix: str = Field(
        default="172.16.0.0/12",
        pattern=r"^\d{1,3}\.\d{1,3}(\.\d{1,3}\.\d{1,3}/\d{1,2})?$",
    )

    # Service selection — reuses decnet.fleet.build_deckies' randomizer.
    randomize_services: bool = Field(default=True)
    services_explicit: Optional[list[str]] = None

    seed: Optional[int] = Field(default=None, ge=0)

    @model_validator(mode="after")
    def _check_min_max(self) -> "TopologyConfig":
        """Cross-field and semantic checks the field constraints cannot express.

        :raises ValueError: if min > max deckies per LAN, if no service
            source is configured, or if ``subnet_base_prefix`` is not a
            valid IPv4 network with a prefix length of at most 24.
        """
        if self.deckies_per_lan_min > self.deckies_per_lan_max:
            raise ValueError(
                "deckies_per_lan_min must be <= deckies_per_lan_max"
            )
        if not self.randomize_services and not self.services_explicit:
            raise ValueError(
                "either randomize_services=True or services_explicit must be set"
            )

        # Fail fast on bases the regex lets through but the subnet
        # allocator would reject at generation time (octets > 255,
        # prefix > 32, or a parent narrower than /24). Local import
        # keeps this module's import block untouched.
        from ipaddress import IPv4Network

        base = self.subnet_base_prefix
        cidr = base if "/" in base else f"{base}.0.0/16"
        try:
            parent = IPv4Network(cidr, strict=False)
        except ValueError as exc:
            raise ValueError(
                f"subnet_base_prefix {base!r} is not a valid IPv4 network: {exc}"
            ) from exc
        if parent.prefixlen > 24:
            raise ValueError(
                f"subnet_base_prefix {base!r} is narrower than /24; "
                "cannot carve /24 LANs out of it"
            )
        return self
|
||||
|
||||
|
||||
@dataclass
|
||||
class _PlannedLAN:
|
||||
"""In-memory LAN record emitted by the generator."""
|
||||
name: str
|
||||
subnet: str
|
||||
is_dmz: bool
|
||||
parent: Optional[str] # name of parent LAN, None for DMZ
|
||||
# Canvas coordinates — generator leaves them None; the web editor
|
||||
# (or a future auto-layouter) fills them in.
|
||||
x: Optional[float] = None
|
||||
y: Optional[float] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class _PlannedDecky:
|
||||
"""In-memory decky record emitted by the generator."""
|
||||
name: str
|
||||
services: list[str]
|
||||
# Mapping LAN-name → assigned IP within that LAN's subnet.
|
||||
ips_by_lan: dict[str, str] = field(default_factory=dict)
|
||||
forwards_l3: bool = False # only meaningful when present on ≥2 LANs
|
||||
# Per-service config overrides: {service_name: {field: value}}.
|
||||
# Mirrors ``DeckyConfig.service_config`` from the flat-fleet path;
|
||||
# services read these via ``compose_fragment(service_cfg=...)``.
|
||||
service_config: dict[str, dict] = field(default_factory=dict)
|
||||
# Canvas coordinates — see _PlannedLAN.x/y.
|
||||
x: Optional[float] = None
|
||||
y: Optional[float] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class _PlannedEdge:
|
||||
"""In-memory (decky, LAN) membership edge."""
|
||||
decky_name: str
|
||||
lan_name: str
|
||||
is_bridge: bool
|
||||
forwards_l3: bool
|
||||
|
||||
|
||||
@dataclass
class GeneratedTopology:
    """Everything :func:`decnet.topology.generator.generate` returns.

    Entities are identified by name, unique within the topology. UUIDs
    are not assigned at this stage — :mod:`decnet.topology.persistence`
    mints them when the plan is written to the repo.
    """

    config: TopologyConfig
    lans: list[_PlannedLAN]
    deckies: list[_PlannedDecky]
    edges: list[_PlannedEdge]
|
||||
237
decnet/topology/generator.py
Normal file
237
decnet/topology/generator.py
Normal file
@@ -0,0 +1,237 @@
|
||||
"""MazeNET topology generator.
|
||||
|
||||
Produces a :class:`GeneratedTopology` — an in-memory DAG of LANs and
|
||||
multi-homed deckies. Deterministic under ``config.seed``: the same seed
|
||||
always yields the same structure, service assignments, and IP layout.
|
||||
|
||||
The generator only plans the structure. Persisting UUIDs to the repo
|
||||
is :mod:`decnet.topology.persistence`; spawning Docker networks and
|
||||
containers is :mod:`decnet.engine.deployer`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import random
|
||||
from typing import Optional
|
||||
|
||||
from decnet.fleet import all_service_names
|
||||
from decnet.topology.allocator import IPAllocator, SubnetAllocator
|
||||
from decnet.topology.config import (
|
||||
GeneratedTopology,
|
||||
TopologyConfig,
|
||||
_PlannedDecky,
|
||||
_PlannedEdge,
|
||||
_PlannedLAN,
|
||||
)
|
||||
|
||||
# Range of services per randomly assigned decky (matches decnet.fleet).
|
||||
_SVC_MIN = 1
|
||||
_SVC_MAX = 3
|
||||
|
||||
|
||||
def _plan_lans(
    config: TopologyConfig,
    rng: random.Random,
    subnets: SubnetAllocator,
) -> list[_PlannedLAN]:
    """Lay out the LAN tree, ``config.depth`` levels below the DMZ.

    The DMZ (``LAN-00``) is the root. Every LAN on a level receives
    between 1 and ``config.branching_factor`` children. Names and
    subnets are handed out in breadth-first order; subnets come from
    ``subnets``, which the caller may have pre-seeded with reservations
    held by other topologies.
    """
    root = _PlannedLAN(
        name="LAN-00", subnet=subnets.next_free(), is_dmz=True, parent=None
    )
    planned: list[_PlannedLAN] = [root]
    current_level: list[_PlannedLAN] = [root]

    for _level in range(1, config.depth + 1):
        children: list[_PlannedLAN] = []
        for parent in current_level:
            fan_out = rng.randint(1, config.branching_factor)  # nosec B311
            for _ in range(fan_out):
                # The running LAN count doubles as the next name index.
                child = _PlannedLAN(
                    name=f"LAN-{len(planned):02d}",
                    subnet=subnets.next_free(),
                    is_dmz=False,
                    parent=parent.name,
                )
                planned.append(child)
                children.append(child)
        if not children:
            break
        current_level = children
    return planned
|
||||
|
||||
|
||||
def _pick_services(
|
||||
rng: random.Random,
|
||||
services_explicit: Optional[list[str]],
|
||||
pool: list[str],
|
||||
used_combos: set[frozenset],
|
||||
) -> list[str]:
|
||||
if services_explicit:
|
||||
return list(services_explicit)
|
||||
if not pool:
|
||||
return []
|
||||
attempts = 0
|
||||
while True:
|
||||
count = rng.randint(_SVC_MIN, min(_SVC_MAX, len(pool))) # nosec B311
|
||||
chosen = frozenset(rng.sample(pool, count)) # nosec B311
|
||||
attempts += 1
|
||||
if chosen not in used_combos or attempts > 20:
|
||||
break
|
||||
used_combos.add(chosen)
|
||||
return list(chosen)
|
||||
|
||||
|
||||
def generate(
    config: TopologyConfig,
    *,
    reserved_subnets: Optional[set[str]] = None,
) -> GeneratedTopology:
    """Generate a topology plan deterministically under ``config.seed``.

    The caller is responsible for persisting the plan via
    :mod:`decnet.topology.persistence` and then deploying it.

    Planning happens in four passes, and the order of RNG draws across
    them is part of the contract (same seed → same plan):

    1. LAN tree (:func:`_plan_lans`).
    2. Home deckies per LAN: exactly one for the DMZ, otherwise a random
       count in ``[deckies_per_lan_min, deckies_per_lan_max]`` clamped to
       at least 1.
    3. One parent bridge per non-DMZ LAN: a random home decky of the LAN
       is multi-homed onto the parent LAN.
    4. Optional cross-edges when ``cross_edge_probability > 0``.

    :param reserved_subnets: /24s already claimed by other topologies.
        The subnet allocator skips these so two concurrent drafts can't
        collide. Populate via
        :func:`decnet.topology.allocator.reserved_subnets`.
    :raises decnet.topology.allocator.AllocatorExhausted: if the subnet
        base or one of the LAN subnets runs out of free values.
    """
    rng = random.Random(config.seed)  # nosec B311
    svc_pool = all_service_names() if config.randomize_services else []
    used_combos: set[frozenset] = set()

    subnets = SubnetAllocator(
        config.subnet_base_prefix, reserved=reserved_subnets or set()
    )
    lans = _plan_lans(config, rng, subnets)

    # Per-LAN IP allocators for deterministic assignment.
    ip_allocs: dict[str, IPAllocator] = {
        lan.name: IPAllocator(lan.subnet) for lan in lans
    }

    deckies: list[_PlannedDecky] = []
    edges: list[_PlannedEdge] = []
    # Home deckies of each LAN, in creation order. Bridge attachments
    # are deliberately not recorded here: only a LAN's own deckies are
    # candidates for bridging or cross-edges.
    deckies_by_lan: dict[str, list[_PlannedDecky]] = {lan.name: [] for lan in lans}
    # Index of edges per decky so bridge promotion does not rescan the
    # whole edge list for every LAN.
    edges_by_decky: dict[str, list[_PlannedEdge]] = {}

    def _record_edge(edge: _PlannedEdge) -> None:
        """Append *edge* to the plan and to the per-decky index."""
        edges.append(edge)
        edges_by_decky.setdefault(edge.decky_name, []).append(edge)

    def _new_decky(home_lan: str) -> _PlannedDecky:
        """Create the next decky in *home_lan* (services first, then IP)."""
        name = f"decky-{len(deckies) + 1:03d}"
        services = _pick_services(
            rng, config.services_explicit, svc_pool, used_combos
        )
        decky = _PlannedDecky(
            name=name,
            services=services,
            ips_by_lan={home_lan: ip_allocs[home_lan].next_free()},
        )
        deckies.append(decky)
        deckies_by_lan[home_lan].append(decky)
        return decky

    def _attach_as_bridge(decky: _PlannedDecky, lan_name: str) -> None:
        """Multi-home *decky* onto *lan_name* and mark all its edges as bridge.

        Draws the IP first and the forwarding roll second; forwarding is
        sticky once any attachment of the decky rolled it.
        """
        decky.ips_by_lan[lan_name] = ip_allocs[lan_name].next_free()
        forwards = rng.random() < config.bridge_forward_probability  # nosec B311
        decky.forwards_l3 = decky.forwards_l3 or forwards
        for existing in edges_by_decky[decky.name]:
            existing.is_bridge = True
            existing.forwards_l3 = decky.forwards_l3
        _record_edge(
            _PlannedEdge(
                decky_name=decky.name,
                lan_name=lan_name,
                is_bridge=True,
                forwards_l3=decky.forwards_l3,
            )
        )

    # Populate each LAN with its own deckies.
    for lan in lans:
        if lan.is_dmz:
            count = 1  # single DMZ decky (deaddeck)
        else:
            count = rng.randint(  # nosec B311
                config.deckies_per_lan_min, config.deckies_per_lan_max
            )
            if count < 1:
                count = 1  # every LAN needs ≥1 decky to host the bridge
        for _ in range(count):
            decky = _new_decky(lan.name)
            _record_edge(
                _PlannedEdge(
                    decky_name=decky.name,
                    lan_name=lan.name,
                    is_bridge=False,
                    forwards_l3=False,
                )
            )

    # Parent↔child bridges. For every non-DMZ LAN, pick one of its
    # deckies and multi-home it to the parent LAN. This decky becomes
    # the bridge between the two segments.
    for lan in lans:
        if lan.is_dmz or lan.parent is None:
            continue
        bridge = rng.choice(deckies_by_lan[lan.name])  # nosec B311
        _attach_as_bridge(bridge, lan.parent)

    # Cross-edges: with probability p, pick a non-parent, non-child,
    # non-self LAN and attach a random decky to it too. Turns the tree
    # into a DAG. Only rolls on non-DMZ LANs with ≥1 candidate peer.
    if config.cross_edge_probability > 0:
        for lan in lans:
            if lan.is_dmz:
                continue
            if rng.random() >= config.cross_edge_probability:  # nosec B311
                continue
            forbidden = {lan.name, lan.parent}
            forbidden |= {c.name for c in lans if c.parent == lan.name}
            peers = [p for p in lans if p.name not in forbidden]
            if not peers:
                continue
            peer = rng.choice(peers)  # nosec B311
            decky = rng.choice(deckies_by_lan[lan.name])  # nosec B311
            if peer.name in decky.ips_by_lan:
                continue  # already connected, skip
            _attach_as_bridge(decky, peer.name)

    return GeneratedTopology(
        config=config, lans=lans, deckies=deckies, edges=edges
    )
|
||||
65
decnet/topology/hashing.py
Normal file
65
decnet/topology/hashing.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""Canonical hash of a hydrated topology dict.
|
||||
|
||||
Both master and agent need to agree on "is the applied state the one
|
||||
the master intends?". We answer that by hashing the hydrated topology
|
||||
blob on both sides and comparing the hex digests. The function has to
|
||||
be **pure** and **deterministic**: same logical state → same hash, no
|
||||
matter the dict-key order, no matter the timezone of a ``created_at``.
|
||||
|
||||
Normalisation rules (applied to a deep copy — input is never mutated):
|
||||
|
||||
- Drop fields that change on every read but don't change behaviour:
|
||||
``created_at``, ``status_changed_at``, ``updated_at``, ``last_seen``,
|
||||
``status``, ``version``, ``last_error``.
|
||||
- Drop purely-cosmetic canvas positions (``x``, ``y``, ``w``, ``h``)
|
||||
everywhere — they're client-side layout, not deployment state.
|
||||
- Leave everything else alone; sort-keys=True + ``separators``
|
||||
collapse whitespace and fix ordering.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
# Fields that vary over time or come from layout and must NOT feed the
|
||||
# applied-state hash. Dropped at every nesting level.
|
||||
_VOLATILE_KEYS = frozenset(
|
||||
{
|
||||
"created_at",
|
||||
"status_changed_at",
|
||||
"updated_at",
|
||||
"last_seen",
|
||||
"status",
|
||||
"version",
|
||||
"last_error",
|
||||
"x",
|
||||
"y",
|
||||
"w",
|
||||
"h",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _strip(value: Any) -> Any:
|
||||
"""Return a deep copy of *value* with volatile keys removed."""
|
||||
if isinstance(value, dict):
|
||||
return {k: _strip(v) for k, v in value.items() if k not in _VOLATILE_KEYS}
|
||||
if isinstance(value, list):
|
||||
return [_strip(v) for v in value]
|
||||
return value
|
||||
|
||||
|
||||
def canonical_hash(hydrated: dict) -> str:
    """Return the SHA-256 hex digest of *hydrated*'s canonical form.

    The canonical form is the volatile-key-free copy of *hydrated*
    serialised as compact JSON with sorted keys and encoded as UTF-8.
    Values JSON cannot encode natively are rendered with ``str``.
    """
    canonical = json.dumps(
        _strip(hydrated),
        sort_keys=True,
        separators=(",", ":"),
        default=str,
    )
    digest = hashlib.sha256()
    digest.update(canonical.encode("utf-8"))
    return digest.hexdigest()
|
||||
|
||||
|
||||
__all__ = ["canonical_hash"]
|
||||
218
decnet/topology/persistence.py
Normal file
218
decnet/topology/persistence.py
Normal file
@@ -0,0 +1,218 @@
|
||||
"""Adapter between :class:`GeneratedTopology` and the repository layer."""
|
||||
from __future__ import annotations
|
||||
|
||||
from ipaddress import IPv4Address, IPv4Network
|
||||
from typing import Any
|
||||
|
||||
from decnet.topology.allocator import IPAllocator
|
||||
from decnet.topology.config import GeneratedTopology
|
||||
from decnet.topology.status import TopologyStatus, assert_transition
|
||||
|
||||
|
||||
async def persist(
    repo: Any,
    plan: GeneratedTopology,
    *,
    target_host_uuid: str | None = None,
) -> str:
    """Write a generated plan to the repo as a ``pending`` topology.

    Rows are written through separate repo calls in the order
    topology → LANs → deckies → edges, and the ids returned for LANs and
    deckies are used to wire up the edge rows.

    NOTE(review): per the original author's note each repo method commits
    on its own, so the chain is not one transaction; this was judged
    acceptable for initial create because it runs before any external
    side effects. A failure midway can still leave a partial topology.

    :param target_host_uuid: pin the topology to a specific swarm agent.
        Only meaningful when ``plan.config.mode == "agent"`` (caller
        validates; this function just stores what it's told).
    :returns: the newly created topology id.
    :raises ValueError: if a decky has no LAN membership. This is
        checked before anything is written.
    """
    # Validate before the first write. Without this an empty ips_by_lan
    # would raise StopIteration from next(iter(...)) below, which a
    # coroutine surfaces as an opaque RuntimeError — and only after the
    # topology and LAN rows were already created.
    for decky in plan.deckies:
        if not decky.ips_by_lan:
            raise ValueError(
                f"decky {decky.name!r} has no LAN membership; "
                "cannot determine its primary IP"
            )

    topology_id = await repo.create_topology(
        {
            "name": plan.config.name,
            "mode": plan.config.mode,
            "target_host_uuid": target_host_uuid,
            "config_snapshot": plan.config.model_dump(),
        }
    )

    lan_ids: dict[str, str] = {}
    for lan in plan.lans:
        lan_id = await repo.add_lan(
            {
                "topology_id": topology_id,
                "name": lan.name,
                "subnet": lan.subnet,
                "is_dmz": lan.is_dmz,
                "x": lan.x,
                "y": lan.y,
            }
        )
        lan_ids[lan.name] = lan_id

    decky_ids: dict[str, str] = {}
    for decky in plan.deckies:
        # Primary IP: the first LAN the decky was assigned to (insertion
        # order of ips_by_lan, which reflects generator ordering —
        # home LAN first, then any bridge targets).
        primary_lan = next(iter(decky.ips_by_lan))
        primary_ip = decky.ips_by_lan[primary_lan]
        decky_uuid = await repo.add_topology_decky(
            {
                "topology_id": topology_id,
                "name": decky.name,
                "services": decky.services,
                "decky_config": {
                    "name": decky.name,
                    "services": decky.services,
                    "ips_by_lan": decky.ips_by_lan,
                    "forwards_l3": decky.forwards_l3,
                    "service_config": decky.service_config,
                },
                "ip": primary_ip,
                "x": decky.x,
                "y": decky.y,
            }
        )
        decky_ids[decky.name] = decky_uuid

    for edge in plan.edges:
        await repo.add_topology_edge(
            {
                "topology_id": topology_id,
                "decky_uuid": decky_ids[edge.decky_name],
                "lan_id": lan_ids[edge.lan_name],
                "is_bridge": edge.is_bridge,
                "forwards_l3": edge.forwards_l3,
            }
        )

    return topology_id
|
||||
|
||||
|
||||
async def transition_status(
    repo: Any,
    topology_id: str,
    new_status: str,
    reason: str | None = None,
) -> None:
    """Move a topology to ``new_status`` if the transition is legal.

    The current status is read from the repo, checked with
    :func:`decnet.topology.status.assert_transition`, and only then
    written back together with ``reason``.

    :raises ValueError: if the topology does not exist.
    :raises decnet.topology.status.TopologyStatusError: if the current
        status cannot legally transition to ``new_status``.
    """
    current = await repo.get_topology(topology_id)
    if current is None:
        raise ValueError(f"topology {topology_id!r} not found")
    # NOTE(review): the read and the write are separate repo calls —
    # confirm nothing else can change the status in between.
    assert_transition(current["status"], new_status)
    await repo.update_topology_status(topology_id, new_status, reason=reason)
|
||||
|
||||
|
||||
async def hydrate(repo: Any, topology_id: str) -> dict[str, Any] | None:
    """Fetch a topology and its child rows as one dictionary.

    The result has four keys::

        {
            "topology": { ...row... },
            "lans":     [ {...}, ... ],
            "deckies":  [ {...}, ... ],
            "edges":    [ {...}, ... ],
        }

    Before returning, the decky rows are passed through
    :func:`_backfill_decky_configs`, which mutates them in place.

    Returns ``None`` when ``repo.get_topology`` finds no such topology.
    """
    topology_row = await repo.get_topology(topology_id)
    if topology_row is None:
        return None

    # Dict literals evaluate in order, so the repo is queried for LANs,
    # then deckies, then edges.
    bundle: dict[str, Any] = {
        "topology": topology_row,
        "lans": await repo.list_lans_for_topology(topology_id),
        "deckies": await repo.list_topology_deckies(topology_id),
        "edges": await repo.list_topology_edges(topology_id),
    }
    _backfill_decky_configs(bundle["lans"], bundle["deckies"], bundle["edges"])
    return bundle
|
||||
|
||||
|
||||
def _backfill_decky_configs(
    lans: list[dict[str, Any]],
    deckies: list[dict[str, Any]],
    edges: list[dict[str, Any]],
) -> None:
    """Fill in ``decky_config['name']`` and ``ips_by_lan`` for UI-created rows.

    The generator path writes these fields at persist-time; the REST
    CRUD path writes whatever the client sends (often just archetype
    flags). Compose generation requires both, so we normalise here so
    every write path feeds the same shape downstream.

    Works in two passes over the deckies, sorted by ``(name, uuid)``:

    1. Reserve every IP a decky already declares in
       ``decky_config['ips_by_lan']`` for a LAN it has an edge to.
    2. For each attached LAN still missing from ``ips_by_lan``, reuse the
       decky's row-level ``ip`` when it lies inside the LAN subnet and is
       still free, otherwise take the allocator's next free address.

    Each decky dict is mutated in place: ``decky['decky_config']`` is
    replaced by a copy carrying ``name`` and the completed ``ips_by_lan``.
    ``lans`` and ``edges`` are only read.  Returns ``None``.
    """
    lans_by_id = {lan["id"]: lan for lan in lans}
    # One allocator per LAN id, created lazily on first use.
    allocators: dict[str, IPAllocator] = {}

    def _alloc(lan_id: str) -> IPAllocator | None:
        # Returns None when the LAN is unknown or has no (truthy) subnet,
        # so callers simply skip that edge.
        lan = lans_by_id.get(lan_id)
        if lan is None or not lan.get("subnet"):
            return None
        if lan_id not in allocators:
            allocators[lan_id] = IPAllocator(lan["subnet"])
        return allocators[lan_id]

    # decky uuid -> LAN ids it is attached to, in edge-list order.
    decky_edges: dict[str, list[str]] = {}
    for e in edges:
        decky_edges.setdefault(e["decky_uuid"], []).append(e["lan_id"])

    # Fixed iteration order — presumably so gap-filling hands out the same
    # addresses on every hydration; NOTE(review): confirm intent.
    ordered = sorted(deckies, key=lambda d: (d.get("name", ""), d["uuid"]))

    # Pass 1: reserve IPs already declared in decky_config.
    for decky in ordered:
        cfg = decky.get("decky_config") or {}
        existing = cfg.get("ips_by_lan") or {}
        for lan_id in decky_edges.get(decky["uuid"], []):
            lan = lans_by_id.get(lan_id)
            if lan is None:
                continue
            alloc = _alloc(lan_id)
            if alloc is None:
                continue
            # ips_by_lan is keyed by LAN *name*, edges by LAN *id*.
            ip = existing.get(lan["name"])
            # An IP that is no longer free (e.g. declared by an earlier
            # decky) is left alone here rather than reserved twice.
            if ip and alloc.is_free(ip):
                alloc.reserve(ip)

    # Pass 2: fill gaps; rewrite decky_config.
    for decky in ordered:
        # Copies, so the stored config/mapping objects are not edited in place.
        cfg = dict(decky.get("decky_config") or {})
        # setdefault: an existing "name" key wins over the row-level name.
        cfg.setdefault("name", decky.get("name"))
        ips_by_lan: dict[str, str] = dict(cfg.get("ips_by_lan") or {})
        primary_ip = decky.get("ip")
        for lan_id in decky_edges.get(decky["uuid"], []):
            lan = lans_by_id.get(lan_id)
            if lan is None:
                continue
            # Already has an address for this LAN — nothing to fill.
            if lan["name"] in ips_by_lan:
                continue
            alloc = _alloc(lan_id)
            if alloc is None:
                continue
            ip: str | None = None
            if primary_ip:
                try:
                    # Prefer the row-level IP when it fits this LAN and
                    # nobody has claimed it yet.
                    if (
                        IPv4Address(primary_ip) in IPv4Network(lan["subnet"])
                        and alloc.is_free(primary_ip)
                    ):
                        ip = primary_ip
                        alloc.reserve(ip)
                except (ValueError, TypeError):
                    # Unparseable IP or subnet: fall through to allocation.
                    pass
            if ip is None:
                # NOTE(review): next_free() presumably raises when the
                # subnet is exhausted; that is not handled here — confirm.
                ip = alloc.next_free()
            ips_by_lan[lan["name"]] = ip
        cfg["ips_by_lan"] = ips_by_lan
        decky["decky_config"] = cfg
|
||||
|
||||
|
||||
# Re-export the status constants so callers can ``from decnet.topology.persistence
|
||||
# import TopologyStatus`` without chasing modules.
|
||||
__all__ = ["persist", "transition_status", "hydrate", "TopologyStatus"]
|
||||
106
decnet/topology/status.py
Normal file
106
decnet/topology/status.py
Normal file
@@ -0,0 +1,106 @@
|
||||
"""MazeNET topology status state machine.
|
||||
|
||||
Seven states — six active in v1. ``degraded`` is schema-reserved for the
|
||||
future Healer worker and has no transitions into it from v1 code paths.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class TopologyStatus:
    """Namespace of the string constants naming each topology status."""

    PENDING = "pending"
    DEPLOYING = "deploying"
    ACTIVE = "active"
    DEGRADED = "degraded"
    FAILED = "failed"
    TEARING_DOWN = "tearing_down"
    TORN_DOWN = "torn_down"

    # Every status defined above, collected for membership checks.
    ALL: frozenset[str] = frozenset(
        (
            PENDING,
            DEPLOYING,
            ACTIVE,
            DEGRADED,
            FAILED,
            TEARING_DOWN,
            TORN_DOWN,
        )
    )
|
||||
|
||||
|
||||
# Directed transitions. torn_down is terminal. degraded is unreachable
# in v1 (Healer would be the only writer), but its outbound edges stay
# defined so when Healer lands the state machine already accepts them.
#
# Maps each status to the set of statuses it may move to next; an empty
# set marks a terminal state.
# NOTE(review): the table itself does list ``degraded`` as a successor of
# deploying, active and tearing_down, so "unreachable" presumably means
# that no v1 caller requests it — confirm.
_LEGAL: dict[str, frozenset[str]] = {
    TopologyStatus.PENDING: frozenset(
        {TopologyStatus.DEPLOYING, TopologyStatus.TORN_DOWN}
    ),
    TopologyStatus.DEPLOYING: frozenset(
        {
            TopologyStatus.ACTIVE,
            TopologyStatus.FAILED,
            TopologyStatus.DEGRADED,
            TopologyStatus.TEARING_DOWN,
        }
    ),
    TopologyStatus.ACTIVE: frozenset(
        {TopologyStatus.DEGRADED, TopologyStatus.TEARING_DOWN}
    ),
    TopologyStatus.DEGRADED: frozenset(
        {TopologyStatus.ACTIVE, TopologyStatus.TEARING_DOWN}
    ),
    TopologyStatus.FAILED: frozenset({TopologyStatus.TEARING_DOWN}),
    TopologyStatus.TEARING_DOWN: frozenset(
        {TopologyStatus.TORN_DOWN, TopologyStatus.DEGRADED}
    ),
    TopologyStatus.TORN_DOWN: frozenset(),
}
|
||||
|
||||
|
||||
class TopologyStatusError(ValueError):
    """Error signalling that a requested topology status change was rejected.

    Subclasses ``ValueError`` so callers catching that still see it.
    """
|
||||
|
||||
|
||||
class TopologyNotEditable(RuntimeError):
    """Error for a pending-only mutation attempted on a non-pending topology.

    Pre-deploy edits (update_lan, delete_lan, update/delete decky,
    delete_edge) are only legal while the topology is ``pending``.
    After deploy the mutator's reconciler + topology_mutations table
    take over.

    Attributes:
        status: The status the topology was in when the edit was refused.
        reason: Optional extra detail; empty string when not supplied.
    """

    def __init__(self, *, status: str, reason: str = "") -> None:
        self.status = status
        self.reason = reason
        message = f"topology not editable (status={status!r})"
        # The reason suffix is only appended when one was given.
        if reason:
            message += f": {reason}"
        super().__init__(message)
|
||||
|
||||
|
||||
class VersionConflict(RuntimeError):
    """Error for a topology write carrying a stale ``expected_version``.

    Optimistic concurrency guard: the caller passed the version it last
    observed, and the topology has since been mutated by someone else.
    The caller should re-read and retry.

    Attributes:
        current: The version the topology actually has now.
        expected: The version the caller supplied.
    """

    def __init__(self, *, current: int, expected: int) -> None:
        self.current = current
        self.expected = expected
        message = f"topology version conflict: expected {expected}, current is {current}"
        super().__init__(message)
|
||||
|
||||
|
||||
def assert_transition(current: str, new: str) -> None:
    """Check that ``current → new`` is a legal status transition.

    Returns ``None`` when the step is allowed.

    Raises:
        TopologyStatusError: If ``current`` or ``new`` is not one of
            ``TopologyStatus.ALL``, or if ``new`` is not listed as a
            successor of ``current`` in the transition table.
    """
    known = TopologyStatus.ALL
    # Both endpoints are vetted first so the table lookup below cannot miss.
    if current not in known:
        raise TopologyStatusError(f"unknown current status: {current!r}")
    if new not in known:
        raise TopologyStatusError(f"unknown new status: {new!r}")

    if new in _LEGAL[current]:
        return
    raise TopologyStatusError(
        f"illegal transition: {current!r} → {new!r}"
    )
|
||||
|
||||
|
||||
def legal_next(current: str) -> frozenset[str]:
    """Look up which statuses may directly follow ``current``.

    Returns the frozenset stored for ``current`` in the transition table
    (empty for a terminal status).

    Raises:
        TopologyStatusError: If ``current`` has no entry in the table.
    """
    successors = _LEGAL.get(current)
    # Table values are always frozensets, so None can only mean "no entry".
    if successors is None:
        raise TopologyStatusError(f"unknown status: {current!r}")
    return successors
|
||||
356
decnet/topology/validate.py
Normal file
356
decnet/topology/validate.py
Normal file
@@ -0,0 +1,356 @@
|
||||
"""Pre-deploy validator for MazeNET topologies.
|
||||
|
||||
Consumes a hydrated dict (output of
|
||||
:func:`decnet.topology.persistence.hydrate`) and returns a list of
|
||||
:class:`ValidationIssue` records. The deployer calls :func:`validate`
|
||||
before transitioning to ``DEPLOYING`` and refuses to proceed if any
|
||||
issue has ``severity=="error"``.
|
||||
|
||||
Rules are independent functions so the web editor can surface them as
|
||||
inline diagnostics without running the full list.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from ipaddress import IPv4Address, IPv4Network
|
||||
from typing import Any, Callable, Literal
|
||||
|
||||
from decnet.fleet import all_service_names
|
||||
from decnet.services.registry import get_service
|
||||
|
||||
Severity = Literal["error", "warning"]
|
||||
|
||||
|
||||
@dataclass
class ValidationIssue:
    """A single finding reported by a topology validation rule."""

    # "error" or "warning" (see the Severity alias).
    severity: Severity
    # Short upper-case identifier for the rule outcome, e.g. "DMZ_MISSING".
    code: str
    # Human-readable description of the problem.
    message: str
    # Names/values pinpointing the offending objects; a fresh empty dict
    # per instance when omitted.
    target: dict = field(default_factory=dict)
|
||||
|
||||
|
||||
class ValidationError(Exception):
    """Exception carrying the issues found by pre-deploy validation.

    Raised by the deployer when a topology fails pre-deploy checks.

    Attributes:
        issues: The full list passed in, warnings included.  Only the
            entries with ``severity == "error"`` are counted and quoted
            in the exception message.
    """

    def __init__(self, issues: list[ValidationIssue]) -> None:
        self.issues = issues
        blocking = [issue for issue in issues if issue.severity == "error"]
        details = "; ".join(
            f"[{issue.code}] {issue.message}" for issue in blocking
        )
        super().__init__(
            f"{len(blocking)} topology validation error(s): " + details
        )
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- rules
|
||||
|
||||
|
||||
def check_exactly_one_dmz(h: dict[str, Any]) -> list[ValidationIssue]:
    """Require that exactly one LAN in ``h["lans"]`` has a truthy ``is_dmz``.

    Returns an empty list when the count is one, a single ``DMZ_MISSING``
    error when it is zero, and a single ``DMZ_MULTIPLE`` error (naming the
    flagged LANs in ``target``) otherwise.
    """
    flagged = [lan for lan in h["lans"] if lan.get("is_dmz")]
    count = len(flagged)

    if count == 1:
        return []
    if count == 0:
        return [
            ValidationIssue("error", "DMZ_MISSING", "no LAN is marked is_dmz=True")
        ]

    multiple = ValidationIssue(
        "error",
        "DMZ_MULTIPLE",
        f"{len(flagged)} LANs marked is_dmz=True; exactly one allowed",
        target={"lans": [lan["name"] for lan in flagged]},
    )
    return [multiple]
|
||||
|
||||
|
||||
def check_all_lans_connected_to_dmz(
    h: dict[str, Any],
) -> list[ValidationIssue]:
    """Require every LAN to be reachable from the DMZ through shared deckies.

    Two LANs are adjacent when at least one decky has an edge to both.
    Starting from the first LAN with a truthy ``is_dmz``, the LAN graph is
    walked and any LAN never reached is reported in a single
    ``DMZ_ORPHAN`` error (``target["lans"]`` lists their names in
    ``h["lans"]`` order).

    Returns an empty list when there are no LANs, when no DMZ is marked
    (that case belongs to ``check_exactly_one_dmz``), or when every LAN
    is reachable.

    Edges whose ``lan_id`` does not match any hydrated LAN are ignored
    here instead of raising ``KeyError``: a validator must report, not
    crash, and a dangling edge cannot connect anything anyway.
    """
    lans = {lan["id"]: lan for lan in h["lans"]}
    if not lans:
        return []
    dmz = next((lan for lan in h["lans"] if lan.get("is_dmz")), None)
    if dmz is None:
        return []  # covered by check_exactly_one_dmz

    # decky uuid -> set of *known* LAN ids that decky is attached to.
    decky_lans: dict[str, set[str]] = {}
    for edge in h["edges"]:
        lan_id = edge["lan_id"]
        if lan_id not in lans:
            continue  # dangling edge: no such LAN in this topology
        decky_lans.setdefault(edge["decky_uuid"], set()).add(lan_id)

    # Adjacency: LANs share an edge if ≥1 bridge decky is attached to both.
    adj: dict[str, set[str]] = {lid: set() for lid in lans}
    for attached in decky_lans.values():
        if len(attached) < 2:
            continue
        for lid in attached:
            adj[lid] |= attached - {lid}

    # Graph walk outward from the DMZ.
    reachable = {dmz["id"]}
    frontier = [dmz["id"]]
    while frontier:
        lid = frontier.pop()
        for peer in adj[lid] - reachable:
            reachable.add(peer)
            frontier.append(peer)

    orphans = [lans[lid]["name"] for lid in lans if lid not in reachable]
    if not orphans:
        return []
    return [
        ValidationIssue(
            "error",
            "DMZ_ORPHAN",
            f"LAN(s) have no bridge path to the DMZ: {', '.join(orphans)}",
            target={"lans": orphans},
        )
    ]
|
||||
|
||||
|
||||
def check_no_orphan_deckies(h: dict[str, Any]) -> list[ValidationIssue]:
    """Report every decky that no edge refers to.

    A decky counts as attached when its ``uuid`` appears as the
    ``decky_uuid`` of at least one entry in ``h["edges"]``.  Each
    unattached decky yields one ``DECKY_ORPHAN`` error, in
    ``h["deckies"]`` order.
    """
    attached: set[str] = set()
    for edge in h["edges"]:
        attached.add(edge["decky_uuid"])

    return [
        ValidationIssue(
            "error",
            "DECKY_ORPHAN",
            f"decky {d['name']!r} has no LAN edges",
            target={"decky": d["name"]},
        )
        for d in h["deckies"]
        if d["uuid"] not in attached
    ]
|
||||
|
||||
|
||||
def check_names_unique(h: dict[str, Any]) -> list[ValidationIssue]:
    """Report repeated LAN names and repeated decky names.

    LANs are scanned first, then deckies.  Every occurrence of a name
    after its first yields one error (``LAN_NAME_DUP`` or
    ``DECKY_NAME_DUP``), so a name used three times is reported twice.
    """
    found: list[ValidationIssue] = []

    lan_names: set[str] = set()
    for lan in h["lans"]:
        if lan["name"] not in lan_names:
            lan_names.add(lan["name"])
            continue
        found.append(
            ValidationIssue(
                "error",
                "LAN_NAME_DUP",
                f"duplicate LAN name {lan['name']!r}",
                target={"lan": lan["name"]},
            )
        )

    decky_names: set[str] = set()
    for d in h["deckies"]:
        if d["name"] not in decky_names:
            decky_names.add(d["name"])
            continue
        found.append(
            ValidationIssue(
                "error",
                "DECKY_NAME_DUP",
                f"duplicate decky name {d['name']!r}",
                target={"decky": d["name"]},
            )
        )

    return found
|
||||
|
||||
|
||||
def check_no_ip_collisions(h: dict[str, Any]) -> list[ValidationIssue]:
    """Validate the per-LAN IPs each decky declares in ``ips_by_lan``.

    For every ``lan_name → ip`` entry in a decky's
    ``decky_config["ips_by_lan"]`` this reports, as errors:

    * ``IP_UNKNOWN_LAN``   – ``lan_name`` matches no LAN in ``h["lans"]``.
    * ``IP_MALFORMED``     – ``ip`` cannot be parsed as an IPv4 address.
    * ``IP_OUT_OF_SUBNET`` – ``ip`` is outside the LAN's subnet.
    * ``IP_COLLISION``     – another decky already claimed ``ip`` in that
      LAN (compared as the literal strings stored in the config).

    An out-of-subnet IP still takes part in collision detection.  A
    missing or ``None`` ``ips_by_lan`` is treated as empty.

    The IP and the LAN subnet are parsed separately.  A subnet that does
    not parse only disables the range check for that LAN — it is a LAN
    problem (``check_no_subnet_overlap`` emits ``SUBNET_MALFORMED`` for
    it) and must not be blamed on a well-formed decky IP.
    """
    lans_by_name = {lan["name"]: lan for lan in h["lans"]}
    per_lan_ips: dict[str, dict[str, str]] = {}  # lan_name → {ip: decky_name}
    issues: list[ValidationIssue] = []
    for d in h["deckies"]:
        # ``or {}`` also covers an explicit ``"ips_by_lan": None``.
        ips_by_lan: dict[str, str] = (d.get("decky_config") or {}).get(
            "ips_by_lan"
        ) or {}
        for lan_name, ip in ips_by_lan.items():
            lan = lans_by_name.get(lan_name)
            if lan is None:
                issues.append(
                    ValidationIssue(
                        "error",
                        "IP_UNKNOWN_LAN",
                        f"decky {d['name']!r} claims IP in unknown LAN "
                        f"{lan_name!r}",
                        target={"decky": d["name"], "lan": lan_name},
                    )
                )
                continue

            try:
                address = IPv4Address(ip)
            except (ValueError, TypeError):
                issues.append(
                    ValidationIssue(
                        "error",
                        "IP_MALFORMED",
                        f"decky {d['name']!r}: malformed IP {ip!r}",
                        target={"decky": d["name"], "ip": ip},
                    )
                )
                continue

            # Out-of-subnet check (skipped when the subnet itself is bad).
            try:
                network = IPv4Network(lan.get("subnet"))
            except (ValueError, TypeError):
                network = None
            if network is not None and address not in network:
                issues.append(
                    ValidationIssue(
                        "error",
                        "IP_OUT_OF_SUBNET",
                        f"{ip} not inside {lan['subnet']} "
                        f"(decky {d['name']!r}, LAN {lan_name!r})",
                        target={"decky": d["name"], "lan": lan_name, "ip": ip},
                    )
                )

            bucket = per_lan_ips.setdefault(lan_name, {})
            if ip in bucket:
                issues.append(
                    ValidationIssue(
                        "error",
                        "IP_COLLISION",
                        f"IP {ip} claimed by both {bucket[ip]!r} and "
                        f"{d['name']!r} in LAN {lan_name!r}",
                        target={
                            "lan": lan_name,
                            "ip": ip,
                            "deckies": [bucket[ip], d["name"]],
                        },
                    )
                )
            else:
                bucket[ip] = d["name"]
    return issues
|
||||
|
||||
|
||||
def check_no_subnet_overlap(h: dict[str, Any]) -> list[ValidationIssue]:
    """Report LAN subnets that fail to parse or that overlap each other.

    Each LAN's ``subnet`` is parsed with ``IPv4Network``; a ``ValueError``
    produces a ``SUBNET_MALFORMED`` error and that LAN is left out of the
    overlap comparison.  Every unordered pair of successfully parsed
    subnets is then compared once, and each overlapping pair yields a
    ``SUBNET_OVERLAP`` error.  Malformed-subnet issues come first in the
    returned list.
    """
    parsed: list[tuple[str, IPv4Network]] = []
    found: list[ValidationIssue] = []

    for lan in h["lans"]:
        try:
            parsed.append((lan["name"], IPv4Network(lan["subnet"])))
        except ValueError:
            malformed = ValidationIssue(
                "error",
                "SUBNET_MALFORMED",
                f"LAN {lan['name']!r}: malformed subnet {lan['subnet']!r}",
                target={"lan": lan["name"]},
            )
            found.append(malformed)

    # Compare each entry only with the ones after it, so a pair is
    # reported once rather than in both directions.
    for later_from, (na, a) in enumerate(parsed, start=1):
        for nb, b in parsed[later_from:]:
            if not a.overlaps(b):
                continue
            found.append(
                ValidationIssue(
                    "error",
                    "SUBNET_OVERLAP",
                    f"LAN {na!r} ({a}) overlaps LAN {nb!r} ({b})",
                    target={"lans": [na, nb]},
                )
            )
    return found
|
||||
|
||||
|
||||
def check_services_known(h: dict[str, Any]) -> list[ValidationIssue]:
    """Report services listed on a decky that are not registered.

    The set of valid names is taken from ``all_service_names()``.  Every
    entry of a decky's ``services`` list missing from that set yields one
    ``UNKNOWN_SERVICE`` error naming the decky and the service.
    """
    registered = set(all_service_names())
    found: list[ValidationIssue] = []
    for d in h["deckies"]:
        for svc in d.get("services", []):
            if svc in registered:
                continue
            unknown = ValidationIssue(
                "error",
                "UNKNOWN_SERVICE",
                f"decky {d['name']!r}: unknown service {svc!r}",
                target={"decky": d["name"], "service": svc},
            )
            found.append(unknown)
    return found
|
||||
|
||||
|
||||
def check_service_config_shape(h: dict[str, Any]) -> list[ValidationIssue]:
    """Report ``service_config`` entries for services a decky does not list.

    For each decky, every key of ``decky_config["service_config"]`` must
    also appear in the decky's ``services`` list; each key that does not
    yields one ``SERVICE_CFG_UNDECLARED`` error.  A missing or empty
    ``decky_config`` / ``service_config`` is treated as "nothing to check".
    """
    found: list[ValidationIssue] = []
    for d in h["deckies"]:
        decky_cfg = d.get("decky_config") or {}
        svc_cfg = decky_cfg.get("service_config") or {}
        declared = set(d.get("services", []))
        undeclared = [name for name in svc_cfg if name not in declared]
        for svc_name in undeclared:
            found.append(
                ValidationIssue(
                    "error",
                    "SERVICE_CFG_UNDECLARED",
                    f"decky {d['name']!r}: service_config for "
                    f"{svc_name!r} but service not in services list",
                    target={"decky": d["name"], "service": svc_name},
                )
            )
    return found
|
||||
|
||||
|
||||
def check_no_host_port_collision(h: dict[str, Any]) -> list[ValidationIssue]:
    """Warn about gateway service ports that the host is already listening on.

    Only deckies whose ``decky_config`` has a truthy ``forwards_l3`` are
    considered (the original note says only such gateway deckies publish
    ports; see decnet/topology/compose.py).  For each of their services,
    ``get_service`` is consulted; unknown services and services flagged
    ``fleet_singleton`` are skipped, and the remaining services' ``ports``
    are collected.  When several gateways want the same port, the first
    one encountered is the one named in the warning.

    Best-effort: if ``psutil`` cannot be imported or probing the host
    raises anything, an empty list is returned.  Findings are
    ``PORT_COLLISION`` issues with severity ``"warning"``.
    """
    wanted: dict[int, str] = {}  # host_port → gateway decky name
    for d in h["deckies"]:
        is_gateway = (d.get("decky_config") or {}).get("forwards_l3")
        if not is_gateway:
            continue
        for svc_name in d.get("services", []):
            svc = get_service(svc_name)
            if svc is None:
                continue
            if getattr(svc, "fleet_singleton", False):
                continue
            for port in getattr(svc, "ports", []) or []:
                # setdefault keeps the first gateway that asked for the port.
                wanted.setdefault(int(port), d["name"])

    # Nothing to publish: skip the host probe entirely.
    if not wanted:
        return []

    try:
        import psutil  # type: ignore

        bound = set()
        for c in psutil.net_connections(kind="inet"):
            if c.status == psutil.CONN_LISTEN and c.laddr:
                bound.add(c.laddr.port)
    except Exception:
        # Deliberately broad: this probe must never block a deploy.
        return []

    return [
        ValidationIssue(
            "warning",
            "PORT_COLLISION",
            f"host port {port} is already bound; "
            f"gateway {decky_name!r} may fail to publish it",
            target={"decky": decky_name, "port": port},
        )
        for port, decky_name in wanted.items()
        if port in bound
    ]
|
||||
|
||||
|
||||
# Pure-data rules. Host-state rules (like PORT_COLLISION) are
# *not* listed here — they're called separately by the live deployer
# so that unit tests exercising validate() stay hermetic.
#
# Each entry takes the hydrated topology dict and returns a list of
# ValidationIssue; validate() calls them in the order listed.
_RULES: list[Callable[[dict[str, Any]], list[ValidationIssue]]] = [
    check_exactly_one_dmz,
    check_all_lans_connected_to_dmz,
    check_no_orphan_deckies,
    check_names_unique,
    check_no_ip_collisions,
    check_no_subnet_overlap,
    check_services_known,
    check_service_config_shape,
]
|
||||
|
||||
|
||||
def validate(hydrated: dict[str, Any]) -> list[ValidationIssue]:
    """Apply each rule in ``_RULES`` to ``hydrated`` and flatten the results.

    Rules run in list order and their issues are concatenated in that
    same order.  The returned list is empty when no rule reports anything.
    """
    return [issue for rule in _RULES for issue in rule(hydrated)]
|
||||
|
||||
|
||||
def errors(issues: list[ValidationIssue]) -> list[ValidationIssue]:
    """Return only the issues whose ``severity`` is ``"error"``, in order."""
    blocking: list[ValidationIssue] = []
    for issue in issues:
        if issue.severity == "error":
            blocking.append(issue)
    return blocking
|
||||
Reference in New Issue
Block a user