diff --git a/decnet/engine/deployer.py b/decnet/engine/deployer.py
index c788158d..096d6a8f 100644
--- a/decnet/engine/deployer.py
+++ b/decnet/engine/deployer.py
@@ -17,16 +17,24 @@ from decnet.config import DecnetConfig, clear_state, load_state, save_state
 from decnet.composer import write_compose
 from decnet.network import (
     MACVLAN_NETWORK_NAME,
+    create_bridge_network,
     create_ipvlan_network,
     create_macvlan_network,
     get_host_ip,
     ips_to_range,
+    remove_bridge_network,
     remove_macvlan_network,
     setup_host_ipvlan,
     setup_host_macvlan,
     teardown_host_ipvlan,
     teardown_host_macvlan,
 )
+from decnet.topology.compose import (
+    _network_name as _topology_network_name,
+    write_topology_compose,
+)
+from decnet.topology.persistence import hydrate, transition_status
+from decnet.topology.status import TopologyStatus
 
 log = get_logger("engine")
 console = Console()
@@ -281,6 +289,118 @@ def status() -> None:
     console.print(table)
 
 
+def _teardown_order(lans: list[dict]) -> list[str]:
+    """Return LAN names in leaf-first (DMZ-last) teardown order.
+
+    The generator names LANs in BFS order (``LAN-00`` = DMZ root,
+    then children, then grandchildren), so reverse-name order is a
+    correct leaf-first topological sort for the tree. Cross-edges
+    are membership-only — they don't introduce parent/child
+    relationships, so the BFS numbering remains valid.
+
+    Names are compared by their numeric suffix rather than as plain
+    strings, so ``LAN-100`` still counts as deeper than ``LAN-99``.
+    Names without a numeric suffix fall back to string order, last.
+    """
+    def _bfs_key(name: str) -> tuple[int, str]:
+        suffix = name.rpartition("-")[2]
+        return (int(suffix) if suffix.isdecimal() else -1, name)
+
+    return sorted((lan["name"] for lan in lans), key=_bfs_key, reverse=True)
+
+
+def _topology_compose_path(topology_id: str) -> Path:
+    return Path(f"decnet-topology-{topology_id[:8]}-compose.yml")
+
+
+@_traced("engine.deploy_topology")
+async def deploy_topology(repo, topology_id: str, *, dry_run: bool = False) -> None:
+    """Deploy a persisted MazeNET topology.
+
+    Assumes ``repo`` has the topology in ``pending`` state. Creates one
+    Docker bridge network per LAN, writes a per-topology compose file,
+    and brings all deckies up. Marks ``active`` on success, ``failed``
+    on exception (partial state left for later teardown).
+
+    With ``dry_run=True`` the networks are still created and the compose
+    file is still written, but compose is never started and the status
+    is left at ``deploying``.
+    """
+    hydrated = await hydrate(repo, topology_id)
+    if hydrated is None:
+        raise ValueError(f"topology {topology_id!r} not found")
+
+    await transition_status(repo, topology_id, TopologyStatus.DEPLOYING)
+
+    client = docker.from_env()
+    lans = hydrated["lans"]
+    compose_path = _topology_compose_path(topology_id)
+
+    try:
+        for lan in lans:
+            net_name = _topology_network_name(topology_id, lan["name"])
+            # DMZ LAN is publicly routable; internal LANs are isolated
+            # from the host's default egress.
+            internal = not lan["is_dmz"]
+            create_bridge_network(
+                client, net_name, lan["subnet"], internal=internal
+            )
+        write_topology_compose(hydrated, compose_path)
+        console.print(
+            f"[bold cyan]Topology compose file written[/] → {compose_path}"
+        )
+        if dry_run:
+            log.info("topology %s dry-run complete", topology_id)
+            return
+        _compose_with_retry("up", "--build", "-d", compose_file=compose_path)
+    except Exception as exc:
+        log.error("topology %s deploy failed: %s", topology_id, exc)
+        await transition_status(
+            repo, topology_id, TopologyStatus.FAILED, reason=str(exc)
+        )
+        raise
+
+    await transition_status(repo, topology_id, TopologyStatus.ACTIVE)
+    log.info("topology %s deployed n_lans=%d", topology_id, len(lans))
+
+
+@_traced("engine.teardown_topology")
+async def teardown_topology(repo, topology_id: str) -> None:
+    """Tear down a persisted MazeNET topology.
+
+    Legal from ``active|degraded|failed|deploying``. Brings compose
+    down, removes each LAN's Docker bridge network in leaf-first order,
+    and marks ``torn_down``.
+    """
+    hydrated = await hydrate(repo, topology_id)
+    if hydrated is None:
+        raise ValueError(f"topology {topology_id!r} not found")
+
+    await transition_status(repo, topology_id, TopologyStatus.TEARING_DOWN)
+
+    client = docker.from_env()
+    compose_path = _topology_compose_path(topology_id)
+
+    if compose_path.exists():
+        try:
+            _compose("down", "--remove-orphans", compose_file=compose_path)
+        except subprocess.CalledProcessError as exc:
+            log.warning(
+                "topology %s compose down failed (continuing): %s",
+                topology_id, exc,
+            )
+
+    for lan_name in _teardown_order(hydrated["lans"]):
+        net_name = _topology_network_name(topology_id, lan_name)
+        remove_bridge_network(client, net_name)
+
+    if compose_path.exists():
+        compose_path.unlink()
+
+    await transition_status(repo, topology_id, TopologyStatus.TORN_DOWN)
+    log.info("topology %s torn down", topology_id)
+
+
 def _print_status(config: DecnetConfig) -> None:
     table = Table(title="Deployed Deckies", show_lines=True)
     table.add_column("Decky")
diff --git a/decnet/network.py b/decnet/network.py
index 17b05279..30f9659b 100644
--- a/decnet/network.py
+++ b/decnet/network.py
@@ -227,6 +227,79 @@ def remove_macvlan_network(client: docker.DockerClient) -> None:
         n.remove()
 
 
+# ---------------------------------------------------------------------------
+# Plain Docker bridge networks (MazeNET topologies — one per LAN)
+# ---------------------------------------------------------------------------
+
+def create_bridge_network(
+    client: docker.DockerClient,
+    name: str,
+    subnet: str,
+    *,
+    internal: bool = False,
+) -> str:
+    """Create (or reuse) a plain Docker bridge network and return its id.
+
+    ``internal=True`` blocks outbound routing via the host — used for
+    non-DMZ MazeNET LANs so deckies can only reach what the bridge
+    deckies let them reach.
+
+    An existing network is reused only when its driver, subnet and
+    ``internal`` flag all match; otherwise its containers are
+    disconnected and it is removed and recreated.
+    """
+    # ``names=`` is a substring filter, so re-check for an exact match;
+    # ``greedy=True`` is required for ``Containers`` to be populated.
+    for net in client.networks.list(names=[name], greedy=True):
+        if net.name != name:
+            continue
+        pools = (net.attrs.get("IPAM") or {}).get("Config") or []
+        cur = pools[0] if pools else {}
+        if (
+            net.attrs.get("Driver") == "bridge"
+            and cur.get("Subnet") == subnet
+            and bool(net.attrs.get("Internal")) == internal
+        ):
+            return net.id
+        for cid in (net.attrs.get("Containers") or {}):
+            try:
+                net.disconnect(cid, force=True)
+            except docker.errors.APIError:
+                pass
+        net.remove()
+
+    net = client.networks.create(
+        name=name,
+        driver="bridge",
+        internal=internal,
+        ipam=docker.types.IPAMConfig(
+            driver="default",
+            pool_configs=[docker.types.IPAMPool(subnet=subnet)],
+        ),
+    )
+    return net.id
+
+
+def remove_bridge_network(client: docker.DockerClient, name: str) -> None:
+    """Best-effort removal of the bridge network named exactly ``name``.
+
+    Attached containers are force-disconnected first; Docker API errors
+    are swallowed so a teardown can continue past partial state.
+    """
+    for net in client.networks.list(names=[name], greedy=True):
+        if net.name != name:
+            continue
+        for cid in (net.attrs.get("Containers") or {}):
+            try:
+                net.disconnect(cid, force=True)
+            except docker.errors.APIError:
+                pass
+        try:
+            net.remove()
+        except docker.errors.APIError:
+            pass
+
+
 # ---------------------------------------------------------------------------
 # Host-side macvlan interface (hairpin fix)
 # ---------------------------------------------------------------------------
diff --git a/decnet/topology/compose.py b/decnet/topology/compose.py
new file mode 100644
index 00000000..25b07285
--- /dev/null
+++ b/decnet/topology/compose.py
@@ -0,0 +1,134 @@
+"""Compose-file generator for a MazeNET topology.
+
+Produces a ``docker-compose.yml`` dict given a hydrated topology
+(the output of :func:`decnet.topology.persistence.hydrate`). The
+compose file references each LAN as an ``external: true`` network —
+the deployer creates the Docker bridge networks via the SDK before
+invoking ``docker compose up``.
+
+Layout:
+  * Each decky has a "base" container holding the LAN IPs. Multi-homed
+    (bridge) deckies list every LAN they belong to under ``networks``
+    with the per-LAN ``ipv4_address``.
+  * Every base container gets ``NET_ADMIN`` in ``cap_add``. Bridge deckies
+    with ``forwards_l3=True`` additionally get ``net.ipv4.ip_forward=1``
+    baked in via compose ``sysctls``.
+  * Service containers share the base namespace via
+    ``network_mode: service:``, matching the flat composer.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from decnet.services.registry import get_service
+
+_DEFAULT_BASE_IMAGE = "debian:bookworm-slim"
+
+_DOCKER_LOGGING = {
+    "driver": "json-file",
+    "options": {"max-size": "10m", "max-file": "5"},
+}
+
+
+def _network_name(topology_id: str, lan_name: str) -> str:
+    """Docker network name for a given (topology, LAN) pair."""
+    return f"decnet_t_{topology_id[:8]}_{lan_name.lower()}"
+
+
+def _container_name(topology_id: str, decky_name: str) -> str:
+    """Container name for a decky base in a topology."""
+    return f"decnet_t_{topology_id[:8]}_{decky_name}"
+
+
+def generate_topology_compose(hydrated: dict[str, Any]) -> dict:
+    """Build the compose dict for a hydrated topology.
+
+    ``hydrated`` is the shape returned by
+    :func:`decnet.topology.persistence.hydrate`.
+
+    Raises :class:`ValueError` if a decky's ``ips_by_lan`` names a LAN
+    that is not part of the topology.
+    """
+    topology = hydrated["topology"]
+    topology_id = topology["id"]
+    lans = hydrated["lans"]
+    deckies = hydrated["deckies"]
+
+    lan_by_name = {lan["name"]: lan for lan in lans}
+
+    services: dict[str, dict] = {}
+
+    for decky in deckies:
+        cfg = decky["decky_config"]
+        name = cfg["name"]
+        ips_by_lan: dict[str, str] = cfg["ips_by_lan"]
+        forwards_l3: bool = cfg.get("forwards_l3", False)
+        svc_names: list[str] = decky["services"]
+
+        base_key = name
+        nets: dict[str, dict] = {}
+        for lan_name, ip in ips_by_lan.items():
+            if lan_name not in lan_by_name:
+                raise ValueError(
+                    f"decky {name!r} references unknown LAN {lan_name!r}"
+                )
+            nets[_network_name(topology_id, lan_name)] = {"ipv4_address": ip}
+
+        base: dict = {
+            "image": _DEFAULT_BASE_IMAGE,
+            "container_name": _container_name(topology_id, name),
+            "hostname": name,
+            "command": ["sleep", "infinity"],
+            "restart": "unless-stopped",
+            "networks": nets,
+            "cap_add": ["NET_ADMIN"],
+            "logging": _DOCKER_LOGGING,
+        }
+        if forwards_l3:
+            base["sysctls"] = {"net.ipv4.ip_forward": 1}
+
+        services[base_key] = base
+
+        for svc_name in svc_names:
+            svc = get_service(svc_name)
+            if svc is None or svc.fleet_singleton:
+                continue
+            fragment = svc.compose_fragment(name, service_cfg={})
+            if "build" in fragment:
+                fragment["build"].setdefault("args", {}).setdefault(
+                    "BASE_IMAGE", _DEFAULT_BASE_IMAGE
+                )
+            fragment.setdefault("environment", {})
+            fragment["environment"]["HOSTNAME"] = name
+            fragment["network_mode"] = f"service:{base_key}"
+            fragment["depends_on"] = [base_key]
+            fragment.pop("hostname", None)
+            fragment.pop("networks", None)
+            fragment["logging"] = _DOCKER_LOGGING
+            services[f"{name}-{svc_name}"] = fragment
+
+    networks: dict[str, dict] = {
+        _network_name(topology_id, lan["name"]): {
+            "external": True,
+            "name": _network_name(topology_id, lan["name"]),
+        }
+        for lan in lans
+    }
+
+    return {
+        "version": "3.8",
+        "services": services,
+        "networks": networks,
+    }
+
+
+def write_topology_compose(hydrated: dict[str, Any], output_path: Path) -> Path:
+    """Write the compose dict for a hydrated topology and return the path."""
+    data = generate_topology_compose(hydrated)
+    output_path.write_text(
+        yaml.dump(data, default_flow_style=False, sort_keys=False)
+    )
+    return output_path
diff --git a/tests/topology/test_compose.py b/tests/topology/test_compose.py
new file mode 100644
index 00000000..6642cf9e
--- /dev/null
+++ b/tests/topology/test_compose.py
@@ -0,0 +1,108 @@
+"""MazeNET compose-generator + teardown-order tests."""
+from __future__ import annotations
+
+import pytest
+
+from decnet.engine.deployer import _teardown_order
+from decnet.topology.compose import (
+    _container_name,
+    _network_name,
+    generate_topology_compose,
+)
+from decnet.topology.config import TopologyConfig
+from decnet.topology.generator import generate
+from decnet.topology.persistence import hydrate, persist
+from decnet.web.db.factory import get_repository
+
+
+def _cfg(**kw) -> TopologyConfig:
+    base = dict(
+        name="cmp",
+        depth=2,
+        branching_factor=2,
+        deckies_per_lan_min=1,
+        deckies_per_lan_max=1,
+        cross_edge_probability=0.0,
+        randomize_services=False,
+        services_explicit=["ssh"],
+        seed=9,
+    )
+    base.update(kw)
+    return TopologyConfig(**base)
+
+
+@pytest.fixture
+async def repo(tmp_path):
+    r = get_repository(db_path=str(tmp_path / "compose.db"))
+    await r.initialize()
+    return r
+
+
+@pytest.mark.anyio
+async def test_compose_has_one_network_per_lan(repo):
+    plan = generate(_cfg())
+    tid = await persist(repo, plan)
+    hydrated = await hydrate(repo, tid)
+
+    data = generate_topology_compose(hydrated)
+    assert set(data["networks"].keys()) == {
+        _network_name(tid, lan.name) for lan in plan.lans
+    }
+    for net in data["networks"].values():
+        assert net["external"] is True
+
+
+@pytest.mark.anyio
+async def test_compose_multi_home_bridge_decky(repo):
+    plan = generate(_cfg())
+    tid = await persist(repo, plan)
+    hydrated = await hydrate(repo, tid)
+    data = generate_topology_compose(hydrated)
+
+    # Each base must list exactly its configured LANs, with matching IPs.
+    for decky in hydrated["deckies"]:
+        cfg = decky["decky_config"]
+        base = data["services"][cfg["name"]]
+        assert base["container_name"] == _container_name(tid, cfg["name"])
+        assert len(base["networks"]) == len(cfg["ips_by_lan"])
+        for lan_name, ip in cfg["ips_by_lan"].items():
+            net_key = _network_name(tid, lan_name)
+            assert base["networks"][net_key]["ipv4_address"] == ip
+
+
+@pytest.mark.anyio
+async def test_compose_forwards_l3_sets_sysctl(repo):
+    # Force every bridge to forward L3, then assert at least one base has it.
+    plan = generate(_cfg(bridge_forward_probability=1.0))
+    tid = await persist(repo, plan)
+    hydrated = await hydrate(repo, tid)
+    data = generate_topology_compose(hydrated)
+
+    forwarders = [
+        d for d in hydrated["deckies"]
+        if d["decky_config"].get("forwards_l3")
+    ]
+    assert forwarders, "expected at least one forwarding bridge decky"
+    for d in forwarders:
+        base = data["services"][d["decky_config"]["name"]]
+        assert base["sysctls"]["net.ipv4.ip_forward"] == 1
+        assert "NET_ADMIN" in base["cap_add"]
+
+
+def test_teardown_order_is_leaf_first():
+    lans = [
+        {"name": "LAN-00"},
+        {"name": "LAN-01"},
+        {"name": "LAN-02"},
+        {"name": "LAN-03"},
+    ]
+    order = _teardown_order(lans)
+    assert order == ["LAN-03", "LAN-02", "LAN-01", "LAN-00"]
+    # DMZ is last — nothing should be torn down after LAN-00.
+    assert order[-1] == "LAN-00"
+
+
+def test_teardown_order_sorts_numerically_past_two_digits():
+    lans = [{"name": f"LAN-{i:02d}"} for i in (0, 9, 10, 99, 100)]
+    order = _teardown_order(lans)
+    assert order == ["LAN-100", "LAN-99", "LAN-10", "LAN-09", "LAN-00"]