feat(topology): add compose generator and deployer integration
Adds per-topology compose generation (one Docker bridge network per LAN, multi-homed bridge deckies, ip_forward sysctl for L3 forwarders) plus async deploy_topology/teardown_topology in the engine. Leaf-first teardown via BFS-named LAN reverse sort; partial-state safe on failure.
This commit is contained in:
@@ -17,16 +17,24 @@ from decnet.config import DecnetConfig, clear_state, load_state, save_state
|
|||||||
from decnet.composer import write_compose
|
from decnet.composer import write_compose
|
||||||
from decnet.network import (
|
from decnet.network import (
|
||||||
MACVLAN_NETWORK_NAME,
|
MACVLAN_NETWORK_NAME,
|
||||||
|
create_bridge_network,
|
||||||
create_ipvlan_network,
|
create_ipvlan_network,
|
||||||
create_macvlan_network,
|
create_macvlan_network,
|
||||||
get_host_ip,
|
get_host_ip,
|
||||||
ips_to_range,
|
ips_to_range,
|
||||||
|
remove_bridge_network,
|
||||||
remove_macvlan_network,
|
remove_macvlan_network,
|
||||||
setup_host_ipvlan,
|
setup_host_ipvlan,
|
||||||
setup_host_macvlan,
|
setup_host_macvlan,
|
||||||
teardown_host_ipvlan,
|
teardown_host_ipvlan,
|
||||||
teardown_host_macvlan,
|
teardown_host_macvlan,
|
||||||
)
|
)
|
||||||
|
from decnet.topology.compose import (
|
||||||
|
_network_name as _topology_network_name,
|
||||||
|
write_topology_compose,
|
||||||
|
)
|
||||||
|
from decnet.topology.persistence import hydrate, transition_status
|
||||||
|
from decnet.topology.status import TopologyStatus
|
||||||
|
|
||||||
log = get_logger("engine")
|
log = get_logger("engine")
|
||||||
console = Console()
|
console = Console()
|
||||||
@@ -281,6 +289,106 @@ def status() -> None:
|
|||||||
console.print(table)
|
console.print(table)
|
||||||
|
|
||||||
|
|
||||||
|
def _teardown_order(lans: list[dict]) -> list[str]:
|
||||||
|
"""Return LAN names in leaf-first (DMZ-last) teardown order.
|
||||||
|
|
||||||
|
The generator names LANs in BFS order (``LAN-00`` = DMZ root,
|
||||||
|
then children, then grandchildren), so reverse-name order is a
|
||||||
|
correct leaf-first topological sort for the tree. Cross-edges
|
||||||
|
are membership-only — they don't introduce parent/child
|
||||||
|
relationships, so the BFS numbering remains valid.
|
||||||
|
"""
|
||||||
|
return sorted((lan["name"] for lan in lans), reverse=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _topology_compose_path(topology_id: str) -> Path:
|
||||||
|
return Path(f"decnet-topology-{topology_id[:8]}-compose.yml")
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("engine.deploy_topology")
async def deploy_topology(repo, topology_id: str, *, dry_run: bool = False) -> None:
    """Deploy a persisted MazeNET topology.

    Assumes ``repo`` has the topology in ``pending`` state. Creates one
    Docker bridge network per LAN, writes a per-topology compose file,
    and brings all deckies up. Marks ``active`` on success, ``failed``
    on exception (partial state left for later teardown).

    Args:
        repo: Repository handle passed through to ``hydrate`` and
            ``transition_status``.
        topology_id: Id of the topology to deploy.
        dry_run: When true, stop after the networks are created and the
            compose file is written; ``docker compose up`` is not run.

    Raises:
        ValueError: If the topology cannot be hydrated from ``repo``.
        Exception: Any error raised while creating networks, writing the
            compose file or running compose is re-raised after the
            topology has been marked ``failed``.
    """
    hydrated = await hydrate(repo, topology_id)
    if hydrated is None:
        raise ValueError(f"topology {topology_id!r} not found")

    await transition_status(repo, topology_id, TopologyStatus.DEPLOYING)

    lans = hydrated["lans"]
    compose_path = _topology_compose_path(topology_id)

    # NOTE(review): the Docker SDK and compose calls below are invoked
    # synchronously from this coroutine — confirm that blocking the event
    # loop for the duration of a deploy is acceptable for callers.
    try:
        # Connect inside the try block: an unreachable Docker daemon must be
        # recorded as FAILED instead of leaving the topology in DEPLOYING.
        client = docker.from_env()
        for lan in lans:
            net_name = _topology_network_name(topology_id, lan["name"])
            # DMZ LAN is publicly routable; internal LANs are isolated
            # from the host's default egress.
            internal = not lan["is_dmz"]
            create_bridge_network(
                client, net_name, lan["subnet"], internal=internal
            )
        write_topology_compose(hydrated, compose_path)
        console.print(
            f"[bold cyan]Topology compose file written[/] → {compose_path}"
        )
        if dry_run:
            # NOTE(review): a dry run returns with the bridge networks
            # created and the status still DEPLOYING — confirm this is the
            # intended contract (teardown is documented as legal from
            # ``deploying``).
            log.info("topology %s dry-run complete", topology_id)
            return
        _compose_with_retry("up", "--build", "-d", compose_file=compose_path)
    except Exception as exc:
        log.error("topology %s deploy failed: %s", topology_id, exc)
        await transition_status(
            repo, topology_id, TopologyStatus.FAILED, reason=str(exc)
        )
        raise

    await transition_status(repo, topology_id, TopologyStatus.ACTIVE)
    log.info("topology %s deployed n_lans=%d", topology_id, len(lans))
|
||||||
|
|
||||||
|
|
||||||
|
@_traced("engine.teardown_topology")
async def teardown_topology(repo, topology_id: str) -> None:
    """Tear down a persisted MazeNET topology.

    Legal from ``active|degraded|failed|deploying``. Steps, in order:

    1. mark the topology as tearing down;
    2. run ``compose down`` if the topology's compose file is present
       (a failing compose command is logged and ignored);
    3. remove each LAN's Docker bridge network, leaf LANs first;
    4. delete the compose file and mark the topology ``torn_down``.

    Raises:
        ValueError: If the topology cannot be hydrated from ``repo``.
    """
    snapshot = await hydrate(repo, topology_id)
    if snapshot is None:
        raise ValueError(f"topology {topology_id!r} not found")

    await transition_status(repo, topology_id, TopologyStatus.TEARING_DOWN)

    docker_client = docker.from_env()
    compose_file = _topology_compose_path(topology_id)

    if compose_file.exists():
        try:
            _compose("down", "--remove-orphans", compose_file=compose_file)
        except subprocess.CalledProcessError as err:
            # Best effort: the networks below are still removed explicitly.
            log.warning(
                "topology %s compose down failed (continuing): %s",
                topology_id, err,
            )

    ordered_lans = _teardown_order(snapshot["lans"])
    for lan_name in ordered_lans:
        remove_bridge_network(
            docker_client, _topology_network_name(topology_id, lan_name)
        )

    if compose_file.exists():
        compose_file.unlink()

    await transition_status(repo, topology_id, TopologyStatus.TORN_DOWN)
    log.info("topology %s torn down", topology_id)
|
||||||
|
|
||||||
|
|
||||||
def _print_status(config: DecnetConfig) -> None:
|
def _print_status(config: DecnetConfig) -> None:
|
||||||
table = Table(title="Deployed Deckies", show_lines=True)
|
table = Table(title="Deployed Deckies", show_lines=True)
|
||||||
table.add_column("Decky")
|
table.add_column("Decky")
|
||||||
|
|||||||
@@ -227,6 +227,60 @@ def remove_macvlan_network(client: docker.DockerClient) -> None:
|
|||||||
n.remove()
|
n.remove()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Plain Docker bridge networks (MazeNET topologies — one per LAN)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def create_bridge_network(
    client: docker.DockerClient,
    name: str,
    subnet: str,
    *,
    internal: bool = False,
) -> str:
    """Create (or reuse) a plain Docker bridge network and return its id.

    ``internal=True`` blocks outbound routing via the host — used for
    non-DMZ MazeNET LANs so deckies can only reach what the bridge
    deckies let them reach.

    An existing network is reused only when its name matches exactly and
    its driver, first IPAM subnet and ``Internal`` flag all match the
    request. Any other network carrying this exact name has its
    containers force-disconnected and is removed before a fresh one is
    created.

    Args:
        client: Docker SDK client.
        name: Exact network name.
        subnet: Subnet (CIDR string) for the network's single IPAM pool.
        internal: Whether to create the network as ``internal``.

    Returns:
        The id of the reused or newly created network.
    """
    for net in client.networks.list(names=[name]):
        # Docker's name filter matches substrings; never touch a network
        # that merely contains ``name``.
        if net.name != name:
            continue
        pools = (net.attrs.get("IPAM") or {}).get("Config") or []
        cur = pools[0] if pools else {}
        if (
            net.attrs.get("Driver") == "bridge"
            and cur.get("Subnet") == subnet
            # Reusing a network with the wrong isolation mode would silently
            # expose (or cut off) the LAN, so the flag must match as well.
            and bool(net.attrs.get("Internal")) == bool(internal)
        ):
            return net.id
        # The list endpoint does not populate "Containers"; reload to get
        # the attached endpoints before force-disconnecting them.
        try:
            net.reload()
        except docker.errors.NotFound:
            # Removed concurrently — nothing left to clean up.
            continue
        for cid in (net.attrs.get("Containers") or {}):
            try:
                net.disconnect(cid, force=True)
            except docker.errors.APIError:
                # Best effort: remove() below reports a still-busy network.
                pass
        net.remove()

    net = client.networks.create(
        name=name,
        driver="bridge",
        internal=internal,
        ipam=docker.types.IPAMConfig(
            driver="default",
            pool_configs=[docker.types.IPAMPool(subnet=subnet)],
        ),
    )
    return net.id
|
||||||
|
|
||||||
|
|
||||||
|
def remove_bridge_network(client: docker.DockerClient, name: str) -> None:
    """Best-effort removal of the Docker network called exactly ``name``.

    Attached containers are force-disconnected first. Docker API errors
    from disconnecting or removing are deliberately swallowed so that a
    teardown can continue with the remaining networks.

    Args:
        client: Docker SDK client.
        name: Exact network name.
    """
    for net in client.networks.list(names=[name]):
        # Docker's name filter matches substrings; skip networks whose
        # name merely contains ``name`` so they are not removed by mistake.
        if net.name != name:
            continue
        # The list endpoint does not populate "Containers"; reload to get
        # the attached endpoints before force-disconnecting them.
        try:
            net.reload()
        except docker.errors.NotFound:
            # Already gone — nothing to remove.
            continue
        except docker.errors.APIError:
            # Fall back to whatever attributes the list call returned.
            pass
        for cid in (net.attrs.get("Containers") or {}):
            try:
                net.disconnect(cid, force=True)
            except docker.errors.APIError:
                pass
        try:
            net.remove()
        except docker.errors.APIError:
            # Deliberate best effort: teardown must not abort on one network.
            pass
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Host-side macvlan interface (hairpin fix)
|
# Host-side macvlan interface (hairpin fix)
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
130
decnet/topology/compose.py
Normal file
130
decnet/topology/compose.py
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
"""Compose-file generator for a MazeNET topology.
|
||||||
|
|
||||||
|
Produces a ``docker-compose.yml`` dict given a hydrated topology
|
||||||
|
(the output of :func:`decnet.topology.persistence.hydrate`). The
|
||||||
|
compose file references each LAN as an ``external: true`` network —
|
||||||
|
the deployer creates the Docker bridge networks via the SDK before
|
||||||
|
invoking ``docker compose up``.
|
||||||
|
|
||||||
|
Layout:
|
||||||
|
* Each decky has a "base" container holding the LAN IPs. Multi-homed
|
||||||
|
(bridge) deckies list every LAN they belong to under ``networks``
|
||||||
|
with the per-LAN ``ipv4_address``.
|
||||||
|
* Bridge deckies with ``forwards_l3=True`` get ``net.ipv4.ip_forward=1``
  baked in via compose ``sysctls``. Every base container, forwarding or
  not, gets ``NET_ADMIN`` in ``cap_add``.
|
||||||
|
* Service containers share the base namespace via
|
||||||
|
``network_mode: service:<base>``, matching the flat composer.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from decnet.services.registry import get_service
|
||||||
|
|
||||||
|
_DEFAULT_BASE_IMAGE = "debian:bookworm-slim"
|
||||||
|
|
||||||
|
_DOCKER_LOGGING = {
|
||||||
|
"driver": "json-file",
|
||||||
|
"options": {"max-size": "10m", "max-file": "5"},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _network_name(topology_id: str, lan_name: str) -> str:
|
||||||
|
"""Docker network name for a given (topology, LAN) pair."""
|
||||||
|
return f"decnet_t_{topology_id[:8]}_{lan_name.lower()}"
|
||||||
|
|
||||||
|
|
||||||
|
def _container_name(topology_id: str, decky_name: str) -> str:
|
||||||
|
"""Container name for a decky base in a topology."""
|
||||||
|
return f"decnet_t_{topology_id[:8]}_{decky_name}"
|
||||||
|
|
||||||
|
|
||||||
|
def generate_topology_compose(hydrated: dict[str, Any]) -> dict:
    """Build the compose dict for a hydrated topology.

    ``hydrated`` is the shape returned by
    :func:`decnet.topology.persistence.hydrate`. The keys read here are
    ``topology["id"]``, ``lans[*]["name"]`` and, per decky,
    ``decky_config`` (``name``, ``ips_by_lan``, optional
    ``forwards_l3``) and ``services``.

    For every decky a "base" service is emitted that is attached to each
    of its LANs with a fixed ``ipv4_address``. Each registered,
    non-fleet-singleton service of the decky becomes an additional
    compose service sharing the base's network namespace. Every LAN is
    declared as an ``external`` network.

    Returns:
        A dict with ``version``, ``services`` and ``networks`` keys.

    Raises:
        ValueError: If a decky references a LAN that is not part of the
            topology.
    """
    import copy  # local import: keeps this function independent of the module header

    topology = hydrated["topology"]
    topology_id = topology["id"]
    lans = hydrated["lans"]
    deckies = hydrated["deckies"]

    lan_by_name = {lan["name"]: lan for lan in lans}

    services: dict[str, dict] = {}

    for decky in deckies:
        cfg = decky["decky_config"]
        name = cfg["name"]
        ips_by_lan: dict[str, str] = cfg["ips_by_lan"]
        forwards_l3: bool = cfg.get("forwards_l3", False)
        svc_names: list[str] = decky["services"]

        base_key = name
        nets: dict[str, dict] = {}
        for lan_name, ip in ips_by_lan.items():
            if lan_name not in lan_by_name:
                raise ValueError(
                    f"decky {name!r} references unknown LAN {lan_name!r}"
                )
            nets[_network_name(topology_id, lan_name)] = {"ipv4_address": ip}

        base: dict = {
            "image": _DEFAULT_BASE_IMAGE,
            "container_name": _container_name(topology_id, name),
            "hostname": name,
            "command": ["sleep", "infinity"],
            "restart": "unless-stopped",
            "networks": nets,
            "cap_add": ["NET_ADMIN"],
            # Each service gets its own copy: sharing one dict object makes
            # yaml.dump emit anchors/aliases and lets a later mutation of one
            # service's logging config leak into every other service.
            "logging": copy.deepcopy(_DOCKER_LOGGING),
        }
        if forwards_l3:
            base["sysctls"] = {"net.ipv4.ip_forward": 1}

        services[base_key] = base

        for svc_name in svc_names:
            svc = get_service(svc_name)
            # Unregistered services and fleet singletons get no per-decky
            # container.
            if svc is None or svc.fleet_singleton:
                continue
            fragment = svc.compose_fragment(name, service_cfg={})
            if "build" in fragment:
                fragment["build"].setdefault("args", {}).setdefault(
                    "BASE_IMAGE", _DEFAULT_BASE_IMAGE
                )
            fragment.setdefault("environment", {})
            fragment["environment"]["HOSTNAME"] = name
            fragment["network_mode"] = f"service:{base_key}"
            fragment["depends_on"] = [base_key]
            # The fragment joins the base's network namespace via
            # ``network_mode``, so it must not carry its own hostname or
            # networks.
            fragment.pop("hostname", None)
            fragment.pop("networks", None)
            fragment["logging"] = copy.deepcopy(_DOCKER_LOGGING)
            services[f"{name}-{svc_name}"] = fragment

    networks: dict[str, dict] = {}
    for lan in lans:
        net_name = _network_name(topology_id, lan["name"])
        networks[net_name] = {"external": True, "name": net_name}

    return {
        "version": "3.8",
        "services": services,
        "networks": networks,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def write_topology_compose(hydrated: dict[str, Any], output_path: Path) -> Path:
    """Render a hydrated topology's compose document to ``output_path``.

    The document is dumped as block-style YAML with key order preserved.
    Returns ``output_path`` unchanged.
    """
    compose_doc = generate_topology_compose(hydrated)
    rendered = yaml.dump(compose_doc, default_flow_style=False, sort_keys=False)
    output_path.write_text(rendered)
    return output_path
|
||||||
102
tests/topology/test_compose.py
Normal file
102
tests/topology/test_compose.py
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
"""MazeNET compose-generator + teardown-order tests."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from decnet.engine.deployer import _teardown_order
|
||||||
|
from decnet.topology.compose import (
|
||||||
|
_container_name,
|
||||||
|
_network_name,
|
||||||
|
generate_topology_compose,
|
||||||
|
)
|
||||||
|
from decnet.topology.config import TopologyConfig
|
||||||
|
from decnet.topology.generator import generate
|
||||||
|
from decnet.topology.persistence import hydrate, persist
|
||||||
|
from decnet.web.db.factory import get_repository
|
||||||
|
|
||||||
|
|
||||||
|
def _cfg(**kw) -> TopologyConfig:
    """Return a small, seeded ``TopologyConfig`` for the compose tests.

    Keyword arguments override (or extend) the defaults below.
    """
    defaults = {
        "name": "cmp",
        "depth": 2,
        "branching_factor": 2,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 9,
    }
    return TopologyConfig(**{**defaults, **kw})
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
async def repo(tmp_path):
    """Provide an initialized repository backed by a database file under ``tmp_path``."""
    db_file = tmp_path / "compose.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_compose_has_one_network_per_lan(repo):
    """The compose file declares exactly one external network per planned LAN."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)
    hydrated = await hydrate(repo, tid)

    compose = generate_topology_compose(hydrated)
    declared = compose["networks"]

    expected_names = {_network_name(tid, lan.name) for lan in plan.lans}
    assert set(declared) == expected_names
    for definition in declared.values():
        assert definition["external"] is True
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_compose_multi_home_bridge_decky(repo):
    """Each decky base is attached to exactly its LANs, with the planned IPs."""
    plan = generate(_cfg())
    tid = await persist(repo, plan)
    hydrated = await hydrate(repo, tid)
    compose = generate_topology_compose(hydrated)

    # A base must list one network per entry in ``ips_by_lan`` (so a
    # multi-homed decky lists all of its LANs), each with the matching
    # address.
    for decky in hydrated["deckies"]:
        decky_cfg = decky["decky_config"]
        decky_name = decky_cfg["name"]
        base = compose["services"][decky_name]

        assert base["container_name"] == _container_name(tid, decky_name)
        assert len(base["networks"]) == len(decky_cfg["ips_by_lan"])
        for lan_name, ip in decky_cfg["ips_by_lan"].items():
            attachment = base["networks"][_network_name(tid, lan_name)]
            assert attachment["ipv4_address"] == ip
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_compose_forwards_l3_sets_sysctl(repo):
    """Forwarding deckies get the ip_forward sysctl and NET_ADMIN on their base."""
    # Force every bridge to forward L3, then assert at least one base has it.
    plan = generate(_cfg(bridge_forward_probability=1.0))
    tid = await persist(repo, plan)
    hydrated = await hydrate(repo, tid)
    compose = generate_topology_compose(hydrated)

    forwarder_names = [
        decky["decky_config"]["name"]
        for decky in hydrated["deckies"]
        if decky["decky_config"].get("forwards_l3")
    ]
    assert forwarder_names, "expected at least one forwarding bridge decky"

    for decky_name in forwarder_names:
        base = compose["services"][decky_name]
        assert base["sysctls"]["net.ipv4.ip_forward"] == 1
        assert "NET_ADMIN" in base["cap_add"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_teardown_order_is_leaf_first():
    """LANs are torn down in descending name order, with the DMZ root last."""
    lans = [{"name": f"LAN-0{index}"} for index in range(4)]

    order = _teardown_order(lans)

    assert order == ["LAN-03", "LAN-02", "LAN-01", "LAN-00"]
    # DMZ is last — nothing should be torn down after LAN-00.
    assert order[-1] == "LAN-00"
|
||||||
Reference in New Issue
Block a user