Files
DECNET/tests/topology/test_deploy.py
anti 2ca6533666 fix(topology): anchor compose path to run dir, stop install-dir litter
_topology_compose_path returned a CWD-relative Path, so every
deploy/mutate/dry-run wrote decnet-topology-<id8>-compose.yml into the
process CWD (the install dir). Teardown computed the same relative path
against its own CWD, so when it differed the unlink() missed the orphan
and files accumulated forever.

Anchor to $DECNET_RUN_DIR (default /var/lib/decnet/topologies, tempdir
fallback) so write and teardown always agree regardless of CWD.
2026-06-18 21:24:00 -04:00

257 lines
8.9 KiB
Python

# SPDX-License-Identifier: AGPL-3.0-or-later
"""Deploy/teardown integration tests for MazeNET topologies.
Docker-touching paths live behind ``@pytest.mark.live`` per
feedback_skip_heavy_tests.md. The non-live path here exercises dry-run
deploy (compose file is written, repo status is left untouched) and the
state-machine around failure/teardown using a stub repo.
"""
from __future__ import annotations
from pathlib import Path
from unittest.mock import patch
import pytest
from decnet.engine.deployer import (
_teardown_order,
_topology_compose_path,
deploy_topology,
teardown_topology,
)
from decnet.topology.config import TopologyConfig
from decnet.topology.generator import generate
from decnet.topology.persistence import persist
from decnet.topology.status import TopologyStatus
from decnet.web.db.factory import get_repository
def _cfg(**kw) -> TopologyConfig:
base = dict(
name="dep",
depth=2,
branching_factor=2,
deckies_per_lan_min=1,
deckies_per_lan_max=1,
cross_edge_probability=0.0,
randomize_services=False,
services_explicit=["ssh"],
seed=11,
)
base.update(kw)
return TopologyConfig(**base)
@pytest.fixture
async def repo(tmp_path):
r = get_repository(db_path=str(tmp_path / "dep.db"))
await r.initialize()
return r
@pytest.mark.anyio
async def test_dry_run_writes_compose_and_preserves_pending(repo, tmp_path, monkeypatch):
monkeypatch.setenv("DECNET_RUN_DIR", str(tmp_path))
plan = generate(_cfg())
tid = await persist(repo, plan)
await deploy_topology(repo, tid, dry_run=True)
compose_path = _topology_compose_path(tid)
assert compose_path.exists(), "dry run must emit a compose file"
topo = await repo.get_topology(tid)
assert topo.status == TopologyStatus.PENDING, (
"dry run must not transition status"
)
@pytest.mark.anyio
async def test_deploy_failure_transitions_to_failed(repo, tmp_path, monkeypatch):
"""If compose-up fails, status lands at FAILED with the reason logged."""
monkeypatch.chdir(tmp_path)
plan = generate(_cfg())
tid = await persist(repo, plan)
class _BoomClient:
def __init__(self):
self.networks = self
def list(self, names=None, filters=None): # noqa: ARG002
return []
def create(self, *a, **kw): # noqa: ARG002
raise RuntimeError("boom: docker daemon unreachable")
with patch("decnet.engine.deployer.docker.from_env", return_value=_BoomClient()):
with pytest.raises(RuntimeError, match="boom"):
await deploy_topology(repo, tid)
topo = await repo.get_topology(tid)
assert topo.status == TopologyStatus.FAILED
events = await repo.list_topology_status_events(tid)
# Events are returned newest-first.
last = events[0]
assert last["to_status"] == TopologyStatus.FAILED
assert "boom" in (last["reason"] or "")
@pytest.mark.anyio
async def test_deploy_failure_rolls_back_created_networks(repo, tmp_path, monkeypatch):
"""Networks created before the failing op must be removed on rollback.
Reproduces the ``Pool overlaps`` regression: a failed deploy left
partial networks alive and the next deploy hit an IPAM conflict."""
monkeypatch.chdir(tmp_path)
plan = generate(_cfg())
tid = await persist(repo, plan)
class _PartialClient:
def __init__(self):
self.networks = self
self.created: list[str] = []
self.removed: list[str] = []
self._call = 0
self._created_objs: dict[str, _FakeNet] = {}
def list(self, names=None, filters=None): # noqa: ARG002
if not names:
return []
return [self._created_objs[n] for n in names if n in self._created_objs]
def create(self, name, *a, **kw): # noqa: ARG002
self._call += 1
# Succeed on the first N-1 creates, blow up on the last.
if self._call >= 2:
raise RuntimeError("boom: pool overlap")
self.created.append(name)
obj = _FakeNet(name, self)
self._created_objs[name] = obj
return obj
class _FakeNet:
def __init__(self, name, client):
self.name = name
self.id = f"id-{name}"
self.attrs = {"Containers": {}}
self._client = client
def remove(self):
self._client.removed.append(self.name)
self._client._created_objs.pop(self.name, None)
fake = _PartialClient()
with patch("decnet.engine.deployer.docker.from_env", return_value=fake):
with patch("decnet.engine.deployer._compose") as mock_down:
with pytest.raises(RuntimeError, match="boom"):
await deploy_topology(repo, tid)
# compose down is invoked only when compose was actually started
# OR a partial compose file exists; create_bridge_network failed
# before write_topology_compose, so _compose should not have run.
mock_down.assert_not_called()
# Every network created this attempt must have been removed on rollback.
assert set(fake.removed) == set(fake.created)
topo = await repo.get_topology(tid)
assert topo.status == TopologyStatus.FAILED
@pytest.mark.anyio
async def test_teardown_from_failed_marks_torn_down(repo, tmp_path, monkeypatch):
monkeypatch.chdir(tmp_path)
plan = generate(_cfg())
tid = await persist(repo, plan)
# Drive it into FAILED directly via the legal path.
from decnet.topology.persistence import transition_status
await transition_status(repo, tid, TopologyStatus.DEPLOYING)
await transition_status(repo, tid, TopologyStatus.FAILED, reason="test")
class _StubClient:
def __init__(self):
self.networks = self
def list(self, names=None, filters=None): # noqa: ARG002
return []
with patch("decnet.engine.deployer.docker.from_env", return_value=_StubClient()):
await teardown_topology(repo, tid)
topo = await repo.get_topology(tid)
assert topo.status == TopologyStatus.TORN_DOWN
def test_teardown_order_is_stable():
lans = [{"name": f"LAN-{i:02d}"} for i in range(5)]
assert _teardown_order(lans) == [
"LAN-04", "LAN-03", "LAN-02", "LAN-01", "LAN-00",
]
@pytest.mark.live
@pytest.mark.anyio
async def test_deploy_and_teardown_against_real_docker(repo, tmp_path, monkeypatch):
"""End-to-end: create real Docker bridge networks, verify, tear down.
Skipped on CI; run locally with ``pytest -m live tests/topology``.
Does NOT run ``docker compose up`` — that's exercised by the flat
fleet tests. This test covers the topology-specific paths only
(LAN network creation, multi-home bridge wiring, teardown order).
"""
monkeypatch.chdir(tmp_path)
docker = pytest.importorskip("docker")
try:
client = docker.from_env()
client.ping()
except Exception as exc: # pragma: no cover - environment-specific
pytest.skip(f"docker daemon not reachable: {exc}")
plan = generate(_cfg(depth=1, branching_factor=1))
tid = await persist(repo, plan)
from decnet.topology.compose import _network_name
try:
await deploy_topology(repo, tid, dry_run=True)
# Dry run doesn't create networks. Now exercise the real path by
# creating just the networks (no compose up) and tearing down.
from decnet.network import create_bridge_network, remove_bridge_network
for lan in plan.lans:
create_bridge_network(
client,
_network_name(tid, lan.name),
lan.subnet,
internal=not lan.is_dmz,
)
existing = {n.name for n in client.networks.list()}
for lan in plan.lans:
assert _network_name(tid, lan.name) in existing
finally:
for lan in plan.lans:
remove_bridge_network(client, _network_name(tid, lan.name))
remaining = {n.name for n in client.networks.list()}
for lan in plan.lans:
assert _network_name(tid, lan.name) not in remaining
# Compose artifact cleanup
p = _topology_compose_path(tid)
if p.exists():
p.unlink()
# Sanity: Path roundtrip still resolvable
assert isinstance(Path(str(p)), Path)
def test_compose_path_is_absolute_and_cwd_independent(tmp_path, monkeypatch):
"""Regression: a CWD-relative compose path littered the install dir and
let teardown's unlink() miss orphans. Path must be absolute and stable
across CWD changes so write and teardown always agree."""
monkeypatch.setenv("DECNET_RUN_DIR", str(tmp_path))
tid = "abcdef1234567890"
monkeypatch.chdir(tmp_path)
p1 = _topology_compose_path(tid)
sub = tmp_path / "elsewhere"
sub.mkdir()
monkeypatch.chdir(sub)
p2 = _topology_compose_path(tid)
assert p1.is_absolute()
assert p1 == p2, "compose path must not depend on process CWD"
assert p1.parent == tmp_path