diff --git a/decnet/engine/deployer.py b/decnet/engine/deployer.py index baec41f..c788158 100644 --- a/decnet/engine/deployer.py +++ b/decnet/engine/deployer.py @@ -60,7 +60,19 @@ def _compose(*args: str, compose_file: Path = COMPOSE_FILE, env: dict | None = N # "project name must not be empty". cmd = ["docker", "compose", "-p", "decnet", "-f", str(compose_file), *args] merged = {**os.environ, **(env or {})} - subprocess.run(cmd, check=True, env=merged) # nosec B603 + result = subprocess.run(cmd, capture_output=True, text=True, env=merged) # nosec B603 + if result.stdout: + print(result.stdout, end="") + if result.returncode != 0: + # Docker emits the useful detail ("Address already in use", which IP, + # which port) on stderr. Surface it to the structured log so the + # agent's journal carries it — without this the upstream traceback + # just shows the exit code. + if result.stderr: + log.error("docker compose %s failed: %s", " ".join(args), result.stderr.strip()) + raise subprocess.CalledProcessError( + result.returncode, cmd, result.stdout, result.stderr + ) _PERMANENT_ERRORS = ( @@ -114,6 +126,8 @@ def _compose_with_retry( else: if result.stderr: console.print(f"[red]{result.stderr.strip()}[/]") + log.error("docker compose %s failed after %d attempts: %s", + " ".join(args), retries, result.stderr.strip()) raise last_exc @@ -162,6 +176,15 @@ def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False, save_state(config, compose_path) + # Pre-up cleanup: a prior half-failed `up` can leave containers still + # holding the IPs/ports this run wants, which surfaces as the recurring + # "Address already in use" from Docker's IPAM. Best-effort — ignore + # failure (e.g. nothing to tear down on a clean host). + try: + _compose("down", "--remove-orphans", compose_file=compose_path) + except subprocess.CalledProcessError: + log.debug("pre-up cleanup: compose down failed (likely nothing to remove)") + build_env = {"DOCKER_BUILDKIT": "1"} if parallel else {} console.print("[bold cyan]Building images and starting deckies...[/]") diff --git a/decnet/network.py b/decnet/network.py index 1d183be..17b0527 100644 --- a/decnet/network.py +++ b/decnet/network.py @@ -152,9 +152,20 @@ def _ensure_network( for net in client.networks.list(names=[MACVLAN_NETWORK_NAME]): if net.attrs.get("Driver") == driver: - return # right driver, leave it alone - # Wrong driver — tear it down. Disconnect any live containers first - # so `remove()` doesn't refuse with ErrNetworkInUse. + # Same driver — but if the IPAM pool drifted (different subnet, + # gateway, or ip-range than this deploy asks for), reusing it + # hands out addresses from the old pool and we race the real LAN. + # Compare and rebuild on mismatch. + pools = (net.attrs.get("IPAM") or {}).get("Config") or [] + cur = pools[0] if pools else {} + if ( + cur.get("Subnet") == subnet + and cur.get("Gateway") == gateway + and cur.get("IPRange") == ip_range + ): + return # right driver AND matching pool, leave it alone + # Driver mismatch OR IPAM drift — tear it down. Disconnect any live + # containers first so `remove()` doesn't refuse with ErrNetworkInUse. for cid in (net.attrs.get("Containers") or {}): try: net.disconnect(cid, force=True) diff --git a/tests/test_deployer.py b/tests/test_deployer.py index fb85609..d220a2d 100644 --- a/tests/test_deployer.py +++ b/tests/test_deployer.py @@ -40,6 +40,7 @@ class TestCompose: @patch("decnet.engine.deployer.subprocess.run") def test_compose_constructs_correct_command(self, mock_run): from decnet.engine.deployer import _compose + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") _compose("up", "-d", compose_file=Path("test.yml")) mock_run.assert_called_once() cmd = mock_run.call_args[0][0] @@ -50,6 +51,7 @@ class TestCompose: @patch("decnet.engine.deployer.subprocess.run") def test_compose_passes_env(self, mock_run): from decnet.engine.deployer import _compose + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") _compose("build", env={"DOCKER_BUILDKIT": "1"}) _, kwargs = mock_run.call_args assert "DOCKER_BUILDKIT" in kwargs["env"] diff --git a/tests/test_network.py b/tests/test_network.py index fa6a744..8db05c4 100644 --- a/tests/test_network.py +++ b/tests/test_network.py @@ -76,12 +76,22 @@ class TestIpsToRange: # --------------------------------------------------------------------------- class TestCreateMacvlanNetwork: - def _make_client(self, existing=None, existing_driver="macvlan"): + def _make_client(self, existing=None, existing_driver="macvlan", + ipam_subnet="192.168.1.0/24", ipam_gateway="192.168.1.1", + ipam_range="192.168.1.96/27"): client = MagicMock() nets = [MagicMock(name=n) for n in (existing or [])] for net, n in zip(nets, (existing or [])): net.name = n - net.attrs = {"Driver": existing_driver, "Containers": {}} + net.attrs = { + "Driver": existing_driver, + "Containers": {}, + "IPAM": {"Config": [{ + "Subnet": ipam_subnet, + "Gateway": ipam_gateway, + "IPRange": ipam_range, + }]}, + } client.networks.list.return_value = nets return client @@ -99,18 +109,38 @@ class TestCreateMacvlanNetwork: create_macvlan_network(client, "eth0", "192.168.1.0/24", "192.168.1.1", "192.168.1.96/27") client.networks.create.assert_not_called() + def test_rebuilds_when_ipam_subnet_drifted(self): + """Existing net matches driver+name, but IPAM pool is stale. Reusing it + hands out addresses from the old pool — surfaces as 'Address already in + use'. Must tear down + recreate.""" + client = self._make_client([MACVLAN_NETWORK_NAME], ipam_subnet="10.0.0.0/24") + old_net = client.networks.list.return_value[0] + create_macvlan_network(client, "eth0", "192.168.1.0/24", "192.168.1.1", "192.168.1.96/27") + old_net.remove.assert_called_once() + client.networks.create.assert_called_once() + # --------------------------------------------------------------------------- # create_ipvlan_network # --------------------------------------------------------------------------- class TestCreateIpvlanNetwork: - def _make_client(self, existing=None, existing_driver="ipvlan"): + def _make_client(self, existing=None, existing_driver="ipvlan", + ipam_subnet="192.168.1.0/24", ipam_gateway="192.168.1.1", + ipam_range="192.168.1.96/27"): client = MagicMock() nets = [MagicMock(name=n) for n in (existing or [])] for net, n in zip(nets, (existing or [])): net.name = n - net.attrs = {"Driver": existing_driver, "Containers": {}} + net.attrs = { + "Driver": existing_driver, + "Containers": {}, + "IPAM": {"Config": [{ + "Subnet": ipam_subnet, + "Gateway": ipam_gateway, + "IPRange": ipam_range, + }]}, + } client.networks.list.return_value = nets return client