fix(deploy): prevent 'Address already in use' from stale IPAM and half-torn-down containers

Two compounding root causes produced the recurring 'Address already in use'
error on redeploy:

1. _ensure_network only compared driver+name; if a prior deploy's IPAM
   pool drifted (different subnet/gateway/range), Docker kept handing out
   addresses from the old pool and raced the real LAN. It now also
   compares Subnet/Gateway/IPRange and rebuilds the network on drift.

2. A prior half-failed 'up' could leave containers still holding the IPs
   and ports the new run wants. We now run 'compose down --remove-orphans'
   as a best-effort pre-up cleanup so IPAM starts from a clean state.

Also surface docker compose stderr to the structured log on failure so
the agent's journal captures Docker's actual message (which IP, which
port) instead of just the exit code.
This commit is contained in:
2026-04-19 19:59:06 -04:00
parent e8e11b2896
commit 91549e6936
4 changed files with 74 additions and 8 deletions

View File

@@ -40,6 +40,7 @@ class TestCompose:
@patch("decnet.engine.deployer.subprocess.run")
def test_compose_constructs_correct_command(self, mock_run):
from decnet.engine.deployer import _compose
mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="")
_compose("up", "-d", compose_file=Path("test.yml"))
mock_run.assert_called_once()
cmd = mock_run.call_args[0][0]
@@ -50,6 +51,7 @@ class TestCompose:
@patch("decnet.engine.deployer.subprocess.run")
def test_compose_passes_env(self, mock_run):
from decnet.engine.deployer import _compose
mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="")
_compose("build", env={"DOCKER_BUILDKIT": "1"})
_, kwargs = mock_run.call_args
assert "DOCKER_BUILDKIT" in kwargs["env"]

View File

@@ -76,12 +76,22 @@ class TestIpsToRange:
# ---------------------------------------------------------------------------
class TestCreateMacvlanNetwork:
def _make_client(self, existing=None, existing_driver="macvlan"):
def _make_client(self, existing=None, existing_driver="macvlan",
ipam_subnet="192.168.1.0/24", ipam_gateway="192.168.1.1",
ipam_range="192.168.1.96/27"):
client = MagicMock()
nets = [MagicMock(name=n) for n in (existing or [])]
for net, n in zip(nets, (existing or [])):
net.name = n
net.attrs = {"Driver": existing_driver, "Containers": {}}
net.attrs = {
"Driver": existing_driver,
"Containers": {},
"IPAM": {"Config": [{
"Subnet": ipam_subnet,
"Gateway": ipam_gateway,
"IPRange": ipam_range,
}]},
}
client.networks.list.return_value = nets
return client
@@ -99,18 +109,38 @@ class TestCreateMacvlanNetwork:
create_macvlan_network(client, "eth0", "192.168.1.0/24", "192.168.1.1", "192.168.1.96/27")
client.networks.create.assert_not_called()
def test_rebuilds_when_ipam_subnet_drifted(self):
"""Existing net matches driver+name, but IPAM pool is stale. Reusing it
hands out addresses from the old pool — surfaces as 'Address already in
use'. Must tear down + recreate."""
client = self._make_client([MACVLAN_NETWORK_NAME], ipam_subnet="10.0.0.0/24")
old_net = client.networks.list.return_value[0]
create_macvlan_network(client, "eth0", "192.168.1.0/24", "192.168.1.1", "192.168.1.96/27")
old_net.remove.assert_called_once()
client.networks.create.assert_called_once()
# ---------------------------------------------------------------------------
# create_ipvlan_network
# ---------------------------------------------------------------------------
class TestCreateIpvlanNetwork:
def _make_client(self, existing=None, existing_driver="ipvlan"):
def _make_client(self, existing=None, existing_driver="ipvlan",
ipam_subnet="192.168.1.0/24", ipam_gateway="192.168.1.1",
ipam_range="192.168.1.96/27"):
client = MagicMock()
nets = [MagicMock(name=n) for n in (existing or [])]
for net, n in zip(nets, (existing or [])):
net.name = n
net.attrs = {"Driver": existing_driver, "Containers": {}}
net.attrs = {
"Driver": existing_driver,
"Containers": {},
"IPAM": {"Config": [{
"Subnet": ipam_subnet,
"Gateway": ipam_gateway,
"IPRange": ipam_range,
}]},
}
client.networks.list.return_value = nets
return client