merge testing->tomerge/main #7

Open
anti wants to merge 242 commits from testing into tomerge/main
4 changed files with 74 additions and 8 deletions
Showing only changes of commit 91549e6936 - Show all commits

View File

@@ -60,7 +60,19 @@ def _compose(*args: str, compose_file: Path = COMPOSE_FILE, env: dict | None = N
# "project name must not be empty".
cmd = ["docker", "compose", "-p", "decnet", "-f", str(compose_file), *args]
merged = {**os.environ, **(env or {})}
subprocess.run(cmd, check=True, env=merged) # nosec B603
result = subprocess.run(cmd, capture_output=True, text=True, env=merged) # nosec B603
if result.stdout:
print(result.stdout, end="")
if result.returncode != 0:
# Docker emits the useful detail ("Address already in use", which IP,
# which port) on stderr. Surface it to the structured log so the
# agent's journal carries it — without this the upstream traceback
# just shows the exit code.
if result.stderr:
log.error("docker compose %s failed: %s", " ".join(args), result.stderr.strip())
raise subprocess.CalledProcessError(
result.returncode, cmd, result.stdout, result.stderr
)
_PERMANENT_ERRORS = (
@@ -114,6 +126,8 @@ def _compose_with_retry(
else:
if result.stderr:
console.print(f"[red]{result.stderr.strip()}[/]")
log.error("docker compose %s failed after %d attempts: %s",
" ".join(args), retries, result.stderr.strip())
raise last_exc
@@ -162,6 +176,15 @@ def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False,
save_state(config, compose_path)
# Pre-up cleanup: a prior half-failed `up` can leave containers still
# holding the IPs/ports this run wants, which surfaces as the recurring
# "Address already in use" from Docker's IPAM. Best-effort — ignore
# failure (e.g. nothing to tear down on a clean host).
try:
_compose("down", "--remove-orphans", compose_file=compose_path)
except subprocess.CalledProcessError:
log.debug("pre-up cleanup: compose down failed (likely nothing to remove)")
build_env = {"DOCKER_BUILDKIT": "1"} if parallel else {}
console.print("[bold cyan]Building images and starting deckies...[/]")

View File

@@ -152,9 +152,20 @@ def _ensure_network(
for net in client.networks.list(names=[MACVLAN_NETWORK_NAME]):
if net.attrs.get("Driver") == driver:
return # right driver, leave it alone
# Wrong driver — tear it down. Disconnect any live containers first
# so `remove()` doesn't refuse with ErrNetworkInUse.
# Same driver — but if the IPAM pool drifted (different subnet,
# gateway, or ip-range than this deploy asks for), reusing it
# hands out addresses from the old pool and we race the real LAN.
# Compare and rebuild on mismatch.
pools = (net.attrs.get("IPAM") or {}).get("Config") or []
cur = pools[0] if pools else {}
if (
cur.get("Subnet") == subnet
and cur.get("Gateway") == gateway
and cur.get("IPRange") == ip_range
):
return # right driver AND matching pool, leave it alone
# Driver mismatch OR IPAM drift — tear it down. Disconnect any live
# containers first so `remove()` doesn't refuse with ErrNetworkInUse.
for cid in (net.attrs.get("Containers") or {}):
try:
net.disconnect(cid, force=True)

View File

@@ -40,6 +40,7 @@ class TestCompose:
@patch("decnet.engine.deployer.subprocess.run")
def test_compose_constructs_correct_command(self, mock_run):
from decnet.engine.deployer import _compose
mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="")
_compose("up", "-d", compose_file=Path("test.yml"))
mock_run.assert_called_once()
cmd = mock_run.call_args[0][0]
@@ -50,6 +51,7 @@ class TestCompose:
@patch("decnet.engine.deployer.subprocess.run")
def test_compose_passes_env(self, mock_run):
from decnet.engine.deployer import _compose
mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="")
_compose("build", env={"DOCKER_BUILDKIT": "1"})
_, kwargs = mock_run.call_args
assert "DOCKER_BUILDKIT" in kwargs["env"]

View File

@@ -76,12 +76,22 @@ class TestIpsToRange:
# ---------------------------------------------------------------------------
class TestCreateMacvlanNetwork:
def _make_client(self, existing=None, existing_driver="macvlan"):
def _make_client(self, existing=None, existing_driver="macvlan",
ipam_subnet="192.168.1.0/24", ipam_gateway="192.168.1.1",
ipam_range="192.168.1.96/27"):
client = MagicMock()
nets = [MagicMock(name=n) for n in (existing or [])]
for net, n in zip(nets, (existing or [])):
net.name = n
net.attrs = {"Driver": existing_driver, "Containers": {}}
net.attrs = {
"Driver": existing_driver,
"Containers": {},
"IPAM": {"Config": [{
"Subnet": ipam_subnet,
"Gateway": ipam_gateway,
"IPRange": ipam_range,
}]},
}
client.networks.list.return_value = nets
return client
@@ -99,18 +109,38 @@ class TestCreateMacvlanNetwork:
create_macvlan_network(client, "eth0", "192.168.1.0/24", "192.168.1.1", "192.168.1.96/27")
client.networks.create.assert_not_called()
def test_rebuilds_when_ipam_subnet_drifted(self):
"""Existing net matches driver+name, but IPAM pool is stale. Reusing it
hands out addresses from the old pool — surfaces as 'Address already in
use'. Must tear down + recreate."""
client = self._make_client([MACVLAN_NETWORK_NAME], ipam_subnet="10.0.0.0/24")
old_net = client.networks.list.return_value[0]
create_macvlan_network(client, "eth0", "192.168.1.0/24", "192.168.1.1", "192.168.1.96/27")
old_net.remove.assert_called_once()
client.networks.create.assert_called_once()
# ---------------------------------------------------------------------------
# create_ipvlan_network
# ---------------------------------------------------------------------------
class TestCreateIpvlanNetwork:
def _make_client(self, existing=None, existing_driver="ipvlan"):
def _make_client(self, existing=None, existing_driver="ipvlan",
ipam_subnet="192.168.1.0/24", ipam_gateway="192.168.1.1",
ipam_range="192.168.1.96/27"):
client = MagicMock()
nets = [MagicMock(name=n) for n in (existing or [])]
for net, n in zip(nets, (existing or [])):
net.name = n
net.attrs = {"Driver": existing_driver, "Containers": {}}
net.attrs = {
"Driver": existing_driver,
"Containers": {},
"IPAM": {"Config": [{
"Subnet": ipam_subnet,
"Gateway": ipam_gateway,
"IPRange": ipam_range,
}]},
}
client.networks.list.return_value = nets
return client