fix(deploy): redirect DOCKER_CONFIG out of $HOME so ProtectHome doesn't kill builds

The api unit's ProtectHome=read-only made the user's HOME read-only
inside the unit's namespace. docker compose --build then tried to
write ~/.docker/buildx/activity/* and got EROFS — which we'd been
misdiagnosing as a buildx wedge for the last few iterations.

Real fix: set DOCKER_CONFIG and BUILDX_CONFIG in the unit's
Environment= to a path inside ReadWritePaths. Hardening stays on, and the
docker CLI writes to install_dir/.docker instead of /home/<user>/.docker.

The wedge classifier now detects this case (zero leaked buildkit mounts
plus a /home/ path in the stderr message) and emits a recipe pointing at
the env-var fix instead of the driver-rebuild path. Test added.

Wiki gets the new branch first since it's the most common cause
on systemd-managed installs.
This commit is contained in:
2026-04-24 22:07:13 -04:00
parent 257624e6a7
commit f8ef0a5cf1
3 changed files with 63 additions and 2 deletions

View File

@@ -163,6 +163,26 @@ class TestComposeWithRetry:
# can see what compose actually said.
assert "Original error" in ei.value.stderr
@patch("decnet.engine.deployer.subprocess.run")
def test_buildx_wedge_protecthome_branch(self, mock_run, monkeypatch):
    """Read-only error on a /home path with zero leaked mounts.

    In that situation the recipe attached to the raised error should
    name ProtectHome=read-only and the DOCKER_CONFIG / BUILDX_CONFIG
    redirection, and must not suggest recreating the buildx driver.
    """
    from decnet.engine import deployer

    # Compose fails with a read-only error whose path lives under /home.
    erofs_stderr = (
        "failed to update builder last activity time: open "
        "/home/anti/.docker/buildx/activity/.tmp-x: read-only file system"
    )
    mock_run.return_value = MagicMock(returncode=1, stdout="", stderr=erofs_stderr)
    # Report zero leaked buildkit mounts for this scenario.
    monkeypatch.setattr(deployer, "_count_leaked_buildkit_mounts", lambda: 0)

    with pytest.raises(subprocess.CalledProcessError) as exc_info:
        deployer._compose_with_retry("up", "--build")

    reported = exc_info.value.stderr
    for expected in ("ProtectHome=read-only", "DOCKER_CONFIG", "BUILDX_CONFIG"):
        assert expected in reported
    # Driver-rebuild recipe must NOT be the suggested fix here.
    assert "buildx create --name decnet-builder" not in reported
@patch("decnet.engine.deployer.subprocess.run")
def test_buildx_wedge_zero_mounts_uses_driver_rebuild_recipe(self, mock_run, monkeypatch):
"""Wedge signature with 0 leaked mounts means the buildx driver
@@ -172,7 +192,9 @@ class TestComposeWithRetry:
monkeypatch.setattr(deployer, "_count_leaked_buildkit_mounts", lambda: 0)
mock_run.return_value = MagicMock(
returncode=1, stdout="",
stderr="failed to update builder last activity time: read-only file system",
# No /home/ path — driver-rebuild branch, not ProtectHome.
stderr="failed to update builder last activity time: open "
"/var/lib/decnet/.docker/buildx/activity/.tmp-x: read-only file system",
)
with pytest.raises(subprocess.CalledProcessError) as ei:
deployer._compose_with_retry("up", "--build")