fix(engine): surface CalledProcessError.stderr in deploy-failure log + status reason
str(CalledProcessError) is just 'Command ... returned non-zero exit
status N' — the stderr (where the buildx recovery hint lives) was
being silently dropped from both the deploy log line and the
persisted 'failed' status reason.
New _format_subprocess_error helper appends .stderr when the
exception is a CalledProcessError. Applied to transition_status
reason and the background-deploy log message so operators and the
UI see the real failure, not just the exit code.
This is what makes the buildx preflight hint from 86b9dec actually
reach the user.
This commit is contained in:
@@ -162,6 +162,21 @@ def _count_leaked_buildkit_mounts() -> int:
|
||||
return 0
|
||||
|
||||
|
||||
def _format_subprocess_error(exc: BaseException) -> str:
|
||||
"""Stringify CalledProcessError so stderr actually shows up.
|
||||
|
||||
The default str(CalledProcessError) is just 'Command ... returned
|
||||
non-zero exit status N', which drops the stderr we carefully stuff
|
||||
our buildx recovery hint into. Status reasons and deploy-failure
|
||||
log lines were losing the payload — surface it here instead.
|
||||
"""
|
||||
if isinstance(exc, subprocess.CalledProcessError):
|
||||
stderr = (exc.stderr or "").strip()
|
||||
if stderr:
|
||||
return f"{exc}: {stderr}"
|
||||
return str(exc)
|
||||
|
||||
|
||||
def _buildx_recovery_hint(extra: str = "") -> str:
|
||||
head = (
|
||||
"Buildx is wedged — Docker's build driver has leaked bind "
|
||||
@@ -505,7 +520,8 @@ async def _deploy_on_agent(repo, topology_id: str, hydrated: dict) -> None:
|
||||
topology_id, host.get("name"), exc,
|
||||
)
|
||||
await transition_status(
|
||||
repo, topology_id, TopologyStatus.FAILED, reason=str(exc)
|
||||
repo, topology_id, TopologyStatus.FAILED,
|
||||
reason=_format_subprocess_error(exc),
|
||||
)
|
||||
raise
|
||||
|
||||
@@ -691,7 +707,8 @@ async def deploy_topology(repo, topology_id: str, *, dry_run: bool = False) -> N
|
||||
except OSError: # pragma: no cover
|
||||
pass
|
||||
await transition_status(
|
||||
repo, topology_id, TopologyStatus.FAILED, reason=str(exc)
|
||||
repo, topology_id, TopologyStatus.FAILED,
|
||||
reason=_format_subprocess_error(exc),
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
@@ -34,7 +34,11 @@ async def _run_deploy(topology_id: str) -> None:
|
||||
except asyncio.CancelledError: # pragma: no cover — shutdown
|
||||
raise
|
||||
except Exception as exc: # noqa: BLE001
|
||||
log.error("background deploy of %s failed: %s", topology_id, exc)
|
||||
from decnet.engine.deployer import _format_subprocess_error
|
||||
log.error(
|
||||
"background deploy of %s failed: %s",
|
||||
topology_id, _format_subprocess_error(exc),
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
|
||||
Reference in New Issue
Block a user