def _format_subprocess_error(exc: BaseException) -> str:
    """Stringify an exception so CalledProcessError stderr actually shows up.

    The default ``str(CalledProcessError)`` is just 'Command ... returned
    non-zero exit status N', which drops the stderr we carefully stuff
    our buildx recovery hint into. Status reasons and deploy-failure
    log lines were losing the payload — surface it here instead.

    Args:
        exc: Any exception. Only ``subprocess.CalledProcessError`` gets
            special treatment; everything else is passed through ``str``.

    Returns:
        ``"<str(exc)>: <stderr>"`` when *exc* is a CalledProcessError with
        non-blank captured stderr, otherwise plain ``str(exc)``.
    """
    if isinstance(exc, subprocess.CalledProcessError):
        stderr = exc.stderr or ""
        if isinstance(stderr, (bytes, bytearray)):
            # Without text=True subprocess captures bytes; interpolating
            # them directly would render as "b'...'" with escaped newlines.
            # Decode leniently so a stray non-UTF-8 byte cannot raise while
            # we are already reporting a failure.
            stderr = stderr.decode("utf-8", errors="replace")
        stderr = stderr.strip()
        if stderr:
            return f"{exc}: {stderr}"
    return str(exc)
deploy of %s failed: %s", topology_id, exc) + from decnet.engine.deployer import _format_subprocess_error + log.error( + "background deploy of %s failed: %s", + topology_id, _format_subprocess_error(exc), + ) @router.post(