From 99bc9a8b6db9189e00f148c537b911fb9407d873 Mon Sep 17 00:00:00 2001 From: anti Date: Fri, 24 Apr 2026 22:14:08 -0400 Subject: [PATCH] fix(engine): offload blocking compose to a worker thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit deploy_topology and teardown_topology are async, but every _compose_with_retry / _compose call inside them was running in the main event loop via subprocess.run — which means a multi-minute docker compose --build froze the entire API: other endpoints, mutator events, SSE streams, status polls. The user noticed when a 2-decky deploy blocked everything else for the duration of the build. Wrap both calls in anyio.to_thread.run_sync. Same pattern the mutator engine has been using at engine.py:104 since forever. Per-LAN bridge create/remove docker SDK calls are still synchronous in the loop — they're individually fast (~50-200ms per LAN) and the loops are bounded by topology size, so they don't dominate. Worth revisiting if a 200-LAN deploy turns out to stall noticeably. --- decnet/engine/deployer.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/decnet/engine/deployer.py b/decnet/engine/deployer.py index 0ac7fa32..f73be847 100644 --- a/decnet/engine/deployer.py +++ b/decnet/engine/deployer.py @@ -8,6 +8,7 @@ import subprocess # nosec B404 import time from pathlib import Path +import anyio import docker from rich.console import Console from rich.table import Table @@ -744,7 +745,14 @@ async def deploy_topology(repo, topology_id: str, *, dry_run: bool = False) -> N console.print( f"[bold cyan]Topology compose file written[/] → {compose_path}" ) - _compose_with_retry("up", "--build", "-d", compose_file=compose_path) + # Offload to a worker thread so the API event loop stays + # responsive during the build — otherwise every other request + # (mutator events, SSE, status polls) waits behind compose. + await anyio.to_thread.run_sync( + lambda: _compose_with_retry( + "up", "--build", "-d", compose_file=compose_path, + ), + ) compose_started = True except Exception as exc: log.error("topology %s deploy failed: %s", topology_id, exc) @@ -808,7 +816,11 @@ async def teardown_topology(repo, topology_id: str) -> None: if compose_path.exists(): try: - _compose("down", "--remove-orphans", compose_file=compose_path) + await anyio.to_thread.run_sync( + lambda: _compose( + "down", "--remove-orphans", compose_file=compose_path, + ), + ) except subprocess.CalledProcessError as exc: log.warning( "topology %s compose down failed (continuing): %s",