From 45039bd62167a5339e3c377485e85944dae358d1 Mon Sep 17 00:00:00 2001
From: anti
Date: Fri, 17 Apr 2026 16:23:00 -0400
Subject: [PATCH] fix(cache): lazy-init TTL cache locks to survive event-loop turnover

A module-level asyncio.Lock binds to the loop it was first awaited on.
Under pytest-anyio (and xdist) each test spins up a new loop; any later
test that hit /health or /config would wait on a lock owned by a dead
loop and the whole worker would hang. Create the lock on first use and
drop it in the test-reset helpers so a fresh loop always gets a fresh
lock.
---
 decnet/web/router/config/api_get_config.py |  3 +++
 decnet/web/router/health/api_get_health.py | 11 ++++++++---
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/decnet/web/router/config/api_get_config.py b/decnet/web/router/config/api_get_config.py
index e47cceb..3e751aa 100644
--- a/decnet/web/router/config/api_get_config.py
+++ b/decnet/web/router/config/api_get_config.py
@@ -24,6 +24,9 @@ _state_locks: dict[str, asyncio.Lock] = {}
 def _reset_state_cache() -> None:
     """Reset cached config state — used by tests."""
     _state_cache.clear()
+    # Drop any locks bound to the previous event loop — reusing one from
+    # a dead loop deadlocks the next test.
+    _state_locks.clear()
 
 
 async def _get_state_cached(name: str) -> Optional[dict[str, Any]]:
diff --git a/decnet/web/router/health/api_get_health.py b/decnet/web/router/health/api_get_health.py
index f9bac90..ad39d76 100644
--- a/decnet/web/router/health/api_get_health.py
+++ b/decnet/web/router/health/api_get_health.py
@@ -24,7 +24,9 @@ _DOCKER_CHECK_INTERVAL = 5.0  # seconds between actual Docker pings
 # repo.get_total_logs() and filling the aiosqlite queue.
 _db_component: Optional[ComponentHealth] = None
 _db_last_check: float = 0.0
-_db_lock = asyncio.Lock()
+# Lazy-init — an asyncio.Lock bound to a dead event loop deadlocks any
+# later test running under a fresh loop. Create on first use.
+_db_lock: Optional[asyncio.Lock] = None
 _DB_CHECK_INTERVAL = 1.0  # seconds
 
 
@@ -39,16 +41,19 @@ def _reset_docker_cache() -> None:
 
 def _reset_db_cache() -> None:
     """Reset cached DB liveness — used by tests."""
-    global _db_component, _db_last_check
+    global _db_component, _db_last_check, _db_lock
     _db_component = None
     _db_last_check = 0.0
+    _db_lock = None
 
 
 async def _check_database_cached() -> ComponentHealth:
-    global _db_component, _db_last_check
+    global _db_component, _db_last_check, _db_lock
     now = time.monotonic()
     if _db_component is not None and now - _db_last_check < _DB_CHECK_INTERVAL:
         return _db_component
+    if _db_lock is None:
+        _db_lock = asyncio.Lock()
     async with _db_lock:
         now = time.monotonic()
         if _db_component is not None and now - _db_last_check < _DB_CHECK_INTERVAL: