From a2c34cac02cca327c10c71ce75d5fc7d0e83b482 Mon Sep 17 00:00:00 2001 From: anti Date: Sun, 10 May 2026 09:53:25 -0400 Subject: [PATCH] fix(tests): prevent xdist worker OOM from leaked tarpit asyncio task MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit asyncio_default_fixture_loop_scope was 'module', so all async tests in a module share one event loop. test_lifespan_startup_and_shutdown patched log_ingestion_worker/log_collector_worker/attacker_profile_worker but not tarpit_watcher_worker — the real while-True coroutine was created as an asyncio task on the shared loop and never cancelled. The xdist worker ran for 4+ hours (confirmed via py-spy + etime=04:48) consuming 15+ GB before OOM-kill. Fixes: - Patch tarpit_watcher_worker in both TestLifespan tests - Change asyncio_default_fixture_loop_scope to 'function' so each test gets its own loop; tasks cannot outlive their test - Add loop_scope='module' to precision_engine which legitimately needs a module-scoped event loop --- pyproject.toml | 2 +- tests/ttp/rule_precision/conftest.py | 2 +- tests/web/test_web_api.py | 10 ++++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 73ac3104..ab7a734c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,7 +116,7 @@ decnet = "decnet.cli:app" [tool.pytest.ini_options] asyncio_mode = "auto" asyncio_debug = "true" -asyncio_default_fixture_loop_scope = "module" +asyncio_default_fixture_loop_scope = "function" addopts = "-v -q -x -n 4 --dist load" norecursedirs = [ "tests/live", diff --git a/tests/ttp/rule_precision/conftest.py b/tests/ttp/rule_precision/conftest.py index c9e86ab2..a2367152 100644 --- a/tests/ttp/rule_precision/conftest.py +++ b/tests/ttp/rule_precision/conftest.py @@ -102,7 +102,7 @@ def compiled_rules() -> list[CompiledRule]: return _load_compiled_rules() -@pytest_asyncio.fixture(scope="module") +@pytest_asyncio.fixture(scope="module", loop_scope="module") async def precision_engine( compiled_rules: list[CompiledRule], ) -> RuleEngine: diff --git a/tests/web/test_web_api.py b/tests/web/test_web_api.py index 988a9d4f..bd462d00 100644 --- a/tests/web/test_web_api.py +++ b/tests/web/test_web_api.py @@ -129,8 +129,9 @@ class TestLifespan: with patch("decnet.web.api.log_ingestion_worker", return_value=asyncio.sleep(0)): with patch("decnet.web.api.log_collector_worker", return_value=asyncio.sleep(0)): with patch("decnet.web.api.attacker_profile_worker", return_value=asyncio.sleep(0)): - async with lifespan(mock_app): - mock_repo.initialize.assert_awaited_once() + with patch("decnet.web.api.tarpit_watcher_worker", return_value=asyncio.sleep(0)): + async with lifespan(mock_app): + mock_repo.initialize.assert_awaited_once() @pytest.mark.asyncio async def test_lifespan_db_retry(self): @@ -155,5 +156,6 @@ class TestLifespan: with patch("decnet.web.api.log_ingestion_worker", return_value=asyncio.sleep(0)): with patch("decnet.web.api.log_collector_worker", return_value=asyncio.sleep(0)): with patch("decnet.web.api.attacker_profile_worker", return_value=asyncio.sleep(0)): - async with lifespan(mock_app): - assert _call_count == 3 + with patch("decnet.web.api.tarpit_watcher_worker", return_value=asyncio.sleep(0)): + async with lifespan(mock_app): + assert _call_count == 3