merge testing->tomerge/main #7
@@ -12,6 +12,7 @@ import os
|
|||||||
import re
|
import re
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
@@ -285,10 +286,20 @@ async def log_collector_worker(log_file: str) -> None:
|
|||||||
active: dict[str, asyncio.Task[None]] = {}
|
active: dict[str, asyncio.Task[None]] = {}
|
||||||
loop = asyncio.get_running_loop()
|
loop = asyncio.get_running_loop()
|
||||||
|
|
||||||
|
# Dedicated thread pool so long-running container log streams don't
|
||||||
|
# saturate the default asyncio executor and starve short-lived
|
||||||
|
# to_thread() calls elsewhere (e.g. load_state in the web API).
|
||||||
|
collector_pool = ThreadPoolExecutor(
|
||||||
|
max_workers=64, thread_name_prefix="decnet-collector",
|
||||||
|
)
|
||||||
|
|
||||||
def _spawn(container_id: str, container_name: str) -> None:
|
def _spawn(container_id: str, container_name: str) -> None:
|
||||||
if container_id not in active or active[container_id].done():
|
if container_id not in active or active[container_id].done():
|
||||||
active[container_id] = asyncio.ensure_future(
|
active[container_id] = asyncio.ensure_future(
|
||||||
asyncio.to_thread(_stream_container, container_id, log_path, json_path),
|
loop.run_in_executor(
|
||||||
|
collector_pool, _stream_container,
|
||||||
|
container_id, log_path, json_path,
|
||||||
|
),
|
||||||
loop=loop,
|
loop=loop,
|
||||||
)
|
)
|
||||||
logger.info("collector: streaming container=%s", container_name)
|
logger.info("collector: streaming container=%s", container_name)
|
||||||
@@ -312,12 +323,15 @@ async def log_collector_worker(log_file: str) -> None:
|
|||||||
if cid and is_service_event(attrs):
|
if cid and is_service_event(attrs):
|
||||||
loop.call_soon_threadsafe(_spawn, cid, name)
|
loop.call_soon_threadsafe(_spawn, cid, name)
|
||||||
|
|
||||||
await asyncio.to_thread(_watch_events)
|
await loop.run_in_executor(collector_pool, _watch_events)
|
||||||
|
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
logger.info("collector shutdown requested cancelling %d tasks", len(active))
|
logger.info("collector shutdown requested cancelling %d tasks", len(active))
|
||||||
for task in active.values():
|
for task in active.values():
|
||||||
task.cancel()
|
task.cancel()
|
||||||
|
collector_pool.shutdown(wait=False)
|
||||||
raise
|
raise
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.error("collector error: %s", exc)
|
logger.error("collector error: %s", exc)
|
||||||
|
finally:
|
||||||
|
collector_pool.shutdown(wait=False)
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import asyncio
|
|||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import threading
|
import threading
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from decnet.logging import get_logger
|
from decnet.logging import get_logger
|
||||||
@@ -130,12 +131,25 @@ async def sniffer_worker(log_file: str) -> None:
|
|||||||
|
|
||||||
stop_event = threading.Event()
|
stop_event = threading.Event()
|
||||||
|
|
||||||
|
# Dedicated thread pool so the long-running sniff loop doesn't
|
||||||
|
# occupy a slot in the default asyncio executor.
|
||||||
|
sniffer_pool = ThreadPoolExecutor(
|
||||||
|
max_workers=2, thread_name_prefix="decnet-sniffer",
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await asyncio.to_thread(_sniff_loop, interface, log_path, json_path, stop_event)
|
loop = asyncio.get_running_loop()
|
||||||
|
await loop.run_in_executor(
|
||||||
|
sniffer_pool, _sniff_loop,
|
||||||
|
interface, log_path, json_path, stop_event,
|
||||||
|
)
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
logger.info("sniffer: shutdown requested")
|
logger.info("sniffer: shutdown requested")
|
||||||
stop_event.set()
|
stop_event.set()
|
||||||
|
sniffer_pool.shutdown(wait=False)
|
||||||
raise
|
raise
|
||||||
|
finally:
|
||||||
|
sniffer_pool.shutdown(wait=False)
|
||||||
|
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
raise
|
raise
|
||||||
|
|||||||
94
tests/test_collector_thread_pool.py
Normal file
94
tests/test_collector_thread_pool.py
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
"""Verify that the collector and sniffer use dedicated thread pools
|
||||||
|
instead of the default asyncio executor — preventing starvation of
|
||||||
|
short-lived ``asyncio.to_thread`` calls in the web API layer."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from unittest.mock import patch, MagicMock, AsyncMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from decnet.collector.worker import log_collector_worker
|
||||||
|
from decnet.sniffer.worker import sniffer_worker
|
||||||
|
|
||||||
|
|
||||||
|
class TestCollectorDedicatedPool:
|
||||||
|
"""Collector log streams must NOT use the default asyncio executor."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_stream_containers_use_dedicated_pool(self, tmp_path):
|
||||||
|
"""Spawning container log threads should go through a dedicated
|
||||||
|
ThreadPoolExecutor, not the default loop executor."""
|
||||||
|
log_file = str(tmp_path / "decnet.log")
|
||||||
|
|
||||||
|
captured_executors: list[ThreadPoolExecutor | None] = []
|
||||||
|
original_run_in_executor = asyncio.get_event_loop().run_in_executor
|
||||||
|
|
||||||
|
async def _spy_run_in_executor(executor, func, *args):
|
||||||
|
captured_executors.append(executor)
|
||||||
|
# Don't actually run the blocking function — raise to exit.
|
||||||
|
raise asyncio.CancelledError
|
||||||
|
|
||||||
|
fake_container = MagicMock()
|
||||||
|
fake_container.id = "abc123"
|
||||||
|
fake_container.name = "/omega-decky-http"
|
||||||
|
|
||||||
|
fake_client = MagicMock()
|
||||||
|
fake_client.containers.list.return_value = [fake_container]
|
||||||
|
|
||||||
|
mock_docker = MagicMock()
|
||||||
|
mock_docker.from_env.return_value = fake_client
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch.dict("sys.modules", {"docker": mock_docker}),
|
||||||
|
patch(
|
||||||
|
"decnet.collector.worker.is_service_container",
|
||||||
|
return_value=True,
|
||||||
|
),
|
||||||
|
):
|
||||||
|
loop = asyncio.get_running_loop()
|
||||||
|
|
||||||
|
with patch.object(loop, "run_in_executor", side_effect=_spy_run_in_executor):
|
||||||
|
with pytest.raises(asyncio.CancelledError):
|
||||||
|
await log_collector_worker(log_file)
|
||||||
|
|
||||||
|
# The executor passed should be a dedicated pool, not None (default).
|
||||||
|
assert len(captured_executors) >= 1
|
||||||
|
for executor in captured_executors:
|
||||||
|
assert executor is not None, (
|
||||||
|
"Collector used default executor (None) — must use a dedicated pool"
|
||||||
|
)
|
||||||
|
assert isinstance(executor, ThreadPoolExecutor)
|
||||||
|
|
||||||
|
|
||||||
|
class TestSnifferDedicatedPool:
|
||||||
|
"""Sniffer sniff loop must NOT use the default asyncio executor."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_sniff_loop_uses_dedicated_pool(self, tmp_path):
|
||||||
|
log_file = str(tmp_path / "decnet.log")
|
||||||
|
|
||||||
|
captured_executors: list[ThreadPoolExecutor | None] = []
|
||||||
|
|
||||||
|
async def _spy_run_in_executor(executor, func, *args):
|
||||||
|
captured_executors.append(executor)
|
||||||
|
raise asyncio.CancelledError
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch(
|
||||||
|
"decnet.sniffer.worker._interface_exists",
|
||||||
|
return_value=True,
|
||||||
|
),
|
||||||
|
patch.dict("os.environ", {"DECNET_SNIFFER_IFACE": "eth0"}),
|
||||||
|
):
|
||||||
|
loop = asyncio.get_running_loop()
|
||||||
|
with patch.object(loop, "run_in_executor", side_effect=_spy_run_in_executor):
|
||||||
|
with pytest.raises(asyncio.CancelledError):
|
||||||
|
await sniffer_worker(log_file)
|
||||||
|
|
||||||
|
assert len(captured_executors) >= 1
|
||||||
|
for executor in captured_executors:
|
||||||
|
assert executor is not None, (
|
||||||
|
"Sniffer used default executor (None) — must use a dedicated pool"
|
||||||
|
)
|
||||||
|
assert isinstance(executor, ThreadPoolExecutor)
|
||||||
Reference in New Issue
Block a user