From 140d2fbaad9c75d70ddfd3885ec37fb967050638 Mon Sep 17 00:00:00 2001 From: anti Date: Fri, 17 Apr 2026 13:35:43 -0400 Subject: [PATCH] fix: gate embedded sniffer behind DECNET_EMBED_SNIFFER (default off) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The API's lifespan unconditionally spawned a MACVLAN sniffer task, which duplicated the standalone 'decnet sniffer --daemon' process that 'decnet deploy' always starts — causing two workers to sniff the same interface, double events, and wasted CPU. Mirror the existing DECNET_EMBED_PROFILER pattern: sniffer is OFF by default, opt in explicitly. Static regression tests guard against accidental removal of the gate. --- decnet/env.py | 6 +++++ decnet/web/api.py | 23 +++++++++++------ tests/test_embedded_workers.py | 45 ++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 8 deletions(-) create mode 100644 tests/test_embedded_workers.py diff --git a/decnet/env.py b/decnet/env.py index 7cb163b..290e949 100644 --- a/decnet/env.py +++ b/decnet/env.py @@ -59,6 +59,12 @@ DECNET_SYSTEM_LOGS: str = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.lo # which causes events to be skipped or processed twice. DECNET_EMBED_PROFILER: bool = os.environ.get("DECNET_EMBED_PROFILER", "").lower() == "true" +# Set to "true" to embed the MACVLAN sniffer inside the API process. +# Leave unset (default) when the standalone `decnet sniffer --daemon` is +# running (which `decnet deploy` always does). Embedding both produces two +# workers sniffing the same interface — duplicated events and wasted CPU. +DECNET_EMBED_SNIFFER: bool = os.environ.get("DECNET_EMBED_SNIFFER", "").lower() == "true" + # Set to "true" to mount the Pyinstrument ASGI middleware on the FastAPI app. # Produces per-request HTML flamegraphs under ./profiles/. Off by default so # production and normal dev runs pay zero profiling overhead. diff --git a/decnet/web/api.py b/decnet/web/api.py index c837941..be5c445 100644 --- a/decnet/web/api.py +++ b/decnet/web/api.py @@ -13,6 +13,7 @@ from decnet.env import ( DECNET_CORS_ORIGINS, DECNET_DEVELOPER, DECNET_EMBED_PROFILER, + DECNET_EMBED_SNIFFER, DECNET_INGEST_LOG_FILE, DECNET_PROFILE_DIR, DECNET_PROFILE_REQUESTS, @@ -97,14 +98,20 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: else: log.debug("API startup: profiler not embedded — expecting standalone daemon") - # Start fleet-wide MACVLAN sniffer (fault-isolated — never crashes the API) - try: - from decnet.sniffer import sniffer_worker - if sniffer_task is None or sniffer_task.done(): - sniffer_task = asyncio.create_task(sniffer_worker(_log_file)) - log.debug("API startup sniffer worker started") - except Exception as exc: - log.warning("Sniffer worker failed to start — API continues without sniffing: %s", exc) + # Start fleet-wide MACVLAN sniffer only when explicitly requested. + # Default is OFF because `decnet deploy` always starts a standalone + # `decnet sniffer --daemon` process. Running both against the same + # interface produces duplicated events and wastes CPU. + if DECNET_EMBED_SNIFFER: + try: + from decnet.sniffer import sniffer_worker + if sniffer_task is None or sniffer_task.done(): + sniffer_task = asyncio.create_task(sniffer_worker(_log_file)) + log.info("API startup: embedded sniffer started (DECNET_EMBED_SNIFFER=true)") + except Exception as exc: + log.warning("Sniffer worker failed to start — API continues without sniffing: %s", exc) + else: + log.debug("API startup: sniffer not embedded — expecting standalone daemon") else: log.info("Contract Test Mode: skipping background worker startup") diff --git a/tests/test_embedded_workers.py b/tests/test_embedded_workers.py new file mode 100644 index 0000000..4db04e5 --- /dev/null +++ b/tests/test_embedded_workers.py @@ -0,0 +1,45 @@ +""" +Regression guards for workers that duplicate standalone daemons. + +`decnet deploy` starts standalone `decnet sniffer --daemon` and +`decnet profiler --daemon` processes. The API's lifespan must not spawn +its own copies unless the operator explicitly opts in via env flags. + +These tests are intentionally static: we don't spin up lifespan, because +scapy's sniff thread doesn't cooperate with asyncio cancellation and +hangs pytest teardown. +""" +import importlib +import inspect + + +def test_embed_sniffer_defaults_off(monkeypatch): + monkeypatch.delenv("DECNET_EMBED_SNIFFER", raising=False) + import decnet.env + importlib.reload(decnet.env) + assert decnet.env.DECNET_EMBED_SNIFFER is False + + +def test_embed_sniffer_flag_is_truthy_on_opt_in(monkeypatch): + monkeypatch.setenv("DECNET_EMBED_SNIFFER", "true") + import decnet.env + importlib.reload(decnet.env) + assert decnet.env.DECNET_EMBED_SNIFFER is True + + +def test_api_lifespan_gates_sniffer_on_embed_flag(): + """The lifespan source must reference the gate flag before spawning the + sniffer task — catches accidental removal of the guard in future edits.""" + import decnet.web.api + src = inspect.getsource(decnet.web.api.lifespan) + assert "DECNET_EMBED_SNIFFER" in src, "sniffer gate removed from lifespan" + assert "sniffer_worker" in src + # Gate must appear before the task creation. + assert src.index("DECNET_EMBED_SNIFFER") < src.index("sniffer_worker") + + +def test_api_lifespan_gates_profiler_on_embed_flag(): + import decnet.web.api + src = inspect.getsource(decnet.web.api.lifespan) + assert "DECNET_EMBED_PROFILER" in src + assert src.index("DECNET_EMBED_PROFILER") < src.index("attacker_profile_worker")