fix: gate embedded sniffer behind DECNET_EMBED_SNIFFER (default off)
The API's lifespan unconditionally spawned a MACVLAN sniffer task, which duplicated the standalone 'decnet sniffer --daemon' process that 'decnet deploy' always starts — causing two workers to sniff the same interface, double events, and wasted CPU. Mirror the existing DECNET_EMBED_PROFILER pattern: sniffer is OFF by default, opt in explicitly. Static regression tests guard against accidental removal of the gate.
This commit is contained in:
@@ -59,6 +59,12 @@ DECNET_SYSTEM_LOGS: str = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.lo
|
||||
# which causes events to be skipped or processed twice.
|
||||
DECNET_EMBED_PROFILER: bool = os.environ.get("DECNET_EMBED_PROFILER", "").lower() == "true"
|
||||
|
||||
# Set to "true" to embed the MACVLAN sniffer inside the API process.
|
||||
# Leave unset (default) when the standalone `decnet sniffer --daemon` is
|
||||
# running (which `decnet deploy` always does). Embedding both produces two
|
||||
# workers sniffing the same interface — duplicated events and wasted CPU.
|
||||
DECNET_EMBED_SNIFFER: bool = os.environ.get("DECNET_EMBED_SNIFFER", "").lower() == "true"
|
||||
|
||||
# Set to "true" to mount the Pyinstrument ASGI middleware on the FastAPI app.
|
||||
# Produces per-request HTML flamegraphs under ./profiles/. Off by default so
|
||||
# production and normal dev runs pay zero profiling overhead.
|
||||
|
||||
@@ -13,6 +13,7 @@ from decnet.env import (
|
||||
DECNET_CORS_ORIGINS,
|
||||
DECNET_DEVELOPER,
|
||||
DECNET_EMBED_PROFILER,
|
||||
DECNET_EMBED_SNIFFER,
|
||||
DECNET_INGEST_LOG_FILE,
|
||||
DECNET_PROFILE_DIR,
|
||||
DECNET_PROFILE_REQUESTS,
|
||||
@@ -97,14 +98,20 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
else:
|
||||
log.debug("API startup: profiler not embedded — expecting standalone daemon")
|
||||
|
||||
# Start fleet-wide MACVLAN sniffer (fault-isolated — never crashes the API)
|
||||
# Start fleet-wide MACVLAN sniffer only when explicitly requested.
|
||||
# Default is OFF because `decnet deploy` always starts a standalone
|
||||
# `decnet sniffer --daemon` process. Running both against the same
|
||||
# interface produces duplicated events and wastes CPU.
|
||||
if DECNET_EMBED_SNIFFER:
|
||||
try:
|
||||
from decnet.sniffer import sniffer_worker
|
||||
if sniffer_task is None or sniffer_task.done():
|
||||
sniffer_task = asyncio.create_task(sniffer_worker(_log_file))
|
||||
log.debug("API startup sniffer worker started")
|
||||
log.info("API startup: embedded sniffer started (DECNET_EMBED_SNIFFER=true)")
|
||||
except Exception as exc:
|
||||
log.warning("Sniffer worker failed to start — API continues without sniffing: %s", exc)
|
||||
else:
|
||||
log.debug("API startup: sniffer not embedded — expecting standalone daemon")
|
||||
else:
|
||||
log.info("Contract Test Mode: skipping background worker startup")
|
||||
|
||||
|
||||
45
tests/test_embedded_workers.py
Normal file
45
tests/test_embedded_workers.py
Normal file
@@ -0,0 +1,45 @@
|
||||
"""
|
||||
Regression guards for workers that duplicate standalone daemons.
|
||||
|
||||
`decnet deploy` starts standalone `decnet sniffer --daemon` and
|
||||
`decnet profiler --daemon` processes. The API's lifespan must not spawn
|
||||
its own copies unless the operator explicitly opts in via env flags.
|
||||
|
||||
These tests are intentionally static: we don't spin up lifespan, because
|
||||
scapy's sniff thread doesn't cooperate with asyncio cancellation and
|
||||
hangs pytest teardown.
|
||||
"""
|
||||
import importlib
|
||||
import inspect
|
||||
|
||||
|
||||
def test_embed_sniffer_defaults_off(monkeypatch):
    """With DECNET_EMBED_SNIFFER unset, the flag must evaluate to False.

    ``decnet.env`` computes the flag once at import time, so the module is
    reloaded after the variable is removed from the environment.

    The env patch is scoped with ``monkeypatch.context()`` and the module is
    reloaded again afterwards, so the constants in ``decnet.env`` are
    re-derived from the restored environment instead of leaking this test's
    patched view into tests that run later.
    """
    import decnet.env

    try:
        with monkeypatch.context() as scoped_env:
            scoped_env.delenv("DECNET_EMBED_SNIFFER", raising=False)
            importlib.reload(decnet.env)
            assert decnet.env.DECNET_EMBED_SNIFFER is False
    finally:
        # The context manager has already restored the variable at this
        # point; re-evaluate the module so its state matches the environment.
        importlib.reload(decnet.env)
|
||||
|
||||
|
||||
def test_embed_sniffer_flag_is_truthy_on_opt_in(monkeypatch):
    """Setting DECNET_EMBED_SNIFFER=true must turn the flag on.

    ``decnet.env`` computes the flag once at import time, so the module is
    reloaded while the variable is set.

    The env patch is scoped with ``monkeypatch.context()`` and the module is
    reloaded again afterwards. Without that second reload the module would
    keep ``DECNET_EMBED_SNIFFER = True`` for the rest of the test session,
    and any code importing the flag later would see the sniffer as opted in.
    """
    import decnet.env

    try:
        with monkeypatch.context() as scoped_env:
            scoped_env.setenv("DECNET_EMBED_SNIFFER", "true")
            importlib.reload(decnet.env)
            assert decnet.env.DECNET_EMBED_SNIFFER is True
    finally:
        # The context manager has already removed the override at this
        # point; re-evaluate the module so the opt-in does not outlive the test.
        importlib.reload(decnet.env)
|
||||
|
||||
|
||||
def test_api_lifespan_gates_sniffer_on_embed_flag():
    """Static guard on the API lifespan: the DECNET_EMBED_SNIFFER flag must be
    mentioned in its source ahead of the first reference to the sniffer
    worker, so that dropping the gate in a later edit fails this test."""
    import decnet.web.api

    lifespan_source = inspect.getsource(decnet.web.api.lifespan)

    # str.find returns -1 when the text is absent, which keeps the presence
    # checks and the ordering check working from the same two offsets.
    gate_offset = lifespan_source.find("DECNET_EMBED_SNIFFER")
    assert gate_offset != -1, "sniffer gate removed from lifespan"

    worker_offset = lifespan_source.find("sniffer_worker")
    assert worker_offset != -1

    # The gate has to come first in the source text.
    assert gate_offset < worker_offset
|
||||
|
||||
|
||||
def test_api_lifespan_gates_profiler_on_embed_flag():
    """The lifespan source must reference DECNET_EMBED_PROFILER before the
    profiler worker — catches accidental removal of the guard in future edits."""
    import decnet.web.api

    src = inspect.getsource(decnet.web.api.lifespan)
    assert "DECNET_EMBED_PROFILER" in src, "profiler gate removed from lifespan"
    # Check presence explicitly: str.index would raise ValueError on a missing
    # name, which reads as a test error rather than a failed guard.
    assert "attacker_profile_worker" in src, "profiler worker no longer referenced in lifespan"
    # Gate must appear before the worker is referenced.
    assert src.index("DECNET_EMBED_PROFILER") < src.index("attacker_profile_worker"), (
        "profiler worker referenced before the DECNET_EMBED_PROFILER gate"
    )
|
||||
Reference in New Issue
Block a user