fix: gate embedded sniffer behind DECNET_EMBED_SNIFFER (default off)

The API's lifespan unconditionally spawned a MACVLAN sniffer task, which
duplicated the standalone 'decnet sniffer --daemon' process that
'decnet deploy' always starts. As a result, two workers sniffed the same
interface, producing duplicate events and wasting CPU.

Mirror the existing DECNET_EMBED_PROFILER pattern: sniffer is OFF by
default, opt in explicitly. Static regression tests guard against
accidental removal of the gate.
This commit is contained in:
2026-04-17 13:35:43 -04:00
parent 064c8760b6
commit 140d2fbaad
3 changed files with 66 additions and 8 deletions

View File

@@ -59,6 +59,12 @@ DECNET_SYSTEM_LOGS: str = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.lo
# which causes events to be skipped or processed twice.
DECNET_EMBED_PROFILER: bool = os.environ.get("DECNET_EMBED_PROFILER", "").lower() == "true"
# Set to "true" to embed the MACVLAN sniffer inside the API process.
# Leave unset (default) when the standalone `decnet sniffer --daemon` is
# running (`decnet deploy` always starts it). Running the embedded and
# standalone sniffers together puts two workers on the same interface,
# which duplicates events and wastes CPU.
DECNET_EMBED_SNIFFER: bool = os.environ.get("DECNET_EMBED_SNIFFER", "").lower() == "true"
# Set to "true" to mount the Pyinstrument ASGI middleware on the FastAPI app.
# Produces per-request HTML flamegraphs under ./profiles/. Off by default so
# production and normal dev runs pay zero profiling overhead.

View File

@@ -13,6 +13,7 @@ from decnet.env import (
DECNET_CORS_ORIGINS,
DECNET_DEVELOPER,
DECNET_EMBED_PROFILER,
DECNET_EMBED_SNIFFER,
DECNET_INGEST_LOG_FILE,
DECNET_PROFILE_DIR,
DECNET_PROFILE_REQUESTS,
@@ -97,14 +98,20 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
else:
log.debug("API startup: profiler not embedded — expecting standalone daemon")
# Start fleet-wide MACVLAN sniffer (fault-isolated — never crashes the API)
try:
from decnet.sniffer import sniffer_worker
if sniffer_task is None or sniffer_task.done():
sniffer_task = asyncio.create_task(sniffer_worker(_log_file))
log.debug("API startup sniffer worker started")
except Exception as exc:
log.warning("Sniffer worker failed to start — API continues without sniffing: %s", exc)
# Start fleet-wide MACVLAN sniffer only when explicitly requested.
# Default is OFF because `decnet deploy` always starts a standalone
# `decnet sniffer --daemon` process. Running both against the same
# interface produces duplicated events and wastes CPU.
if DECNET_EMBED_SNIFFER:
try:
from decnet.sniffer import sniffer_worker
if sniffer_task is None or sniffer_task.done():
sniffer_task = asyncio.create_task(sniffer_worker(_log_file))
log.info("API startup: embedded sniffer started (DECNET_EMBED_SNIFFER=true)")
except Exception as exc:
log.warning("Sniffer worker failed to start — API continues without sniffing: %s", exc)
else:
log.debug("API startup: sniffer not embedded — expecting standalone daemon")
else:
log.info("Contract Test Mode: skipping background worker startup")