From 82ec7f311779641060ca249de5f5e98104c208ac Mon Sep 17 00:00:00 2001 From: anti Date: Wed, 15 Apr 2026 17:49:18 -0400 Subject: [PATCH] fix: gate embedded profiler behind DECNET_EMBED_PROFILER to prevent dual-instance cursor conflict decnet deploy spawns a standalone profiler daemon AND api.py was also starting attacker_profile_worker as an asyncio task inside the web server. Both instances shared the same attacker_worker_cursor key in the state table, causing a race where one instance could skip events already claimed by the other or overwrite the cursor mid-batch. Default is now OFF (embedded profiler disabled). The standalone daemon started by decnet deploy is the single authoritative instance. Set DECNET_EMBED_PROFILER=true only when running decnet api in isolation without a full deploy. --- decnet/env.py | 6 ++++++ decnet/web/api.py | 16 +++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/decnet/env.py b/decnet/env.py index 3b30f9b..f247352 100644 --- a/decnet/env.py +++ b/decnet/env.py @@ -53,6 +53,12 @@ def _require_env(name: str) -> str: # System logging — all microservice daemons append here. DECNET_SYSTEM_LOGS: str = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.log") +# Set to "true" to embed the profiler inside the API process. +# Leave unset (default) when the standalone `decnet profiler --daemon` is +# running — running both produces two workers sharing the same DB cursor, +# which causes events to be skipped or processed twice. 
+DECNET_EMBED_PROFILER: bool = os.environ.get("DECNET_EMBED_PROFILER", "").lower() == "true" + # API Options DECNET_API_HOST: str = os.environ.get("DECNET_API_HOST", "0.0.0.0") # nosec B104 DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000) diff --git a/decnet/web/api.py b/decnet/web/api.py index 8d044f5..aac1249 100644 --- a/decnet/web/api.py +++ b/decnet/web/api.py @@ -9,7 +9,7 @@ from fastapi.responses import JSONResponse from pydantic import ValidationError from fastapi.middleware.cors import CORSMiddleware -from decnet.env import DECNET_CORS_ORIGINS, DECNET_DEVELOPER, DECNET_INGEST_LOG_FILE +from decnet.env import DECNET_CORS_ORIGINS, DECNET_DEVELOPER, DECNET_EMBED_PROFILER, DECNET_INGEST_LOG_FILE from decnet.logging import get_logger from decnet.web.dependencies import repo from decnet.collector import log_collector_worker @@ -65,10 +65,16 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: elif not _log_file: log.warning("DECNET_INGEST_LOG_FILE not set — Docker log collection disabled.") - # Start attacker profile rebuild worker - if attacker_task is None or attacker_task.done(): - attacker_task = asyncio.create_task(attacker_profile_worker(repo)) - log.debug("API startup attacker profile worker started") + # Start attacker profile rebuild worker only when explicitly requested. + # Default is OFF because `decnet deploy` always starts a standalone + # `decnet profiler --daemon` process. Running both against the same + # DB cursor causes events to be skipped or double-processed. + if DECNET_EMBED_PROFILER: + if attacker_task is None or attacker_task.done(): + attacker_task = asyncio.create_task(attacker_profile_worker(repo)) + log.info("API startup: embedded profiler started (DECNET_EMBED_PROFILER=true)") + else: + log.debug("API startup: profiler not embedded — expecting standalone daemon") # Start fleet-wide MACVLAN sniffer (fault-isolated — never crashes the API) try: