fix: gate embedded profiler behind DECNET_EMBED_PROFILER to prevent dual-instance cursor conflict
`decnet deploy` spawns a standalone profiler daemon, and api.py was also starting attacker_profile_worker as an asyncio task inside the web server. Both instances shared the same attacker_worker_cursor key in the state table, causing a race in which one instance could skip events already claimed by the other or overwrite the cursor mid-batch. The default is now OFF (embedded profiler disabled), so the standalone daemon started by `decnet deploy` is the single authoritative instance. Set DECNET_EMBED_PROFILER=true only when running `decnet api` in isolation, without a full deploy.
This commit is contained in:
@@ -53,6 +53,12 @@ def _require_env(name: str) -> str:
|
|||||||
# System logging — all microservice daemons append here.
|
# System logging — all microservice daemons append here.
|
||||||
DECNET_SYSTEM_LOGS: str = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.log")
|
DECNET_SYSTEM_LOGS: str = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.log")
|
||||||
|
|
||||||
|
# Set to "true" to embed the profiler inside the API process.
|
||||||
|
# Leave unset (default) when the standalone `decnet profiler --daemon` is
|
||||||
|
# running — enabling the embedded profiler as well produces two workers sharing the same DB cursor,
|
||||||
|
# which causes events to be skipped or processed twice.
|
||||||
|
DECNET_EMBED_PROFILER: bool = os.environ.get("DECNET_EMBED_PROFILER", "").lower() == "true"
|
||||||
|
|
||||||
# API Options
|
# API Options
|
||||||
DECNET_API_HOST: str = os.environ.get("DECNET_API_HOST", "0.0.0.0") # nosec B104
|
DECNET_API_HOST: str = os.environ.get("DECNET_API_HOST", "0.0.0.0") # nosec B104
|
||||||
DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000)
|
DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from fastapi.responses import JSONResponse
|
|||||||
from pydantic import ValidationError
|
from pydantic import ValidationError
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
|
||||||
from decnet.env import DECNET_CORS_ORIGINS, DECNET_DEVELOPER, DECNET_INGEST_LOG_FILE
|
from decnet.env import DECNET_CORS_ORIGINS, DECNET_DEVELOPER, DECNET_EMBED_PROFILER, DECNET_INGEST_LOG_FILE
|
||||||
from decnet.logging import get_logger
|
from decnet.logging import get_logger
|
||||||
from decnet.web.dependencies import repo
|
from decnet.web.dependencies import repo
|
||||||
from decnet.collector import log_collector_worker
|
from decnet.collector import log_collector_worker
|
||||||
@@ -65,10 +65,16 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
|
|||||||
elif not _log_file:
|
elif not _log_file:
|
||||||
log.warning("DECNET_INGEST_LOG_FILE not set — Docker log collection disabled.")
|
log.warning("DECNET_INGEST_LOG_FILE not set — Docker log collection disabled.")
|
||||||
|
|
||||||
# Start attacker profile rebuild worker
|
# Start attacker profile rebuild worker only when explicitly requested.
|
||||||
|
# Default is OFF because `decnet deploy` always starts a standalone
|
||||||
|
# `decnet profiler --daemon` process. Running both against the same
|
||||||
|
# DB cursor causes events to be skipped or double-processed.
|
||||||
|
if DECNET_EMBED_PROFILER:
|
||||||
if attacker_task is None or attacker_task.done():
|
if attacker_task is None or attacker_task.done():
|
||||||
attacker_task = asyncio.create_task(attacker_profile_worker(repo))
|
attacker_task = asyncio.create_task(attacker_profile_worker(repo))
|
||||||
log.debug("API startup attacker profile worker started")
|
log.info("API startup: embedded profiler started (DECNET_EMBED_PROFILER=true)")
|
||||||
|
else:
|
||||||
|
log.debug("API startup: profiler not embedded — expecting standalone daemon")
|
||||||
|
|
||||||
# Start fleet-wide MACVLAN sniffer (fault-isolated — never crashes the API)
|
# Start fleet-wide MACVLAN sniffer (fault-isolated — never crashes the API)
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user