feat: complete OTEL tracing across all services with pipeline bridge and docs

Extends tracing to every remaining module: all 23 API route handlers,
correlation engine, sniffer (fingerprint/p0f/syslog), prober (jarm/hassh/tcpfp),
profiler behavioral analysis, logging subsystem, engine, and mutator.

Bridges the ingester→SSE trace gap by persisting trace_id/span_id columns on
the logs table and creating OTEL span links in the SSE endpoint. Adds log-trace
correlation via _TraceContextFilter injecting otel_trace_id into Python LogRecords.

Includes development/docs/TRACING.md with full span reference (76 spans),
pipeline propagation architecture, quick start guide, and troubleshooting.
This commit is contained in:
2026-04-16 00:58:08 -04:00
parent 04db13afae
commit 70d8ffc607
38 changed files with 577 additions and 124 deletions

View File

@@ -3,6 +3,7 @@ import os
from fastapi import APIRouter, Depends, HTTPException
from decnet.logging import get_logger
from decnet.telemetry import traced as _traced
from decnet.config import DEFAULT_MUTATE_INTERVAL, DecnetConfig, _ROOT
from decnet.engine import deploy as _deploy
from decnet.ini_loader import load_ini_from_string
@@ -27,6 +28,7 @@ router = APIRouter()
500: {"description": "Deployment failed"}
}
)
@_traced("api.deploy_deckies")
async def api_deploy_deckies(req: DeployIniRequest, admin: dict = Depends(require_admin)) -> dict[str, str]:
from decnet.fleet import build_deckies_from_ini

View File

@@ -2,6 +2,7 @@ from typing import Any
from fastapi import APIRouter, Depends
from decnet.telemetry import traced as _traced
from decnet.web.dependencies import require_viewer, repo
router = APIRouter()
@@ -9,5 +10,6 @@ router = APIRouter()
@router.get(
    "/deckies",
    tags=["Fleet Management"],
    responses={
        401: {"description": "Could not validate credentials"},
        422: {"description": "Validation error"},
    },
)
@_traced("api.get_deckies")
async def get_deckies(user: dict = Depends(require_viewer)) -> list[dict[str, Any]]:
    # GET /deckies handler, wrapped by the "api.get_deckies" tracing decorator.
    # `user` is not read in the body; it is resolved through require_viewer
    # (presumably to gate access to viewer-level callers — confirm in
    # decnet.web.dependencies).
    deckies = await repo.get_deckies()
    # The repository result is passed through unchanged.
    return deckies

View File

@@ -1,6 +1,7 @@
import os
from fastapi import APIRouter, Depends, HTTPException, Path
from decnet.telemetry import traced as _traced
from decnet.mutator import mutate_decky
from decnet.web.dependencies import require_admin, repo
@@ -12,6 +13,7 @@ router = APIRouter()
tags=["Fleet Management"],
responses={401: {"description": "Could not validate credentials"}, 403: {"description": "Insufficient permissions"}, 404: {"description": "Decky not found"}}
)
@_traced("api.mutate_decky")
async def api_mutate_decky(
decky_name: str = Path(..., pattern=r"^[a-z0-9\-]{1,64}$"),
admin: dict = Depends(require_admin),

View File

@@ -1,5 +1,6 @@
from fastapi import APIRouter, Depends, HTTPException
from decnet.telemetry import traced as _traced
from decnet.config import DecnetConfig
from decnet.web.dependencies import require_admin, repo
from decnet.web.db.models import MutateIntervalRequest
@@ -24,6 +25,7 @@ def _parse_duration(s: str) -> int:
422: {"description": "Validation error"}
},
)
@_traced("api.update_mutate_interval")
async def api_update_mutate_interval(decky_name: str, req: MutateIntervalRequest, admin: dict = Depends(require_admin)) -> dict[str, str]:
state_dict = await repo.get_state("deployment")
if not state_dict: