feat: complete OTEL tracing across all services with pipeline bridge and docs
Extends tracing to every remaining module: all 23 API route handlers, correlation engine, sniffer (fingerprint/p0f/syslog), prober (jarm/hassh/tcpfp), profiler behavioral analysis, logging subsystem, engine, and mutator. Bridges the ingester→SSE trace gap by persisting trace_id/span_id columns on the logs table and creating OTEL span links in the SSE endpoint. Adds log-trace correlation via _TraceContextFilter injecting otel_trace_id into Python LogRecords. Includes development/docs/TRACING.md with full span reference (76 spans), pipeline propagation architecture, quick start guide, and troubleshooting.
This commit is contained in:
@@ -2,6 +2,7 @@ from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.dependencies import require_viewer, repo
|
||||
|
||||
router = APIRouter()
|
||||
@@ -15,6 +16,7 @@ router = APIRouter()
|
||||
404: {"description": "Attacker not found"},
|
||||
},
|
||||
)
|
||||
@_traced("api.get_attacker_commands")
|
||||
async def get_attacker_commands(
|
||||
uuid: str,
|
||||
limit: int = Query(50, ge=1, le=200),
|
||||
|
||||
@@ -2,6 +2,7 @@ from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.dependencies import require_viewer, repo
|
||||
|
||||
router = APIRouter()
|
||||
@@ -15,6 +16,7 @@ router = APIRouter()
|
||||
404: {"description": "Attacker not found"},
|
||||
},
|
||||
)
|
||||
@_traced("api.get_attacker_detail")
|
||||
async def get_attacker_detail(
|
||||
uuid: str,
|
||||
user: dict = Depends(require_viewer),
|
||||
|
||||
@@ -2,6 +2,7 @@ from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.dependencies import require_viewer, repo
|
||||
from decnet.web.db.models import AttackersResponse
|
||||
|
||||
@@ -17,6 +18,7 @@ router = APIRouter()
|
||||
422: {"description": "Validation error"},
|
||||
},
|
||||
)
|
||||
@_traced("api.get_attackers")
|
||||
async def get_attackers(
|
||||
limit: int = Query(50, ge=1, le=1000),
|
||||
offset: int = Query(0, ge=0, le=2147483647),
|
||||
|
||||
@@ -2,6 +2,7 @@ from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.auth import get_password_hash, verify_password
|
||||
from decnet.web.dependencies import get_current_user_unchecked, repo
|
||||
from decnet.web.db.models import ChangePasswordRequest
|
||||
@@ -18,6 +19,7 @@ router = APIRouter()
|
||||
422: {"description": "Validation error"}
|
||||
},
|
||||
)
|
||||
@_traced("api.change_password")
|
||||
async def change_password(request: ChangePasswordRequest, current_user: str = Depends(get_current_user_unchecked)) -> dict[str, str]:
|
||||
_user: Optional[dict[str, Any]] = await repo.get_user_by_uuid(current_user)
|
||||
if not _user or not verify_password(request.old_password, _user["password_hash"]):
|
||||
|
||||
@@ -3,6 +3,7 @@ from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, status
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.auth import (
|
||||
ACCESS_TOKEN_EXPIRE_MINUTES,
|
||||
create_access_token,
|
||||
@@ -24,6 +25,7 @@ router = APIRouter()
|
||||
422: {"description": "Validation error"}
|
||||
},
|
||||
)
|
||||
@_traced("api.login")
|
||||
async def login(request: LoginRequest) -> dict[str, Any]:
|
||||
_user: Optional[dict[str, Any]] = await repo.get_user_by_username(request.username)
|
||||
if not _user or not verify_password(request.password, _user["password_hash"]):
|
||||
|
||||
@@ -2,6 +2,7 @@ from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.dependencies import require_viewer, repo
|
||||
from decnet.web.db.models import BountyResponse
|
||||
|
||||
@@ -10,6 +11,7 @@ router = APIRouter()
|
||||
|
||||
@router.get("/bounty", response_model=BountyResponse, tags=["Bounty Vault"],
|
||||
responses={401: {"description": "Could not validate credentials"}, 422: {"description": "Validation error"}},)
|
||||
@_traced("api.get_bounties")
|
||||
async def get_bounties(
|
||||
limit: int = Query(50, ge=1, le=1000),
|
||||
offset: int = Query(0, ge=0, le=2147483647),
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from fastapi import APIRouter, Depends
|
||||
|
||||
from decnet.env import DECNET_DEVELOPER
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.dependencies import require_viewer, repo
|
||||
from decnet.web.db.models import UserResponse
|
||||
|
||||
@@ -17,6 +18,7 @@ _DEFAULT_MUTATION_INTERVAL = "30m"
|
||||
401: {"description": "Could not validate credentials"},
|
||||
},
|
||||
)
|
||||
@_traced("api.get_config")
|
||||
async def api_get_config(user: dict = Depends(require_viewer)) -> dict:
|
||||
limits_state = await repo.get_state("config_limits")
|
||||
globals_state = await repo.get_state("config_globals")
|
||||
|
||||
@@ -2,6 +2,7 @@ import uuid as _uuid
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.auth import get_password_hash
|
||||
from decnet.web.dependencies import require_admin, repo
|
||||
from decnet.web.db.models import (
|
||||
@@ -24,6 +25,7 @@ router = APIRouter()
|
||||
422: {"description": "Validation error"},
|
||||
},
|
||||
)
|
||||
@_traced("api.create_user")
|
||||
async def api_create_user(
|
||||
req: CreateUserRequest,
|
||||
admin: dict = Depends(require_admin),
|
||||
@@ -57,6 +59,7 @@ async def api_create_user(
|
||||
404: {"description": "User not found"},
|
||||
},
|
||||
)
|
||||
@_traced("api.delete_user")
|
||||
async def api_delete_user(
|
||||
user_uuid: str,
|
||||
admin: dict = Depends(require_admin),
|
||||
@@ -80,6 +83,7 @@ async def api_delete_user(
|
||||
422: {"description": "Validation error"},
|
||||
},
|
||||
)
|
||||
@_traced("api.update_user_role")
|
||||
async def api_update_user_role(
|
||||
user_uuid: str,
|
||||
req: UpdateUserRoleRequest,
|
||||
@@ -106,6 +110,7 @@ async def api_update_user_role(
|
||||
422: {"description": "Validation error"},
|
||||
},
|
||||
)
|
||||
@_traced("api.reset_user_password")
|
||||
async def api_reset_user_password(
|
||||
user_uuid: str,
|
||||
req: ResetUserPasswordRequest,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
|
||||
from decnet.env import DECNET_DEVELOPER
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.dependencies import require_admin, repo
|
||||
|
||||
router = APIRouter()
|
||||
@@ -14,6 +15,7 @@ router = APIRouter()
|
||||
403: {"description": "Admin access required or developer mode not enabled"},
|
||||
},
|
||||
)
|
||||
@_traced("api.reinit")
|
||||
async def api_reinit(admin: dict = Depends(require_admin)) -> dict:
|
||||
if not DECNET_DEVELOPER:
|
||||
raise HTTPException(status_code=403, detail="Developer mode is not enabled")
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from fastapi import APIRouter, Depends
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.dependencies import require_admin, repo
|
||||
from decnet.web.db.models import DeploymentLimitRequest, GlobalMutationIntervalRequest
|
||||
|
||||
@@ -15,6 +16,7 @@ router = APIRouter()
|
||||
422: {"description": "Validation error"},
|
||||
},
|
||||
)
|
||||
@_traced("api.update_deployment_limit")
|
||||
async def api_update_deployment_limit(
|
||||
req: DeploymentLimitRequest,
|
||||
admin: dict = Depends(require_admin),
|
||||
@@ -32,6 +34,7 @@ async def api_update_deployment_limit(
|
||||
422: {"description": "Validation error"},
|
||||
},
|
||||
)
|
||||
@_traced("api.update_global_mutation_interval")
|
||||
async def api_update_global_mutation_interval(
|
||||
req: GlobalMutationIntervalRequest,
|
||||
admin: dict = Depends(require_admin),
|
||||
|
||||
@@ -3,6 +3,7 @@ import os
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.config import DEFAULT_MUTATE_INTERVAL, DecnetConfig, _ROOT
|
||||
from decnet.engine import deploy as _deploy
|
||||
from decnet.ini_loader import load_ini_from_string
|
||||
@@ -27,6 +28,7 @@ router = APIRouter()
|
||||
500: {"description": "Deployment failed"}
|
||||
}
|
||||
)
|
||||
@_traced("api.deploy_deckies")
|
||||
async def api_deploy_deckies(req: DeployIniRequest, admin: dict = Depends(require_admin)) -> dict[str, str]:
|
||||
from decnet.fleet import build_deckies_from_ini
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.dependencies import require_viewer, repo
|
||||
|
||||
router = APIRouter()
|
||||
@@ -9,5 +10,6 @@ router = APIRouter()
|
||||
|
||||
@router.get("/deckies", tags=["Fleet Management"],
|
||||
responses={401: {"description": "Could not validate credentials"}, 422: {"description": "Validation error"}},)
|
||||
@_traced("api.get_deckies")
|
||||
async def get_deckies(user: dict = Depends(require_viewer)) -> list[dict[str, Any]]:
|
||||
return await repo.get_deckies()
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
from fastapi import APIRouter, Depends, HTTPException, Path
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.mutator import mutate_decky
|
||||
from decnet.web.dependencies import require_admin, repo
|
||||
|
||||
@@ -12,6 +13,7 @@ router = APIRouter()
|
||||
tags=["Fleet Management"],
|
||||
responses={401: {"description": "Could not validate credentials"}, 403: {"description": "Insufficient permissions"}, 404: {"description": "Decky not found"}}
|
||||
)
|
||||
@_traced("api.mutate_decky")
|
||||
async def api_mutate_decky(
|
||||
decky_name: str = Path(..., pattern=r"^[a-z0-9\-]{1,64}$"),
|
||||
admin: dict = Depends(require_admin),
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.config import DecnetConfig
|
||||
from decnet.web.dependencies import require_admin, repo
|
||||
from decnet.web.db.models import MutateIntervalRequest
|
||||
@@ -24,6 +25,7 @@ def _parse_duration(s: str) -> int:
|
||||
422: {"description": "Validation error"}
|
||||
},
|
||||
)
|
||||
@_traced("api.update_mutate_interval")
|
||||
async def api_update_mutate_interval(decky_name: str, req: MutateIntervalRequest, admin: dict = Depends(require_admin)) -> dict[str, str]:
|
||||
state_dict = await repo.get_state("deployment")
|
||||
if not state_dict:
|
||||
|
||||
@@ -3,6 +3,7 @@ from typing import Any
|
||||
from fastapi import APIRouter, Depends
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.dependencies import require_viewer, repo
|
||||
from decnet.web.db.models import HealthResponse, ComponentHealth
|
||||
|
||||
@@ -20,6 +21,7 @@ _OPTIONAL_SERVICES = {"sniffer_worker"}
|
||||
503: {"model": HealthResponse, "description": "System unhealthy"},
|
||||
},
|
||||
)
|
||||
@_traced("api.get_health")
|
||||
async def get_health(user: dict = Depends(require_viewer)) -> Any:
|
||||
components: dict[str, ComponentHealth] = {}
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.dependencies import require_viewer, repo
|
||||
|
||||
router = APIRouter()
|
||||
@@ -9,6 +10,7 @@ router = APIRouter()
|
||||
|
||||
@router.get("/logs/histogram", tags=["Logs"],
|
||||
responses={401: {"description": "Could not validate credentials"}, 422: {"description": "Validation error"}},)
|
||||
@_traced("api.get_logs_histogram")
|
||||
async def get_logs_histogram(
|
||||
search: Optional[str] = None,
|
||||
start_time: Optional[str] = Query(None),
|
||||
|
||||
@@ -2,6 +2,7 @@ from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.dependencies import require_viewer, repo
|
||||
from decnet.web.db.models import LogsResponse
|
||||
|
||||
@@ -10,6 +11,7 @@ router = APIRouter()
|
||||
|
||||
@router.get("/logs", response_model=LogsResponse, tags=["Logs"],
|
||||
responses={401: {"description": "Could not validate credentials"}, 422: {"description": "Validation error"}})
|
||||
@_traced("api.get_logs")
|
||||
async def get_logs(
|
||||
limit: int = Query(50, ge=1, le=1000),
|
||||
offset: int = Query(0, ge=0, le=2147483647),
|
||||
|
||||
@@ -2,6 +2,7 @@ from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.dependencies import require_viewer, repo
|
||||
from decnet.web.db.models import StatsResponse
|
||||
|
||||
@@ -10,5 +11,6 @@ router = APIRouter()
|
||||
|
||||
@router.get("/stats", response_model=StatsResponse, tags=["Observability"],
|
||||
responses={401: {"description": "Could not validate credentials"}, 422: {"description": "Validation error"}},)
|
||||
@_traced("api.get_stats")
|
||||
async def get_stats(user: dict = Depends(require_viewer)) -> dict[str, Any]:
|
||||
return await repo.get_stats_summary()
|
||||
|
||||
@@ -7,6 +7,7 @@ from fastapi.responses import StreamingResponse
|
||||
|
||||
from decnet.env import DECNET_DEVELOPER
|
||||
from decnet.logging import get_logger
|
||||
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer
|
||||
from decnet.web.dependencies import require_stream_viewer, repo
|
||||
|
||||
log = get_logger("api")
|
||||
@@ -14,6 +15,34 @@ log = get_logger("api")
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def _build_trace_links(logs: list[dict]) -> list:
|
||||
"""Build OTEL span links from persisted trace_id/span_id in log rows.
|
||||
|
||||
Returns an empty list when tracing is disabled (no OTEL imports).
|
||||
"""
|
||||
try:
|
||||
from opentelemetry.trace import Link, SpanContext, TraceFlags
|
||||
except ImportError:
|
||||
return []
|
||||
links: list[Link] = []
|
||||
for entry in logs:
|
||||
tid = entry.get("trace_id")
|
||||
sid = entry.get("span_id")
|
||||
if not tid or not sid or tid == "0":
|
||||
continue
|
||||
try:
|
||||
ctx = SpanContext(
|
||||
trace_id=int(tid, 16),
|
||||
span_id=int(sid, 16),
|
||||
is_remote=True,
|
||||
trace_flags=TraceFlags(TraceFlags.SAMPLED),
|
||||
)
|
||||
links.append(Link(ctx))
|
||||
except (ValueError, TypeError):
|
||||
continue
|
||||
return links
|
||||
|
||||
|
||||
@router.get("/stream", tags=["Observability"],
|
||||
responses={
|
||||
200: {
|
||||
@@ -24,6 +53,7 @@ router = APIRouter()
|
||||
422: {"description": "Validation error"}
|
||||
},
|
||||
)
|
||||
@_traced("api.stream_events")
|
||||
async def stream_events(
|
||||
request: Request,
|
||||
last_event_id: int = Query(0, alias="lastEventId"),
|
||||
@@ -75,7 +105,15 @@ async def stream_events(
|
||||
)
|
||||
if new_logs:
|
||||
last_id = max(entry["id"] for entry in new_logs)
|
||||
yield f"event: message\ndata: {json.dumps({'type': 'logs', 'data': new_logs})}\n\n"
|
||||
# Create a span linking back to the ingestion traces
|
||||
# stored in each log row, closing the pipeline gap.
|
||||
_links = _build_trace_links(new_logs)
|
||||
_tracer = _get_tracer("sse")
|
||||
with _tracer.start_as_current_span(
|
||||
"sse.emit_logs", links=_links,
|
||||
attributes={"log_count": len(new_logs)},
|
||||
):
|
||||
yield f"event: message\ndata: {json.dumps({'type': 'logs', 'data': new_logs})}\n\n"
|
||||
loops_since_stats = stats_interval_sec
|
||||
|
||||
if loops_since_stats >= stats_interval_sec:
|
||||
|
||||
Reference in New Issue
Block a user