feat: complete OTEL tracing across all services with pipeline bridge and docs
Extends tracing to every remaining module: all 23 API route handlers, correlation engine, sniffer (fingerprint/p0f/syslog), prober (jarm/hassh/tcpfp), profiler behavioral analysis, logging subsystem, engine, and mutator. Bridges the ingester→SSE trace gap by persisting trace_id/span_id columns on the logs table and creating OTEL span links in the SSE endpoint. Adds log-trace correlation via _TraceContextFilter injecting otel_trace_id into Python LogRecords. Includes development/docs/TRACING.md with full span reference (76 spans), pipeline propagation architecture, quick start guide, and troubleshooting.
This commit is contained in:
@@ -7,6 +7,11 @@ Usage:
|
||||
|
||||
The returned logger propagates to the root logger (configured in config.py with
|
||||
Rfc5424Formatter), so level control via DECNET_DEVELOPER still applies globally.
|
||||
|
||||
When ``DECNET_DEVELOPER_TRACING`` is active, every LogRecord is enriched with
|
||||
``otel_trace_id`` and ``otel_span_id`` from the current OTEL span context.
|
||||
This lets you correlate log lines with Jaeger traces — click a log entry and
|
||||
jump straight to the span that produced it.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -27,6 +32,51 @@ class _ComponentFilter(logging.Filter):
|
||||
return True
|
||||
|
||||
|
||||
class _TraceContextFilter(logging.Filter):
|
||||
"""Injects ``otel_trace_id`` and ``otel_span_id`` onto every LogRecord
|
||||
from the active OTEL span context.
|
||||
|
||||
Installed once by ``enable_trace_context()`` on the root ``decnet`` logger
|
||||
so all child loggers inherit the enrichment via propagation.
|
||||
|
||||
When no span is active, both fields are set to ``"0"`` (cheap string
|
||||
comparison downstream, no None-checks needed).
|
||||
"""
|
||||
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
try:
|
||||
from opentelemetry import trace
|
||||
span = trace.get_current_span()
|
||||
ctx = span.get_span_context()
|
||||
if ctx and ctx.trace_id:
|
||||
record.otel_trace_id = format(ctx.trace_id, "032x") # type: ignore[attr-defined]
|
||||
record.otel_span_id = format(ctx.span_id, "016x") # type: ignore[attr-defined]
|
||||
else:
|
||||
record.otel_trace_id = "0" # type: ignore[attr-defined]
|
||||
record.otel_span_id = "0" # type: ignore[attr-defined]
|
||||
except Exception:
|
||||
record.otel_trace_id = "0" # type: ignore[attr-defined]
|
||||
record.otel_span_id = "0" # type: ignore[attr-defined]
|
||||
return True
|
||||
|
||||
|
||||
_trace_filter_installed: bool = False
|
||||
|
||||
|
||||
def enable_trace_context() -> None:
|
||||
"""Install the OTEL trace-context filter on the root ``decnet`` logger.
|
||||
|
||||
Called once from ``decnet.telemetry.setup_tracing()`` after the
|
||||
TracerProvider is initialised. Safe to call multiple times (idempotent).
|
||||
"""
|
||||
global _trace_filter_installed
|
||||
if _trace_filter_installed:
|
||||
return
|
||||
root = logging.getLogger("decnet")
|
||||
root.addFilter(_TraceContextFilter())
|
||||
_trace_filter_installed = True
|
||||
|
||||
|
||||
def get_logger(component: str) -> logging.Logger:
|
||||
"""Return a named logger that self-identifies as *component* in RFC 5424.
|
||||
|
||||
|
||||
@@ -13,6 +13,8 @@ import logging.handlers
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
_LOG_FILE_ENV = "DECNET_LOG_FILE"
|
||||
_DEFAULT_LOG_FILE = "/var/log/decnet/decnet.log"
|
||||
_MAX_BYTES = 10 * 1024 * 1024 # 10 MB
|
||||
@@ -22,10 +24,10 @@ _handler: logging.handlers.RotatingFileHandler | None = None
|
||||
_logger: logging.Logger | None = None
|
||||
|
||||
|
||||
def _get_logger() -> logging.Logger:
|
||||
@_traced("logging.init_file_handler")
|
||||
def _init_file_handler() -> logging.Logger:
|
||||
"""One-time initialisation of the rotating file handler."""
|
||||
global _handler, _logger
|
||||
if _logger is not None:
|
||||
return _logger
|
||||
|
||||
log_path = Path(os.environ.get(_LOG_FILE_ENV, _DEFAULT_LOG_FILE))
|
||||
log_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
@@ -46,6 +48,12 @@ def _get_logger() -> logging.Logger:
|
||||
return _logger
|
||||
|
||||
|
||||
def _get_logger() -> logging.Logger:
|
||||
if _logger is not None:
|
||||
return _logger
|
||||
return _init_file_handler()
|
||||
|
||||
|
||||
def write_syslog(line: str) -> None:
|
||||
"""Write a single RFC 5424 syslog line to the rotating log file."""
|
||||
try:
|
||||
|
||||
@@ -11,6 +11,8 @@ shared utilities for validating and parsing the log_target string.
|
||||
|
||||
import socket
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
|
||||
def parse_log_target(log_target: str) -> tuple[str, int]:
|
||||
"""
|
||||
@@ -23,6 +25,7 @@ def parse_log_target(log_target: str) -> tuple[str, int]:
|
||||
return parts[0], int(parts[1])
|
||||
|
||||
|
||||
@_traced("logging.probe_log_target")
|
||||
def probe_log_target(log_target: str, timeout: float = 2.0) -> bool:
|
||||
"""
|
||||
Return True if the log target is reachable (TCP connect succeeds).
|
||||
|
||||
Reference in New Issue
Block a user