Files
DECNET/decnet/telemetry.py
anti 614780f144 fix(types): P2 — wire _MixinBase + col() across sqlmodel_repo; suppress pydantic/SQLModel column typing false positives
- Add _MixinBase abstract class to _helpers.py: declares _session(),
  _deserialize_attacker(), _assert_pending(), _check_and_bump_version(),
  and list_running_topology_deckies() so mypy can see cross-mixin contracts
- Add _require(val, msg) helper for narrowing T | None → T
- Inherit _MixinBase in all 26 leaf mixin classes
- Wrap SQLAlchemy column method calls (.is_(), .like(), .notin_(), .in_(),
  .contains()) with col() from sqlmodel — fixes attr-defined false positives
  caused by pydantic plugin typing class-level fields as Python value types
- Wrap select(Model.field) with select(col(Model.field)) for column projections
- Add pyproject.toml [[tool.mypy.overrides]] to disable arg-type in
  sqlmodel_repo.*: pydantic plugin resolves .where(Model.field == v) as
  where(bool), a false positive; call-arg still catches real argument errors
- Remove 9 stale # type: ignore comments (logging, helpers, credentials)
- Fix telemetry.py traced() overload no-redef + misc
- Fix logs.py datetime/str operator and nullable PK comparison with col()
- sqlmodel_repo/ now has 0 mypy errors
2026-05-01 00:49:18 -04:00

309 lines
10 KiB
Python

"""
DECNET OpenTelemetry tracing integration.
Controlled entirely by ``DECNET_DEVELOPER_TRACING``. When disabled (the
default), every public export is a zero-cost no-op: no OTEL SDK imports, no
monkey-patching, no middleware, and ``@traced`` returns the original function
object unwrapped.
"""
from __future__ import annotations
import asyncio
import functools
import inspect
from typing import Any, Callable, TypeVar, overload
from decnet.env import DECNET_DEVELOPER_TRACING, DECNET_OTEL_ENDPOINT
from decnet.logging import get_logger
log = get_logger("api")
F = TypeVar("F", bound=Callable[..., Any])
_ENABLED: bool = DECNET_DEVELOPER_TRACING
# ---------------------------------------------------------------------------
# Lazy OTEL imports — only when tracing is enabled
# ---------------------------------------------------------------------------
_tracer_provider: Any = None # TracerProvider | None
def _init_provider() -> None:
"""Initialise the global TracerProvider (called once from setup_tracing)."""
global _tracer_provider
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import Resource
resource = Resource.create({
"service.name": "decnet",
"service.version": "0.2.0",
})
_tracer_provider = TracerProvider(resource=resource)
exporter = OTLPSpanExporter(endpoint=DECNET_OTEL_ENDPOINT, insecure=True)
_tracer_provider.add_span_processor(BatchSpanProcessor(exporter))
trace.set_tracer_provider(_tracer_provider)
log.info("OTEL tracing enabled endpoint=%s", DECNET_OTEL_ENDPOINT)
def setup_tracing(app: Any) -> None:
"""Configure the OTEL TracerProvider and instrument FastAPI.
Call once from the FastAPI lifespan, after DB init. No-op when
``DECNET_DEVELOPER_TRACING`` is not ``"true"``.
"""
if not _ENABLED:
return
try:
_init_provider()
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
FastAPIInstrumentor.instrument_app(app)
from decnet.logging import enable_trace_context
enable_trace_context()
log.info("FastAPI auto-instrumentation active, log-trace correlation enabled")
except Exception as exc:
log.warning("OTEL setup failed — continuing without tracing: %s", exc)
def shutdown_tracing() -> None:
"""Flush and shut down the tracer provider. Safe to call when disabled."""
if _tracer_provider is not None:
try:
_tracer_provider.shutdown()
except Exception: # nosec B110 — best-effort tracer shutdown
pass
# ---------------------------------------------------------------------------
# get_tracer — mirrors get_logger(component) pattern
# ---------------------------------------------------------------------------
class _NoOpSpan:
"""Minimal stand-in so ``with get_tracer(...).start_as_current_span(...)``
works when tracing is disabled."""
def set_attribute(self, key: str, value: Any) -> None:
pass
def set_status(self, *args: Any, **kwargs: Any) -> None:
pass
def record_exception(self, exc: BaseException) -> None:
pass
def __enter__(self) -> "_NoOpSpan":
return self
def __exit__(self, *args: Any) -> None:
pass
class _NoOpTracer:
"""Returned by ``get_tracer()`` when tracing is disabled."""
def start_as_current_span(self, name: str, **kwargs: Any) -> _NoOpSpan:
return _NoOpSpan()
def start_span(self, name: str, **kwargs: Any) -> _NoOpSpan:
return _NoOpSpan()
_tracers: dict[str, Any] = {}
def get_tracer(component: str) -> Any:
"""Return an OTEL Tracer (or a no-op stand-in) for *component*."""
if not _ENABLED:
return _NoOpTracer()
if component not in _tracers:
from opentelemetry import trace
_tracers[component] = trace.get_tracer(f"decnet.{component}")
return _tracers[component]
# ---------------------------------------------------------------------------
# @traced decorator — async + sync, zero overhead when disabled
# ---------------------------------------------------------------------------
@overload
def traced(fn: F) -> F: ...
@overload
def traced(name: str) -> Callable[[F], F]: ...
def traced(fn: Any = None, *, name: str | None = None) -> Any: # type: ignore[misc]
"""Decorator that wraps a function in an OTEL span.
Usage::
@traced # span name = "module.func"
async def my_worker(): ...
@traced("custom.span.name") # explicit span name
def my_sync_func(): ...
When ``DECNET_DEVELOPER_TRACING`` is disabled the original function is
returned **unwrapped** — zero overhead on every call.
"""
# Handle @traced("name") vs @traced vs @traced(name="name")
if fn is None and name is not None:
# Called as @traced("name") or @traced(name="name")
def decorator(f: F) -> F:
return _wrap(f, name)
return decorator
if fn is not None and isinstance(fn, str):
# Called as @traced("name") — fn is actually the name string
span_name = fn
def decorator(f: F) -> F:
return _wrap(f, span_name)
return decorator
if fn is not None and callable(fn):
# Called as @traced (no arguments)
return _wrap(fn, None)
# Fallback: @traced() with no args
def _fallback_decorator(f: F) -> F:
return _wrap(f, name)
return _fallback_decorator
def _wrap(fn: F, span_name: str | None) -> F:
"""Wrap *fn* in a span. Returns *fn* unchanged when tracing is off."""
if not _ENABLED:
return fn
resolved_name = span_name or f"{fn.__module__.rsplit('.', 1)[-1]}.{fn.__qualname__}"
if inspect.iscoroutinefunction(fn):
@functools.wraps(fn)
async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
tracer = get_tracer(fn.__module__.split(".")[-1])
with tracer.start_as_current_span(resolved_name) as span:
try:
result = await fn(*args, **kwargs)
return result
except Exception as exc:
span.record_exception(exc)
raise
return async_wrapper # type: ignore[return-value]
else:
@functools.wraps(fn)
def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
tracer = get_tracer(fn.__module__.split(".")[-1])
with tracer.start_as_current_span(resolved_name) as span:
try:
result = fn(*args, **kwargs)
return result
except Exception as exc:
span.record_exception(exc)
raise
return sync_wrapper # type: ignore[return-value]
# ---------------------------------------------------------------------------
# TracedRepository — proxy wrapper for BaseRepository
# ---------------------------------------------------------------------------
def wrap_repository(repo: Any) -> Any:
"""Wrap *repo* in a dynamic tracing proxy. Returns *repo* unchanged when disabled.
Instead of mirroring every method signature (which drifts when concrete
repos add extra kwargs beyond the ABC), this proxy introspects the inner
repo at construction time and wraps every public async method in a span
via ``__getattr__``. Sync attributes are forwarded directly.
"""
if not _ENABLED:
return repo
tracer = get_tracer("db")
class TracedRepository:
"""Dynamic proxy — wraps every async method call in a DB span."""
def __init__(self, inner: Any) -> None:
self._inner = inner
def __getattr__(self, name: str) -> Any:
attr = getattr(self._inner, name)
if asyncio.iscoroutinefunction(attr):
@functools.wraps(attr)
async def _traced_method(*args: Any, **kwargs: Any) -> Any:
with tracer.start_as_current_span(f"db.{name}") as span:
try:
return await attr(*args, **kwargs)
except Exception as exc:
span.record_exception(exc)
raise
return _traced_method
return attr
return TracedRepository(repo)
# ---------------------------------------------------------------------------
# Cross-stage trace context propagation
# ---------------------------------------------------------------------------
# The DECNET pipeline is decoupled via JSON files:
# collector -> .json file -> ingester -> DB -> profiler
#
# To show the full journey of an event in Jaeger, we embed W3C trace context
# into the JSON records. The collector injects it; the ingester extracts it
# and continues the trace as a child span.
def inject_context(record: dict[str, Any]) -> None:
"""Inject current OTEL trace context into *record* under ``_trace``.
No-op when tracing is disabled. The ``_trace`` key is stripped by the
ingester after extraction — it never reaches the DB.
"""
if not _ENABLED:
return
try:
from opentelemetry.propagate import inject
carrier: dict[str, str] = {}
inject(carrier)
if carrier:
record["_trace"] = carrier
except Exception: # nosec B110 — trace injection is optional
pass
def extract_context(record: dict[str, Any]) -> Any:
"""Extract OTEL trace context from *record* and return it.
Returns ``None`` when tracing is disabled or no context is present.
Removes the ``_trace`` key from the record so it doesn't leak into the DB.
"""
if not _ENABLED:
record.pop("_trace", None)
return None
try:
carrier = record.pop("_trace", None)
if not carrier:
return None
from opentelemetry.propagate import extract
return extract(carrier)
except Exception:
return None
def start_span_with_context(tracer: Any, name: str, context: Any = None) -> Any:
"""Start a span, optionally as a child of an extracted context.
Returns a context manager span. When *context* is ``None``, creates a
root span (normal behavior).
"""
if not _ENABLED:
return _NoOpSpan()
if context is not None:
return tracer.start_as_current_span(name, context=context)
return tracer.start_as_current_span(name)