merge: testing → main (reconcile 2-week divergence)

This commit is contained in:
2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions

View File

@@ -0,0 +1,92 @@
"""
DECNET application logging helpers.
Usage:
from decnet.logging import get_logger
log = get_logger("engine") # APP-NAME in RFC 5424 output becomes "engine"
The returned logger propagates to the root logger (configured in config.py with
Rfc5424Formatter), so level control via DECNET_DEVELOPER still applies globally.
When ``DECNET_DEVELOPER_TRACING`` is active, every LogRecord is enriched with
``otel_trace_id`` and ``otel_span_id`` from the current OTEL span context.
This lets you correlate log lines with Jaeger traces — click a log entry and
jump straight to the span that produced it.
"""
from __future__ import annotations
import logging
class _ComponentFilter(logging.Filter):
    """Logging filter that tags every record with a DECNET component name.

    The name is stored on the record as ``decnet_component``. Per this
    module's notes it is intended to be picked up by Rfc5424Formatter as the
    RFC 5424 APP-NAME in place of a fixed "decnet".
    """

    def __init__(self, component: str) -> None:
        """Remember *component* so it can be stamped onto each record."""
        logging.Filter.__init__(self)
        self.component = component

    def filter(self, record: logging.LogRecord) -> bool:
        """Attach the component name to *record*; the record is never dropped."""
        setattr(record, "decnet_component", self.component)
        return True
class _TraceContextFilter(logging.Filter):
    """Stamp ``otel_trace_id`` and ``otel_span_id`` onto a LogRecord.

    The IDs are taken from the OTEL span that is current when the record is
    processed and rendered as zero-padded lowercase hex (32 and 16 digits).

    When no span is active, or OpenTelemetry cannot be imported or queried,
    both fields are set to ``"0"`` (cheap string comparison downstream, no
    None-checks needed).
    """

    def filter(self, record: logging.LogRecord) -> bool:
        """Enrich *record* in place; always returns True (never drops it)."""
        trace_id = span_id = "0"
        try:
            # Imported lazily so this module stays usable when the
            # opentelemetry package is absent (ImportError is caught below).
            from opentelemetry import trace

            ctx = trace.get_current_span().get_span_context()
            if ctx and ctx.trace_id:
                trace_id = format(ctx.trace_id, "032x")
                span_id = format(ctx.span_id, "016x")
        except Exception:
            # Logging must never crash its caller; fall back to "no span".
            trace_id = span_id = "0"
        record.otel_trace_id = trace_id  # type: ignore[attr-defined]
        record.otel_span_id = span_id  # type: ignore[attr-defined]
        return True


_trace_filter_installed: bool = False


def enable_trace_context() -> None:
    """Enable OTEL trace-context enrichment for all ``decnet`` log records.

    Intended to be called from ``decnet.telemetry.setup_tracing()`` after the
    TracerProvider is initialised. Safe to call multiple times (idempotent).

    Two hooks are installed:

    * a ``_TraceContextFilter`` on the ``decnet`` logger, covering records
      logged directly on that logger;
    * a LogRecord factory wrapper covering records created by ``decnet.*``
      descendant loggers.

    The second hook is required because a filter attached to a logger is only
    consulted for records logged on that very logger; records that merely
    propagate up from a descendant bypass the ancestor's logger-level filters.
    """
    global _trace_filter_installed
    if _trace_filter_installed:
        return

    enricher = _TraceContextFilter()
    logging.getLogger("decnet").addFilter(enricher)

    # Chain to whatever factory is currently installed so other
    # customisations keep working.
    previous_factory = logging.getLogRecordFactory()

    def _trace_record_factory(*args: object, **kwargs: object) -> logging.LogRecord:
        record = previous_factory(*args, **kwargs)
        name = record.name
        # logging.makeLogRecord() builds records with name=None, hence the
        # isinstance guard. The bare "decnet" logger is handled by the filter
        # above, so only descendants are enriched here.
        if isinstance(name, str) and name.startswith("decnet."):
            enricher.filter(record)
        return record

    logging.setLogRecordFactory(_trace_record_factory)
    _trace_filter_installed = True
def get_logger(component: str) -> logging.Logger:
    """Fetch the ``decnet.<component>`` logger, tagged with its component name.

    Documented components: cli, engine, api, mutator, collector.

    The logger carries a ``_ComponentFilter`` so its records identify
    themselves as *component*. Propagation is left untouched, so output and
    level handling are decided by whatever handlers are configured further up
    the hierarchy. Repeated calls for the same component return the same
    logger and never attach a second filter.
    """
    component_logger = logging.getLogger(f"decnet.{component}")
    for attached in component_logger.filters:
        if isinstance(attached, _ComponentFilter):
            # Already tagged by an earlier call; don't stack another filter.
            return component_logger
    component_logger.addFilter(_ComponentFilter(component))
    return component_logger

View File

@@ -13,29 +13,37 @@ import logging.handlers
import os
from pathlib import Path
from decnet.logging.inode_aware_handler import InodeAwareRotatingFileHandler
from decnet.privdrop import chown_to_invoking_user, chown_tree_to_invoking_user
from decnet.telemetry import traced as _traced
_LOG_FILE_ENV = "DECNET_LOG_FILE"
_DEFAULT_LOG_FILE = "/var/log/decnet/decnet.log"
_MAX_BYTES = 10 * 1024 * 1024 # 10 MB
_BACKUP_COUNT = 5
_handler: logging.handlers.RotatingFileHandler | None = None
_handler: InodeAwareRotatingFileHandler | None = None
_logger: logging.Logger | None = None
def _get_logger() -> logging.Logger:
@_traced("logging.init_file_handler")
def _init_file_handler() -> logging.Logger:
"""One-time initialisation of the rotating file handler."""
global _handler, _logger
if _logger is not None:
return _logger
log_path = Path(os.environ.get(_LOG_FILE_ENV, _DEFAULT_LOG_FILE))
log_path.parent.mkdir(parents=True, exist_ok=True)
# When running under sudo, hand the parent dir back to the invoking user
# so a subsequent non-root `decnet api` can also write to it.
chown_tree_to_invoking_user(log_path.parent)
_handler = logging.handlers.RotatingFileHandler(
_handler = InodeAwareRotatingFileHandler(
log_path,
maxBytes=_MAX_BYTES,
backupCount=_BACKUP_COUNT,
encoding="utf-8",
)
chown_to_invoking_user(log_path)
_handler.setFormatter(logging.Formatter("%(message)s"))
_logger = logging.getLogger("decnet.syslog")
@@ -46,6 +54,12 @@ def _get_logger() -> logging.Logger:
return _logger
def _get_logger() -> logging.Logger:
    """Return the cached module logger, initialising the file handler on first use."""
    # Fast path: skip the initialiser call once the logger exists.
    return _logger if _logger is not None else _init_file_handler()
def write_syslog(line: str) -> None:
"""Write a single RFC 5424 syslog line to the rotating log file."""
try:

View File

@@ -11,6 +11,8 @@ shared utilities for validating and parsing the log_target string.
import socket
from decnet.telemetry import traced as _traced
def parse_log_target(log_target: str) -> tuple[str, int]:
"""
@@ -23,6 +25,7 @@ def parse_log_target(log_target: str) -> tuple[str, int]:
return parts[0], int(parts[1])
@_traced("logging.probe_log_target")
def probe_log_target(log_target: str, timeout: float = 2.0) -> bool:
"""
Return True if the log target is reachable (TCP connect succeeds).

View File

@@ -0,0 +1,60 @@
"""
RotatingFileHandler that detects external deletion or rotation.
Stdlib ``RotatingFileHandler`` holds an open file descriptor for the
lifetime of the handler. If the target file is deleted (``rm``) or
rotated out (``logrotate`` without ``copytruncate``), the handler keeps
writing to the now-orphaned inode until its own size-based rotation
finally triggers — silently losing every line in between.
Stdlib ``WatchedFileHandler`` solves exactly this problem but doesn't
rotate by size. This subclass combines both: before each emit we stat
the configured path and compare its inode/device to the currently open
file; on mismatch we close and reopen.
Cheap: at most one ``os.stat`` and one ``os.fstat`` per log record. Matches
the pattern used by ``decnet/collector/worker.py:_reopen_if_needed``.
"""
from __future__ import annotations
import logging
import logging.handlers
import os
class InodeAwareRotatingFileHandler(logging.handlers.RotatingFileHandler):
    """RotatingFileHandler that reopens the target on external rotation/deletion.

    Before each record is emitted, the inode/device of ``baseFilename`` on
    disk is compared with that of the currently open stream. On mismatch (or
    when the file is gone, or the stream is unusable) the stream is replaced
    with a freshly opened one, then the normal size-based rotation and write
    of ``RotatingFileHandler.emit`` run.
    """

    def _should_reopen(self) -> bool:
        """Return True when the open stream no longer refers to ``baseFilename``."""
        stream = self.stream
        if stream is None:
            return True
        try:
            disk_stat = os.stat(self.baseFilename)
        except FileNotFoundError:
            # Path was deleted or moved away: a new file must be created.
            return True
        except OSError:
            # Cannot inspect the path (e.g. permissions); keep the current
            # stream rather than drop a descriptor that may still work.
            return False
        try:
            open_stat = os.fstat(stream.fileno())
        except (OSError, ValueError):
            # fileno() raises ValueError on an already-closed stream; treat
            # that like any other unusable descriptor and reopen.
            return True
        return (disk_stat.st_ino, disk_stat.st_dev) != (
            open_stat.st_ino,
            open_stat.st_dev,
        )

    def _discard_stream(self) -> None:
        """Flush and close the current stream without closing the handler.

        ``self.close()`` is deliberately not used here: it marks the whole
        handler as closed and removes a named handler from the logging
        registry, which is wrong for a handler that keeps running.
        """
        stream, self.stream = self.stream, None
        if stream is None:
            return
        try:
            try:
                stream.flush()
            finally:
                stream.close()
        except Exception:  # nosec B110
            # Best effort: the stale stream may point at an orphaned inode or
            # already be closed; failing to tidy it up must not lose the
            # record that is about to be written.
            pass

    def emit(self, record: logging.LogRecord) -> None:
        """Write *record*, reopening the log file first if it was replaced."""
        if self._should_reopen():
            self._discard_stream()
            try:
                self.stream = self._open()
            except OSError:
                # A logging handler MUST NOT crash its caller. If we can't
                # reopen (e.g. file is root-owned after `sudo decnet deploy`
                # and the current process is non-root), defer to the stdlib
                # error path, which just prints a traceback to stderr.
                self.handleError(record)
                return
        super().emit(record)

View File

@@ -5,7 +5,7 @@ Produces fully-compliant syslog messages:
<PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG
Facility: local0 (16)
PEN for structured data: decnet@55555
PEN for structured data: relay@55555
"""
from __future__ import annotations
@@ -16,7 +16,7 @@ from typing import Any
FACILITY_LOCAL0 = 16
NILVALUE = "-"
_SD_ID = "decnet@55555"
_SD_ID = "relay@55555"
SEVERITY_INFO = 6
SEVERITY_WARNING = 4