merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
@@ -0,0 +1,92 @@
|
||||
"""
|
||||
DECNET application logging helpers.
|
||||
|
||||
Usage:
|
||||
from decnet.logging import get_logger
|
||||
log = get_logger("engine") # APP-NAME in RFC 5424 output becomes "engine"
|
||||
|
||||
The returned logger propagates to the root logger (configured in config.py with
|
||||
Rfc5424Formatter), so level control via DECNET_DEVELOPER still applies globally.
|
||||
|
||||
When ``DECNET_DEVELOPER_TRACING`` is active, every LogRecord is enriched with
|
||||
``otel_trace_id`` and ``otel_span_id`` from the current OTEL span context.
|
||||
This lets you correlate log lines with Jaeger traces — click a log entry and
|
||||
jump straight to the span that produced it.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
|
||||
class _ComponentFilter(logging.Filter):
|
||||
"""Injects *decnet_component* onto every LogRecord so Rfc5424Formatter can
|
||||
use it as the RFC 5424 APP-NAME field instead of the hardcoded "decnet"."""
|
||||
|
||||
def __init__(self, component: str) -> None:
|
||||
super().__init__()
|
||||
self.component = component
|
||||
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
record.decnet_component = self.component # type: ignore[attr-defined]
|
||||
return True
|
||||
|
||||
|
||||
class _TraceContextFilter(logging.Filter):
|
||||
"""Injects ``otel_trace_id`` and ``otel_span_id`` onto every LogRecord
|
||||
from the active OTEL span context.
|
||||
|
||||
Installed once by ``enable_trace_context()`` on the root ``decnet`` logger
|
||||
so all child loggers inherit the enrichment via propagation.
|
||||
|
||||
When no span is active, both fields are set to ``"0"`` (cheap string
|
||||
comparison downstream, no None-checks needed).
|
||||
"""
|
||||
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
try:
|
||||
from opentelemetry import trace
|
||||
span = trace.get_current_span()
|
||||
ctx = span.get_span_context()
|
||||
if ctx and ctx.trace_id:
|
||||
record.otel_trace_id = format(ctx.trace_id, "032x") # type: ignore[attr-defined]
|
||||
record.otel_span_id = format(ctx.span_id, "016x") # type: ignore[attr-defined]
|
||||
else:
|
||||
record.otel_trace_id = "0" # type: ignore[attr-defined]
|
||||
record.otel_span_id = "0" # type: ignore[attr-defined]
|
||||
except Exception:
|
||||
record.otel_trace_id = "0" # type: ignore[attr-defined]
|
||||
record.otel_span_id = "0" # type: ignore[attr-defined]
|
||||
return True
|
||||
|
||||
|
||||
# Set once enable_trace_context() has installed the filter, so repeated
# calls are no-ops.
_trace_filter_installed: bool = False


def enable_trace_context() -> None:
    """Install the OTEL trace-context filter for DECNET log records.

    Called once from ``decnet.telemetry.setup_tracing()`` after the
    TracerProvider is initialised. Safe to call multiple times (idempotent).

    A filter attached to a logger is only consulted for records logged
    directly on that logger; records propagated from descendants such as
    ``decnet.engine`` bypass it. To make the enrichment reach those records
    the same filter instance is attached to:

    * the ``decnet`` logger itself,
    * every ``decnet.*`` logger that already exists, and
    * every handler currently installed on the ``decnet`` logger and on the
      root logger, so records from loggers created later are enriched when
      they reach those handlers.

    Handlers added after this call are not covered.
    """
    global _trace_filter_installed
    if _trace_filter_installed:
        return

    trace_filter = _TraceContextFilter()
    decnet_logger = logging.getLogger("decnet")
    decnet_logger.addFilter(trace_filter)

    # Existing children: loggerDict also holds PlaceHolder entries, which
    # are not real loggers and must be skipped.
    for name, candidate in list(logging.Logger.manager.loggerDict.items()):
        if name.startswith("decnet.") and isinstance(candidate, logging.Logger):
            candidate.addFilter(trace_filter)

    # Handler-level filters run for every record the handler processes,
    # regardless of which logger created it. addFilter() ignores a filter
    # object that is already attached, so overlap here is harmless.
    for handler in (*decnet_logger.handlers, *logging.getLogger().handlers):
        handler.addFilter(trace_filter)

    _trace_filter_installed = True
|
||||
|
||||
|
||||
def get_logger(component: str) -> logging.Logger:
    """Return the ``decnet.<component>`` logger, tagged with its component.

    Valid components: cli, engine, api, mutator, collector.

    The logger propagates normally, so the root handler (Rfc5424Formatter +
    level gate from DECNET_DEVELOPER) produces the output. A
    ``_ComponentFilter`` is attached on first use so the component name shows
    up as APP-NAME in RFC 5424 output; later calls for the same component
    reuse the logger without adding a second filter.
    """
    named = logging.getLogger(f"decnet.{component}")
    for existing in named.filters:
        if isinstance(existing, _ComponentFilter):
            # Already tagged by an earlier call.
            break
    else:
        named.addFilter(_ComponentFilter(component))
    return named
|
||||
|
||||
@@ -13,29 +13,37 @@ import logging.handlers
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from decnet.logging.inode_aware_handler import InodeAwareRotatingFileHandler
|
||||
from decnet.privdrop import chown_to_invoking_user, chown_tree_to_invoking_user
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
_LOG_FILE_ENV = "DECNET_LOG_FILE"
|
||||
_DEFAULT_LOG_FILE = "/var/log/decnet/decnet.log"
|
||||
_MAX_BYTES = 10 * 1024 * 1024 # 10 MB
|
||||
_BACKUP_COUNT = 5
|
||||
|
||||
_handler: logging.handlers.RotatingFileHandler | None = None
|
||||
_handler: InodeAwareRotatingFileHandler | None = None
|
||||
_logger: logging.Logger | None = None
|
||||
|
||||
|
||||
def _get_logger() -> logging.Logger:
|
||||
@_traced("logging.init_file_handler")
|
||||
def _init_file_handler() -> logging.Logger:
|
||||
"""One-time initialisation of the rotating file handler."""
|
||||
global _handler, _logger
|
||||
if _logger is not None:
|
||||
return _logger
|
||||
|
||||
log_path = Path(os.environ.get(_LOG_FILE_ENV, _DEFAULT_LOG_FILE))
|
||||
log_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
# When running under sudo, hand the parent dir back to the invoking user
|
||||
# so a subsequent non-root `decnet api` can also write to it.
|
||||
chown_tree_to_invoking_user(log_path.parent)
|
||||
|
||||
_handler = logging.handlers.RotatingFileHandler(
|
||||
_handler = InodeAwareRotatingFileHandler(
|
||||
log_path,
|
||||
maxBytes=_MAX_BYTES,
|
||||
backupCount=_BACKUP_COUNT,
|
||||
encoding="utf-8",
|
||||
)
|
||||
chown_to_invoking_user(log_path)
|
||||
_handler.setFormatter(logging.Formatter("%(message)s"))
|
||||
|
||||
_logger = logging.getLogger("decnet.syslog")
|
||||
@@ -46,6 +54,12 @@ def _get_logger() -> logging.Logger:
|
||||
return _logger
|
||||
|
||||
|
||||
def _get_logger() -> logging.Logger:
    """Return the cached file logger, initialising it on first use."""
    # Fast path: once the module-level logger exists, skip the initialiser.
    return _logger if _logger is not None else _init_file_handler()
|
||||
|
||||
|
||||
def write_syslog(line: str) -> None:
|
||||
"""Write a single RFC 5424 syslog line to the rotating log file."""
|
||||
try:
|
||||
|
||||
@@ -11,6 +11,8 @@ shared utilities for validating and parsing the log_target string.
|
||||
|
||||
import socket
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
|
||||
def parse_log_target(log_target: str) -> tuple[str, int]:
|
||||
"""
|
||||
@@ -23,6 +25,7 @@ def parse_log_target(log_target: str) -> tuple[str, int]:
|
||||
return parts[0], int(parts[1])
|
||||
|
||||
|
||||
@_traced("logging.probe_log_target")
|
||||
def probe_log_target(log_target: str, timeout: float = 2.0) -> bool:
|
||||
"""
|
||||
Return True if the log target is reachable (TCP connect succeeds).
|
||||
|
||||
60
decnet/logging/inode_aware_handler.py
Normal file
60
decnet/logging/inode_aware_handler.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""
|
||||
RotatingFileHandler that detects external deletion or rotation.
|
||||
|
||||
Stdlib ``RotatingFileHandler`` holds an open file descriptor for the
|
||||
lifetime of the handler. If the target file is deleted (``rm``) or
|
||||
rotated out (``logrotate`` without ``copytruncate``), the handler keeps
|
||||
writing to the now-orphaned inode until its own size-based rotation
|
||||
finally triggers — silently losing every line in between.
|
||||
|
||||
Stdlib ``WatchedFileHandler`` solves exactly this problem but doesn't
|
||||
rotate by size. This subclass combines both: before each emit we stat
|
||||
the configured path and compare its inode/device to the currently open
|
||||
file; on mismatch we close and reopen.
|
||||
|
||||
Cheap: one ``os.stat`` per log record. Matches the pattern used by
|
||||
``decnet/collector/worker.py:_reopen_if_needed``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import logging.handlers
|
||||
import os
|
||||
|
||||
|
||||
class InodeAwareRotatingFileHandler(logging.handlers.RotatingFileHandler):
    """RotatingFileHandler that reopens the target on external rotation/deletion.

    Before each emit the configured path is stat'ed and its inode/device is
    compared with the currently open stream; on mismatch (or when the path
    is gone) the stream is replaced with a freshly opened one.
    """

    def _should_reopen(self) -> bool:
        """Return True when the open stream no longer matches the file on disk."""
        if self.stream is None:
            return True
        try:
            disk_stat = os.stat(self.baseFilename)
        except FileNotFoundError:
            # Path was deleted or rotated away: recreate it.
            return True
        except OSError:
            # Path cannot be inspected (e.g. permissions); keep writing to
            # the stream we already hold rather than dropping it.
            return False
        try:
            open_stat = os.fstat(self.stream.fileno())
        except (OSError, ValueError):
            # fileno() raises ValueError on an already-closed stream; either
            # way the current stream is unusable, so reopen.
            return True
        return (
            disk_stat.st_ino != open_stat.st_ino
            or disk_stat.st_dev != open_stat.st_dev
        )

    def emit(self, record: logging.LogRecord) -> None:
        """Write *record*, reopening the log file first if it was replaced."""
        if self._should_reopen():
            # Swap only the stream. Calling self.close() here would run the
            # full Handler.close() teardown, which is meant for shutdown and
            # not for a handler that keeps being used.
            stale = self.stream
            self.stream = None
            if stale is not None:
                try:
                    try:
                        stale.flush()
                    finally:
                        stale.close()
                except Exception:  # nosec B110
                    # Best effort: the old inode is orphaned anyway.
                    pass
            try:
                self.stream = self._open()
            except OSError:
                # A logging handler MUST NOT crash its caller. If we can't
                # reopen (e.g. file is root-owned after `sudo decnet deploy`
                # and the current process is non-root), defer to the stdlib
                # error path, which just prints a traceback to stderr.
                self.handleError(record)
                return
        super().emit(record)
|
||||
@@ -5,7 +5,7 @@ Produces fully-compliant syslog messages:
|
||||
<PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG
|
||||
|
||||
Facility: local0 (16)
|
||||
PEN for structured data: decnet@55555
|
||||
PEN for structured data: relay@55555
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -16,7 +16,7 @@ from typing import Any
|
||||
|
||||
FACILITY_LOCAL0 = 16
|
||||
NILVALUE = "-"
|
||||
_SD_ID = "decnet@55555"
|
||||
_SD_ID = "relay@55555"
|
||||
|
||||
SEVERITY_INFO = 6
|
||||
SEVERITY_WARNING = 4
|
||||
|
||||
Reference in New Issue
Block a user