feat(collector): publish system.log per ingested event (DEBT-031 worker 5)
log_collector_worker connects the bus at startup, builds a thread-safe system.log publisher, and hands it to each container-stream thread through _stream_container's new publish_fn parameter. Publishing fires right after the JSON record is written — same rate-limiter path, no extra parsing, and a compact payload (decky/service/event_type/attacker_ip/timestamp) so subscribers can redraw without re-reading the DB. The bus stays optional: if get_bus() fails, or DECNET_BUS_ENABLED=false, the factory returns a no-op publisher and the stream thread calls it unconditionally. Hook failures are logged and never abort the thread.
This commit is contained in:
@@ -7,6 +7,7 @@ The ingester tails the .json file; rsyslog can consume the .log file independent
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@@ -15,11 +16,19 @@ import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
from decnet.bus import topics as _topics
|
||||
from decnet.bus.factory import get_bus
|
||||
from decnet.bus.publish import make_thread_safe_publisher
|
||||
from decnet.logging import get_logger
|
||||
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer, inject_context as _inject_ctx
|
||||
|
||||
# Signature of the collector's publish hook: ``publish_fn(parsed_event_dict)``.
# The hook is called from the container-stream threads; the worker wraps it
# around a thread-safe bus publisher that marshals each call onto the asyncio
# loop.
CollectorPublishFn = Callable[[dict[str, Any]], None]
|
||||
|
||||
logger = get_logger("collector")
|
||||
|
||||
# ─── Ingestion rate limiter ───────────────────────────────────────────────────
|
||||
@@ -274,7 +283,12 @@ def _reopen_if_needed(path: Path, fh: Optional[Any]) -> Any:
|
||||
|
||||
|
||||
@_traced("collector.stream_container")
|
||||
def _stream_container(container_id: str, log_path: Path, json_path: Path) -> None:
|
||||
def _stream_container(
|
||||
container_id: str,
|
||||
log_path: Path,
|
||||
json_path: Path,
|
||||
publish_fn: CollectorPublishFn | None = None,
|
||||
) -> None:
|
||||
"""Stream logs from one container and append to the host log files."""
|
||||
import docker # type: ignore[import]
|
||||
|
||||
@@ -309,6 +323,13 @@ def _stream_container(container_id: str, log_path: Path, json_path: Path) -> Non
|
||||
jf = _reopen_if_needed(json_path, jf)
|
||||
jf.write(json.dumps(parsed) + "\n")
|
||||
jf.flush()
|
||||
if publish_fn is not None:
|
||||
try:
|
||||
publish_fn(parsed)
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"collector: bus publish failed: %s", exc,
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
"collector: rate-limited decky=%s service=%s type=%s attacker=%s",
|
||||
@@ -328,6 +349,41 @@ def _stream_container(container_id: str, log_path: Path, json_path: Path) -> Non
|
||||
pass
|
||||
|
||||
|
||||
# ─── Bus plumbing ─────────────────────────────────────────────────────────────
|
||||
|
||||
def _make_system_log_publisher(
    bus: Any, loop: asyncio.AbstractEventLoop,
) -> CollectorPublishFn:
    """Build the ``publish_fn(parsed)`` hook handed to the stream threads.

    With ``bus=None`` the returned hook does nothing, so stream threads can
    invoke it without first checking whether a bus is configured.  Otherwise
    every call goes through ``make_thread_safe_publisher(bus, loop)`` and is
    sent to the topic ``_topics.system(_topics.SYSTEM_LOG)`` with a compact
    five-field payload, using the record's ``event_type`` as the event type.
    """

    def _noop(_parsed: dict[str, Any]) -> None:
        return None

    # No bus at all: hand back the do-nothing hook.
    if bus is None:
        return _noop

    thread_safe_publish = make_thread_safe_publisher(bus, loop)
    if thread_safe_publish is None:
        return _noop

    log_topic = _topics.system(_topics.SYSTEM_LOG)

    def _publish(parsed: dict[str, Any]) -> None:
        kind = parsed.get("event_type", "")
        # Only the fields subscribers need; the full record stays on disk.
        compact = {
            "decky": parsed.get("decky", ""),
            "service": parsed.get("service", ""),
            "event_type": kind,
            "attacker_ip": parsed.get("attacker_ip", "Unknown"),
            "timestamp": parsed.get("timestamp", ""),
        }
        thread_safe_publish(log_topic, compact, kind)

    return _publish
|
||||
|
||||
|
||||
# ─── Async collector ──────────────────────────────────────────────────────────
|
||||
|
||||
async def log_collector_worker(log_file: str) -> None:
|
||||
@@ -347,6 +403,19 @@ async def log_collector_worker(log_file: str) -> None:
|
||||
active: dict[str, asyncio.Task[None]] = {}
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
# Optional bus wiring — per-line system.log publish. Fan-in from many
|
||||
# container-stream threads is handled by make_thread_safe_publisher,
|
||||
# which marshals each publish onto this loop.
|
||||
bus = None
|
||||
try:
|
||||
bus = get_bus(client_name="collector")
|
||||
await bus.connect()
|
||||
except Exception as exc:
|
||||
logger.warning("collector: bus unavailable, continuing without publish: %s", exc)
|
||||
bus = None
|
||||
|
||||
_publish_log = _make_system_log_publisher(bus, loop)
|
||||
|
||||
# Dedicated thread pool so long-running container log streams don't
|
||||
# saturate the default asyncio executor and starve short-lived
|
||||
# to_thread() calls elsewhere (e.g. load_state in the web API).
|
||||
@@ -359,7 +428,7 @@ async def log_collector_worker(log_file: str) -> None:
|
||||
active[container_id] = asyncio.ensure_future(
|
||||
loop.run_in_executor(
|
||||
collector_pool, _stream_container,
|
||||
container_id, log_path, json_path,
|
||||
container_id, log_path, json_path, _publish_log,
|
||||
),
|
||||
loop=loop,
|
||||
)
|
||||
@@ -396,3 +465,6 @@ async def log_collector_worker(log_file: str) -> None:
|
||||
logger.error("collector error: %s", exc)
|
||||
finally:
|
||||
collector_pool.shutdown(wait=False)
|
||||
if bus is not None:
|
||||
with contextlib.suppress(Exception):
|
||||
await bus.close()
|
||||
|
||||
0
tests/collector/__init__.py
Normal file
0
tests/collector/__init__.py
Normal file
185
tests/collector/test_collector_bus.py
Normal file
185
tests/collector/test_collector_bus.py
Normal file
@@ -0,0 +1,185 @@
|
||||
"""Bus wiring for the collector (DEBT-031, worker 5).
|
||||
|
||||
Collector streams logs from Docker containers in a thread pool — can't be
|
||||
exercised cleanly under pytest. These tests pin the two things that
|
||||
actually carry the contract:
|
||||
|
||||
1. ``_stream_container`` invokes ``publish_fn(parsed)`` right after writing
|
||||
the JSON record, and skips publish when the hook is absent.
|
||||
2. ``_make_system_log_publisher`` routes under ``system.log`` with the
|
||||
expected compact payload shape.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from decnet.bus.fake import FakeBus
|
||||
from decnet.collector.worker import (
|
||||
_make_system_log_publisher,
|
||||
_stream_container,
|
||||
)
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def bus() -> FakeBus:
    """Yield a connected ``FakeBus`` and close it once the test is done."""
    fake_bus = FakeBus()
    await fake_bus.connect()
    yield fake_bus
    # Teardown: runs after the test that requested the fixture.
    await fake_bus.close()
|
||||
|
||||
|
||||
# ─── Thread-safe publisher factory ───────────────────────────────────────────
|
||||
|
||||
@pytest.mark.asyncio
async def test_publisher_routes_under_system_log(bus: FakeBus) -> None:
    """A published record arrives on ``system.log`` with the compact payload."""
    record = {
        "timestamp": "2026-04-21 10:00:00",
        "decky": "decky-a",
        "service": "ssh",
        "event_type": "auth_fail",
        "attacker_ip": "1.2.3.4",
    }
    expected_payload = {
        "decky": "decky-a",
        "service": "ssh",
        "event_type": "auth_fail",
        "attacker_ip": "1.2.3.4",
        "timestamp": "2026-04-21 10:00:00",
    }

    publish = _make_system_log_publisher(bus, asyncio.get_running_loop())

    subscription = bus.subscribe("system.log")
    async with subscription:
        publish(record)
        # Bounded wait so a publish that never lands fails fast instead of hanging.
        event = await asyncio.wait_for(subscription.__anext__(), timeout=2.0)

    assert event.topic == "system.log"
    assert event.type == "auth_fail"
    assert event.payload == expected_payload
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_publisher_no_bus_is_noop() -> None:
    """With no bus (the ``get_bus()`` failure path) the publisher is a no-op."""
    noop_publish = _make_system_log_publisher(None, asyncio.get_running_loop())
    # Calling it must neither raise nor hang.
    noop_publish({"event_type": "anything"})
|
||||
|
||||
|
||||
# ─── Stream-thread integration: publish_fn wiring ────────────────────────────
|
||||
|
||||
class _FakeContainer:
|
||||
"""Minimal duck-typed stand-in for docker.Container.logs(stream=True)."""
|
||||
|
||||
def __init__(self, lines: list[bytes]) -> None:
|
||||
self._lines = lines
|
||||
|
||||
def logs(self, stream=True, follow=True, stdout=True, stderr=False):
|
||||
yield from self._lines
|
||||
|
||||
|
||||
class _FakeDockerClient:
|
||||
def __init__(self, container: _FakeContainer) -> None:
|
||||
self.containers = self # so .get() lookup below works
|
||||
self._container = container
|
||||
|
||||
def get(self, _container_id: str) -> _FakeContainer:
|
||||
return self._container
|
||||
|
||||
|
||||
def _make_rfc5424_line() -> str:
|
||||
# Crafted to pass _RFC5424_RE in collector.worker.
|
||||
return (
|
||||
"<134>1 2026-04-21T10:00:00+00:00 decky-a ssh - auth_fail "
|
||||
"[decnet@32473 src_ip=\"1.2.3.4\"] failed password"
|
||||
)
|
||||
|
||||
|
||||
def test_stream_container_invokes_publish_fn(monkeypatch, tmp_path):
    """One parseable line yields one ``publish_fn`` call and one JSON record."""
    import docker as _docker_mod

    raw_line = _make_rfc5424_line().encode() + b"\n"
    fake_client = _FakeDockerClient(_FakeContainer([raw_line]))
    monkeypatch.setattr(_docker_mod, "from_env", lambda: fake_client)

    json_path = tmp_path / "decnet.json"
    captured: list[dict] = []
    _stream_container(
        "cid-xyz",
        tmp_path / "decnet.log",
        json_path,
        publish_fn=captured.append,
    )

    # Exactly one publish, carrying the parsed record.
    assert len(captured) == 1
    published = captured[0]
    assert published["decky"] == "decky-a"
    assert published["service"] == "ssh"
    assert published["event_type"] == "auth_fail"

    # Publishing is additive: the JSON file must still receive the record.
    records = json_path.read_text().strip().splitlines()
    assert len(records) == 1
    assert json.loads(records[0])["event_type"] == "auth_fail"
|
||||
|
||||
|
||||
def test_stream_container_runs_without_publish_fn(monkeypatch, tmp_path):
    """Pre-bus behavior: with no ``publish_fn`` the JSON record is still written."""
    import docker as _docker_mod

    raw_line = _make_rfc5424_line().encode() + b"\n"
    fake_client = _FakeDockerClient(_FakeContainer([raw_line]))
    monkeypatch.setattr(_docker_mod, "from_env", lambda: fake_client)

    json_path = tmp_path / "decnet.json"
    # No publish_fn argument: must run to completion without raising.
    _stream_container("cid-xyz", tmp_path / "decnet.log", json_path)

    records = json_path.read_text().strip().splitlines()
    assert len(records) == 1
|
||||
|
||||
|
||||
def test_stream_container_swallows_publish_failures(monkeypatch, tmp_path):
    """A raising publish hook must not abort the stream thread."""
    import docker as _docker_mod

    raw_line = _make_rfc5424_line().encode() + b"\n"
    fake_client = _FakeDockerClient(_FakeContainer([raw_line]))
    monkeypatch.setattr(_docker_mod, "from_env", lambda: fake_client)

    def _boom(_parsed):
        raise RuntimeError("transport exploded")

    json_path = tmp_path / "decnet.json"
    # The call itself is the assertion: it must return instead of raising.
    _stream_container(
        "cid-xyz",
        tmp_path / "decnet.log",
        json_path,
        publish_fn=_boom,
    )

    # The record was written even though the hook failed.
    records = json_path.read_text().strip().splitlines()
    assert len(records) == 1
|
||||
|
||||
|
||||
# ─── Bus-disabled escape hatch ───────────────────────────────────────────────
|
||||
|
||||
@pytest.mark.asyncio
async def test_collector_degrades_cleanly_when_bus_disabled(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With ``DECNET_BUS_ENABLED=false`` the bus lifecycle runs without raising.

    The test passes as long as connect, publish and close all complete.
    """
    from decnet.bus.factory import get_bus

    monkeypatch.setenv("DECNET_BUS_ENABLED", "false")
    disabled_bus = get_bus(client_name="collector")

    await disabled_bus.connect()
    payload = {"event_type": "auth_fail"}
    await disabled_bus.publish("system.log", payload, event_type="auth_fail")
    await disabled_bus.close()
|
||||
Reference in New Issue
Block a user