feat(correlation,profiler): publish attacker.observed on first sighting (DEBT-031 worker 3)
CorrelationEngine gains an optional publish_fn hook that fires on the first sighting of each unique attacker IP. The profiler worker — the engine's sole caller today — owns the bus connection, builds a thread-safe publisher, and wraps it with the attacker.observed topic before handing it in. The bus stays optional: if get_bus() fails or DECNET_BUS_ENABLED=false, the worker's publish wrapper becomes a no-op (equivalent to running the engine with publish_fn=None) and the worker degrades to DB-only. Hook failures log a warning and never break ingestion.
This commit is contained in:
@@ -24,6 +24,7 @@ from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable
|
||||
|
||||
from rich.table import Table
|
||||
|
||||
@@ -33,17 +34,35 @@ from decnet.logging.syslog_formatter import (
|
||||
SEVERITY_WARNING,
|
||||
format_rfc5424,
|
||||
)
|
||||
from decnet.logging import get_logger
|
||||
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer
|
||||
|
||||
log = get_logger("correlation.engine")
|
||||
|
||||
|
||||
# ``publish_fn(event_type, payload_dict)``. Sync to avoid rippling
|
||||
# ``async`` through every call site of :meth:`CorrelationEngine.ingest`;
|
||||
# the caller wraps bus-publish via
|
||||
# :func:`decnet.bus.publish.make_thread_safe_publisher`, which is safe to
|
||||
# invoke from any thread including the event-loop thread.
|
||||
CorrelationPublishFn = Callable[[str, dict[str, Any]], None]
|
||||
|
||||
|
||||
class CorrelationEngine:
|
||||
def __init__(self) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
publish_fn: CorrelationPublishFn | None = None,
|
||||
) -> None:
|
||||
# attacker_ip → chronological list of events (only events with an IP)
|
||||
self._events: dict[str, list[LogEvent]] = defaultdict(list)
|
||||
# Total lines parsed (including no-IP and non-DECNET lines)
|
||||
self.lines_parsed: int = 0
|
||||
# Total events indexed (had an attacker_ip)
|
||||
self.events_indexed: int = 0
|
||||
# Optional bus hook — invoked on first-sighting of an attacker IP.
|
||||
# Fires at most once per IP, when it first enters ``_events``; failures are logged, not retried.
|
||||
self._publish_fn = publish_fn
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Ingestion #
|
||||
@@ -61,8 +80,23 @@ class CorrelationEngine:
|
||||
if event is None:
|
||||
return None
|
||||
if event.attacker_ip:
|
||||
first_sighting = event.attacker_ip not in self._events
|
||||
self._events[event.attacker_ip].append(event)
|
||||
self.events_indexed += 1
|
||||
if first_sighting and self._publish_fn is not None:
|
||||
try:
|
||||
self._publish_fn(
|
||||
"observed",
|
||||
{
|
||||
"attacker_ip": event.attacker_ip,
|
||||
"decky": event.decky,
|
||||
"service": event.service,
|
||||
"event_type": event.event_type,
|
||||
"first_seen": event.timestamp.isoformat(),
|
||||
},
|
||||
)
|
||||
except Exception as exc:
|
||||
log.warning("correlation publish hook failed: %s", exc)
|
||||
return event
|
||||
|
||||
@_traced("correlation.ingest_file")
|
||||
|
||||
@@ -13,11 +13,15 @@ Complexity per cycle: O(new_logs + affected_ips) instead of O(total_logs²).
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from decnet.bus import topics as _topics
|
||||
from decnet.bus.factory import get_bus
|
||||
from decnet.bus.publish import make_thread_safe_publisher
|
||||
from decnet.correlation.engine import CorrelationEngine
|
||||
from decnet.correlation.parser import LogEvent
|
||||
from decnet.logging import get_logger
|
||||
@@ -50,18 +54,44 @@ class _WorkerState:
|
||||
async def attacker_profile_worker(repo: BaseRepository, *, interval: int = 30) -> None:
|
||||
"""Periodically updates the Attacker table incrementally. Designed to run as an asyncio Task."""
|
||||
logger.info("attacker profile worker started interval=%ds", interval)
|
||||
state = _WorkerState()
|
||||
|
||||
# Optional bus wiring — correlator-family publishes ride on the profiler
|
||||
# worker because CorrelationEngine lives inside it. If the bus is off or
|
||||
# unreachable the engine's publish hook is a no-op and downstream degrades
|
||||
# to DB-only.
|
||||
bus = None
|
||||
try:
|
||||
bus = get_bus(client_name="profiler")
|
||||
await bus.connect()
|
||||
except Exception as exc:
|
||||
logger.warning("profiler: bus unavailable, continuing without publish: %s", exc)
|
||||
bus = None
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
raw_publish = make_thread_safe_publisher(bus, loop) if bus is not None else None
|
||||
|
||||
def _publish_attacker(event_type: str, payload: dict[str, Any]) -> None:
|
||||
if raw_publish is None:
|
||||
return
|
||||
raw_publish(_topics.attacker(event_type), payload, event_type)
|
||||
|
||||
state = _WorkerState(engine=CorrelationEngine(publish_fn=_publish_attacker))
|
||||
_saved_cursor = await repo.get_state(_STATE_KEY)
|
||||
if _saved_cursor:
|
||||
state.last_log_id = _saved_cursor.get("last_log_id", 0)
|
||||
state.initialized = True
|
||||
logger.info("attacker worker: resumed from cursor last_log_id=%d", state.last_log_id)
|
||||
while True:
|
||||
await asyncio.sleep(interval)
|
||||
try:
|
||||
await _incremental_update(repo, state)
|
||||
except Exception as exc:
|
||||
logger.error("attacker worker: update failed: %s", exc)
|
||||
try:
|
||||
while True:
|
||||
await asyncio.sleep(interval)
|
||||
try:
|
||||
await _incremental_update(repo, state)
|
||||
except Exception as exc:
|
||||
logger.error("attacker worker: update failed: %s", exc)
|
||||
finally:
|
||||
if bus is not None:
|
||||
with contextlib.suppress(Exception):
|
||||
await bus.close()
|
||||
|
||||
|
||||
@_traced("profiler.incremental_update")
|
||||
|
||||
Reference in New Issue
Block a user