merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
@@ -24,25 +24,68 @@ from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable
|
||||
|
||||
from rich.table import Table
|
||||
|
||||
from decnet.correlation.graph import AttackerTraversal, TraversalHop
|
||||
from decnet.correlation.graph import AttackerTraversal, MutationMarker, TraversalHop
|
||||
from decnet.correlation.parser import LogEvent, parse_line
|
||||
from decnet.logging.syslog_formatter import (
|
||||
SEVERITY_WARNING,
|
||||
format_rfc5424,
|
||||
)
|
||||
from decnet.logging import get_logger
|
||||
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer
|
||||
|
||||
log = get_logger("correlation.engine")
|
||||
|
||||
|
||||
# Decky-name prefix reserved for DECNET's own infrastructure workers
|
||||
# that log attacker IPs without representing actual decoy hops. The
|
||||
# prober is the canonical example: when it fingerprints an attacker's
|
||||
# externally-exposed services, it writes events with
|
||||
# ``hostname=decnet-prober`` and ``target_ip=<attacker IP>``. The parser
|
||||
# pulls ``target_ip`` into ``attacker_ip`` so the prober event is
|
||||
# co-indexed with that attacker — but it's outbound recon from the
|
||||
# master, not the attacker traversing into another decoy. Excluding the
|
||||
# whole ``decnet-*`` namespace from distinct-decky counts and hop paths
|
||||
# avoids labelling every fingerprinted attacker as a "traversal."
|
||||
_INTERNAL_DECKY_PREFIX = "decnet-"
|
||||
|
||||
|
||||
def _is_internal_decky(name: str) -> bool:
    """Report whether ``name`` carries the reserved internal-worker prefix.

    Names starting with ``_INTERNAL_DECKY_PREFIX`` belong to DECNET's own
    workers (the prober, for example) rather than to a real decoy. An
    empty or otherwise falsy name is never treated as internal.
    """
    if not name:
        return False
    return name.startswith(_INTERNAL_DECKY_PREFIX)
|
||||
|
||||
|
||||
# ``publish_fn(event_type, payload_dict)``. Sync to avoid rippling
|
||||
# ``async`` through every call site of :meth:`CorrelationEngine.ingest`;
|
||||
# the caller wraps bus-publish via
|
||||
# :func:`decnet.bus.publish.make_thread_safe_publisher`, which is safe to
|
||||
# invoke from any thread including the event-loop thread.
|
||||
CorrelationPublishFn = Callable[[str, dict[str, Any]], None]
|
||||
|
||||
|
||||
class CorrelationEngine:
|
||||
def __init__(
    self,
    *,
    publish_fn: CorrelationPublishFn | None = None,
) -> None:
    """Create an engine with empty indexes and zeroed counters.

    *publish_fn* is an optional, keyword-only bus hook. When set, the
    ingest path calls it the first time an attacker IP is indexed.
    """
    # Optional bus hook; ``None`` disables first-sighting publishing.
    self._publish_fn = publish_fn

    # Per-attacker index: attacker_ip -> chronological list of events.
    # Only events that carried an IP land here.
    self._events: dict[str, list[LogEvent]] = defaultdict(list)
    # Per-decky index: decky_name -> chronological list of mutation
    # events. traversals() joins it against ``_events`` by time window.
    self._mutations: dict[str, list[LogEvent]] = defaultdict(list)

    # Running totals.
    self.lines_parsed: int = 0        # every parsed line, incl. no-IP / non-DECNET
    self.events_indexed: int = 0      # events that had an attacker_ip
    self.mutations_indexed: int = 0   # events with kind="mutation"
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Ingestion #
|
||||
@@ -59,11 +102,31 @@ class CorrelationEngine:
|
||||
event = parse_line(line)
|
||||
if event is None:
|
||||
return None
|
||||
if event.kind == "mutation":
|
||||
self._mutations[event.decky].append(event)
|
||||
self.mutations_indexed += 1
|
||||
return event
|
||||
if event.attacker_ip:
|
||||
first_sighting = event.attacker_ip not in self._events
|
||||
self._events[event.attacker_ip].append(event)
|
||||
self.events_indexed += 1
|
||||
if first_sighting and self._publish_fn is not None:
|
||||
try:
|
||||
self._publish_fn(
|
||||
"observed",
|
||||
{
|
||||
"attacker_ip": event.attacker_ip,
|
||||
"decky": event.decky,
|
||||
"service": event.service,
|
||||
"event_type": event.event_type,
|
||||
"first_seen": event.timestamp.isoformat(),
|
||||
},
|
||||
)
|
||||
except Exception as exc:
|
||||
log.warning("correlation publish hook failed: %s", exc)
|
||||
return event
|
||||
|
||||
@_traced("correlation.ingest_file")
|
||||
def ingest_file(self, path: Path) -> int:
|
||||
"""
|
||||
Parse every line of *path* and index it.
|
||||
@@ -73,12 +136,18 @@ class CorrelationEngine:
|
||||
with open(path) as fh:
|
||||
for line in fh:
|
||||
self.ingest(line)
|
||||
_tracer = _get_tracer("correlation")
|
||||
with _tracer.start_as_current_span("correlation.ingest_file.summary") as _span:
|
||||
_span.set_attribute("lines_parsed", self.lines_parsed)
|
||||
_span.set_attribute("events_indexed", self.events_indexed)
|
||||
_span.set_attribute("unique_ips", len(self._events))
|
||||
return self.events_indexed
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Query #
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
@_traced("correlation.traversals")
|
||||
def traversals(self, min_deckies: int = 2) -> list[AttackerTraversal]:
|
||||
"""
|
||||
Return all attackers that touched at least *min_deckies* distinct
|
||||
@@ -86,14 +155,36 @@ class CorrelationEngine:
|
||||
"""
|
||||
result: list[AttackerTraversal] = []
|
||||
for ip, events in self._events.items():
|
||||
if len({e.decky for e in events}) < min_deckies:
|
||||
# Exclude internal-infrastructure events (e.g. prober) from
|
||||
# distinct-decky counting and the hop list. They aren't
|
||||
# attacker movement — they're outbound recon co-indexed by
|
||||
# attacker IP. Without this filter every fingerprinted
|
||||
# attacker shows up as a 2-decky "traversal" with a bogus
|
||||
# ``dmz-gateway → decnet-prober`` path.
|
||||
decoy_events = [e for e in events if not _is_internal_decky(e.decky)]
|
||||
if len({e.decky for e in decoy_events}) < min_deckies:
|
||||
continue
|
||||
hops = sorted(
|
||||
(TraversalHop(e.timestamp, e.decky, e.service, e.event_type)
|
||||
for e in events),
|
||||
for e in decoy_events),
|
||||
key=lambda h: h.timestamp,
|
||||
)
|
||||
result.append(AttackerTraversal(attacker_ip=ip, hops=hops))
|
||||
# Per-attacker mutation markers: any mutation on a touched
|
||||
# decky between first_seen and last_seen. Window is
|
||||
# inclusive on both ends so a creation-at-T0 + first-contact-
|
||||
# at-T0 race still attaches the marker.
|
||||
first_ts = hops[0].timestamp
|
||||
last_ts = hops[-1].timestamp
|
||||
touched = {h.decky for h in hops}
|
||||
markers: list[MutationMarker] = []
|
||||
for decky in touched:
|
||||
for mev in self._mutations.get(decky, ()):
|
||||
if first_ts <= mev.timestamp <= last_ts:
|
||||
markers.append(_marker_from_event(mev))
|
||||
markers.sort(key=lambda m: m.timestamp)
|
||||
result.append(AttackerTraversal(
|
||||
attacker_ip=ip, hops=hops, mutations_during=markers,
|
||||
))
|
||||
return sorted(result, key=lambda t: t.first_seen)
|
||||
|
||||
def all_attackers(self) -> dict[str, int]:
|
||||
@@ -135,6 +226,7 @@ class CorrelationEngine:
|
||||
)
|
||||
return table
|
||||
|
||||
@_traced("correlation.report_json")
|
||||
def report_json(self, min_deckies: int = 2) -> dict:
|
||||
"""Serialisable dict representation of all traversals."""
|
||||
return {
|
||||
@@ -147,6 +239,63 @@ class CorrelationEngine:
|
||||
"traversals": [t.to_dict() for t in self.traversals(min_deckies)],
|
||||
}
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Credential reuse #
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
async def correlate_credential_reuse(
    self,
    repo: Any,
    min_targets: int = 2,
) -> list[dict[str, Any]]:
    """Find credentials reused across targets and persist each finding.

    Asks *repo* for candidate groups (keyed by ``secret_sha256``,
    ``secret_kind`` and ``principal``) that reach *min_targets*, then
    calls ``repo.upsert_credential_reuse`` once per credential in the
    group. Every credential of a group lands on the same reuse row, so
    the per-credential upsert results are collapsed into a single entry
    per group.

    Returns one row per group whose upserts reported ``inserted`` or
    ``changed`` at least once. The row is the last non-``None`` upsert
    result, with both flags overwritten by the group-wide outcome so the
    caller can publish one event per finding without re-querying.
    """
    findings: list[dict[str, Any]] = []
    for group in await repo.find_credential_reuse_candidates(min_targets):
        latest: dict[str, Any] | None = None
        any_inserted = False
        any_changed = False

        for cred in group["credentials"]:
            upsert_args = {
                "secret_sha256": group["secret_sha256"],
                "secret_kind": group["secret_kind"],
                "principal": group["principal"],
                "attacker_uuid": cred.get("attacker_uuid"),
                "attacker_ip": cred["attacker_ip"],
                "decky": cred["decky_name"],
                "service": cred["service"],
                # Missing / falsy attempt counts are treated as a single attempt.
                "attempt_count": int(cred.get("attempt_count") or 1),
            }
            outcome = await repo.upsert_credential_reuse(**upsert_args)
            if outcome is None:
                continue
            latest = outcome
            if not any_inserted:
                any_inserted = bool(outcome.get("inserted"))
            if not any_changed:
                any_changed = bool(outcome.get("changed"))

        # Nothing persisted, or nothing new: no finding for this group.
        if latest is None or not (any_inserted or any_changed):
            continue

        # Report the final row state with the group-wide flags rather
        # than the flags of whichever upsert happened to run last.
        latest["inserted"] = any_inserted
        latest["changed"] = any_changed
        findings.append(latest)
    return findings
|
||||
|
||||
@_traced("correlation.traversal_syslog_lines")
|
||||
def traversal_syslog_lines(self, min_deckies: int = 2) -> list[str]:
|
||||
"""
|
||||
Emit one RFC 5424 syslog line per detected traversal.
|
||||
@@ -177,6 +326,26 @@ class CorrelationEngine:
|
||||
# Helpers #
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
def _marker_from_event(event: LogEvent) -> MutationMarker:
    """Convert a parsed ``decky_mutated`` log event into a :class:`MutationMarker`.

    ``old_services`` / ``new_services`` travel as comma-joined strings in
    the SD params because RFC 5424 has no native list type. They are
    split back into lists here; empty pieces are discarded, so an empty
    or missing param yields an empty list. A missing ``trigger`` becomes
    the empty string.
    """
    params = event.fields
    # filter(None, ...) drops the empty strings produced by "" or ",,".
    before = list(filter(None, params.get("old_services", "").split(",")))
    after = list(filter(None, params.get("new_services", "").split(",")))
    return MutationMarker(
        timestamp=event.timestamp,
        decky=event.decky,
        old_services=before,
        new_services=after,
        trigger=params.get("trigger", ""),
    )
|
||||
|
||||
|
||||
def _fmt_duration(seconds: float) -> str:
|
||||
if seconds < 60:
|
||||
return f"{seconds:.0f}s"
|
||||
|
||||
113
decnet/correlation/event_kinds.py
Normal file
113
decnet/correlation/event_kinds.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""Classify RFC 5424 event_type strings as interaction vs. scan vs. noise.
|
||||
|
||||
Used by:
|
||||
- The attacker detail endpoint to split services into "scanned" and
|
||||
"interacted with" buckets, distinguishing port scanners from
|
||||
attackers who actually engaged.
|
||||
- The profiler worker to filter command-family events when extracting
|
||||
executed-command history.
|
||||
|
||||
Classification is conservative: an unknown event_type defaults to
|
||||
``scan`` rather than ``interaction``. That way a new service template
|
||||
emitting a fresh verb shows up as "scanned" on the dashboard — visible
|
||||
but not over-credited. Adding it to ``INTERACTION_EVENT_TYPES`` is
|
||||
always a deliberate promotion.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Literal
|
||||
|
||||
# Event types showing the attacker went past reconnaissance: ran a
# command, sent mail, moved a file, or used a pub/sub topic. A single
# such event from an attacker marks the service as "interacted with".
INTERACTION_EVENT_TYPES: frozenset[str] = frozenset({
    # Shell / command family. Taken over from the profiler's original
    # command-extraction set; this module now owns that vocabulary.
    "command", "exec", "query", "input", "shell_input",
    "execute", "run", "sql_query", "redis_command", "ldap_search",

    # SMTP engagement: from MAIL FROM / RCPT TO onwards the attacker is
    # trying to send mail rather than grabbing a banner.
    # ``message_accepted`` marks the DATA commit.
    "mail_from", "rcpt_to", "rcpt_denied", "message_accepted",

    # File / payload activity ("retr" is the FTP retrieve verb).
    "file_captured", "upload", "download_attempt", "retr",

    # Pub/sub used operationally, not just a connection.
    "publish", "subscribe",

    # Recorded TTY sessions: sessrec only writes when there was PTY
    # input, so a recording always counts as interaction.
    "session_recorded",
})
|
||||
|
||||
|
||||
# Event types that come from DECNET itself or from protocol-framework
# plumbing, not from anything the attacker meaningfully did. They are
# excluded from both the "scanned" and the "interacted" bucket.
NOISE_EVENT_TYPES: frozenset[str] = frozenset({
    # Service lifecycle / configuration
    "startup", "shutdown", "config_error",
    # Unparseable or unrecognised protocol traffic
    "parse_error", "unknown_packet", "unknown_opcode",
    "unknown_command", "protocol_error",
})
|
||||
|
||||
|
||||
EventKind = Literal["interaction", "scan", "noise"]
|
||||
|
||||
|
||||
def classify_event(event_type: str) -> EventKind:
    """Map one ``event_type`` string to ``interaction``, ``noise`` or ``scan``.

    Membership in ``INTERACTION_EVENT_TYPES`` is checked first, then
    ``NOISE_EVENT_TYPES``; anything in neither set falls back to ``scan``.
    """
    if event_type in INTERACTION_EVENT_TYPES:
        return "interaction"
    return "noise" if event_type in NOISE_EVENT_TYPES else "scan"
|
||||
|
||||
|
||||
def bucket_services(
    pairs: list[tuple[str, str]],
) -> dict[str, list[str]]:
    """Split distinct service names into ``interacted`` and ``scanned``.

    *pairs* holds ``(service, event_type)`` tuples, the shape the repo
    returns from a ``SELECT DISTINCT service, event_type`` query. Each
    service keeps the strongest classification seen across its events,
    ranked interaction > scan > noise. Services that only ever produced
    noise are left out of the result.

    Returns ``{"interacted": [...], "scanned": [...]}`` with both lists
    sorted. A service holds exactly one classification, so the two lists
    never overlap.
    """
    strength = {"noise": 0, "scan": 1, "interaction": 2}
    strongest: dict[str, EventKind] = {}
    for service, event_type in pairs:
        kind = classify_event(event_type)
        held = strongest.get(service)
        # Only ever upgrade: first sighting, or a strictly stronger kind.
        if held is None or strength[kind] > strength[held]:
            strongest[service] = kind

    buckets: dict[str, list[str]] = {"interacted": [], "scanned": []}
    for service, kind in strongest.items():
        if kind == "interaction":
            buckets["interacted"].append(service)
        elif kind == "scan":
            buckets["scanned"].append(service)
    buckets["interacted"].sort()
    buckets["scanned"].sort()
    return buckets
|
||||
@@ -8,10 +8,29 @@ by reading the unique decky sequence from the hop list.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
@dataclass
class MutationMarker:
    """One substrate change that happened while an attacker was traversing.

    Produced by the mutator (or by deploy/teardown) and picked up by the
    correlation engine, which attaches markers to a traversal so that
    ``AttackerTraversal.to_dict()`` can place them in time order between
    attacker hops. Example: a hit on ``decky-03`` at ``T5``, a mutation at
    ``T6`` and another hit at ``T7`` then reads as a mid-session substrate
    transition instead of an unexplained gap.
    """

    # When the mutation took place.
    timestamp: datetime
    # Name of the decky that was mutated.
    decky: str
    # Service set before the change.
    old_services: list[str]
    # Service set after the change.
    new_services: list[str]
    # What caused it: creation | retirement | scheduled | operator | …
    trigger: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class TraversalHop:
|
||||
"""A single event in an attacker's traversal through the deception network."""
|
||||
@@ -31,6 +50,10 @@ class AttackerTraversal:
|
||||
|
||||
attacker_ip: str
|
||||
hops: list[TraversalHop] # chronologically sorted
|
||||
# Substrate-change markers on deckies this attacker touched, bounded
|
||||
# by first_seen/last_seen. Empty for legacy attacker-only ingest;
|
||||
# populated once mutation events flow through the engine.
|
||||
mutations_during: list[MutationMarker] = field(default_factory=list)
|
||||
|
||||
@property
|
||||
def first_seen(self) -> datetime:
|
||||
@@ -62,6 +85,35 @@ class AttackerTraversal:
|
||||
"""Human-readable traversal path: decky-01 → decky-03 → decky-07"""
|
||||
return " → ".join(self.deckies)
|
||||
|
||||
def timeline(self) -> list[dict]:
    """Return hops and mutation markers merged into one time-ordered list.

    Every entry is a plain dict tagged with a ``kind`` key (``hop`` or
    ``mutation``) so JSON consumers can tell the two apart. No filtering
    happens here: the engine has already restricted ``mutations_during``
    to deckies this attacker touched, so this is purely a merge by
    timestamp. The sort is stable, so a hop and a mutation sharing a
    timestamp come out hop first.
    """
    hop_rows = [
        (hop.timestamp, {
            "kind": "hop",
            "timestamp": hop.timestamp.isoformat(),
            "decky": hop.decky,
            "service": hop.service,
            "event_type": hop.event_type,
        })
        for hop in self.hops
    ]
    mutation_rows = [
        (marker.timestamp, {
            "kind": "mutation",
            "timestamp": marker.timestamp.isoformat(),
            "decky": marker.decky,
            "old_services": marker.old_services,
            "new_services": marker.new_services,
            "trigger": marker.trigger,
        })
        for marker in self.mutations_during
    ]
    ordered = sorted(hop_rows + mutation_rows, key=lambda row: row[0])
    return [payload for _, payload in ordered]
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"attacker_ip": self.attacker_ip,
|
||||
@@ -81,4 +133,15 @@ class AttackerTraversal:
|
||||
}
|
||||
for h in self.hops
|
||||
],
|
||||
"mutations_during": [
|
||||
{
|
||||
"timestamp": m.timestamp.isoformat(),
|
||||
"decky": m.decky,
|
||||
"old_services": m.old_services,
|
||||
"new_services": m.new_services,
|
||||
"trigger": m.trigger,
|
||||
}
|
||||
for m in self.mutations_during
|
||||
],
|
||||
"timeline": self.timeline(),
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ the fields needed for cross-decky correlation: attacker IP, decky name,
|
||||
service, event type, and timestamp.
|
||||
|
||||
Log format (produced by decnet.logging.syslog_formatter):
|
||||
<PRI>1 TIMESTAMP HOSTNAME APP-NAME - MSGID [decnet@55555 k1="v1" k2="v2"] [MSG]
|
||||
<PRI>1 TIMESTAMP HOSTNAME APP-NAME - MSGID [relay@55555 k1="v1" k2="v2"] [MSG]
|
||||
|
||||
The attacker IP may appear under several field names depending on service:
|
||||
src_ip — ftp, smtp, http, most services
|
||||
@@ -17,8 +17,9 @@ The attacker IP may appear under several field names depending on service:
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Literal
|
||||
|
||||
# RFC 5424 line structure
|
||||
_RFC5424_RE = re.compile(
|
||||
@@ -31,14 +32,31 @@ _RFC5424_RE = re.compile(
|
||||
r"(.+)$", # 5: SD element + optional MSG
|
||||
)
|
||||
|
||||
# Structured data block: [decnet@55555 k="v" ...]
|
||||
_SD_BLOCK_RE = re.compile(r'\[decnet@55555\s+(.*?)\]', re.DOTALL)
|
||||
# Structured data block: [relay@55555 k="v" ...]
|
||||
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
|
||||
|
||||
# Individual param: key="value" (with escaped chars inside value)
|
||||
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
||||
|
||||
# Field names to probe for attacker IP, in priority order
|
||||
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "ip")
|
||||
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "remote_addr", "target_ip", "ip")
|
||||
|
||||
# Native syslog producers (sshd, pam_unix routed through rsyslog) emit
|
||||
# free prose with no SD block. Pull the remote address out of idiomatic
|
||||
# anchors first ("from <ip>", "rhost=<ip>"), then fall back to the first
|
||||
# IPv4 in the line. Anchored matches keep us from picking the local
|
||||
# listener in "Connection from X port Y on Z port 22".
|
||||
_IPV4 = r"\d{1,3}(?:\.\d{1,3}){3}"
|
||||
_IPV6 = r"[0-9a-fA-F:]+:[0-9a-fA-F:]+"
|
||||
_IP_RE = rf"(?:{_IPV4}|{_IPV6})"
|
||||
_MSG_IP_ANCHORED_RE = re.compile(
|
||||
rf"\b(?:from|rhost[:=]|client[:=]|src[:=])\s*({_IP_RE})",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_MSG_IP_BARE_RE = re.compile(rf"\b({_IPV4})\b")
|
||||
|
||||
|
||||
EventKind = Literal["attacker", "mutation"]
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -52,6 +70,12 @@ class LogEvent:
|
||||
attacker_ip: str | None # extracted from SD params; None if not present
|
||||
fields: dict[str, str] # all structured data params
|
||||
raw: str # original log line (stripped)
|
||||
# ``attacker`` = service-emitted event keyed on a source IP (the
|
||||
# existing correlation input). ``mutation`` = ``mutator`` worker
|
||||
# event — same RFC 5424 wire format but routed into a separate
|
||||
# per-decky index so substrate transitions can be interleaved into
|
||||
# attacker traversals without polluting the per-IP event stream.
|
||||
kind: EventKind = field(default="attacker")
|
||||
|
||||
|
||||
def _parse_sd_params(sd_rest: str) -> dict[str, str]:
|
||||
@@ -66,10 +90,17 @@ def _parse_sd_params(sd_rest: str) -> dict[str, str]:
|
||||
return params
|
||||
|
||||
|
||||
def _extract_attacker_ip(fields: dict[str, str], msg: str = "") -> str | None:
    """Return the attacker IP for one log event, or ``None`` if none is found.

    Structured-data params win: the names in ``_IP_FIELDS`` are probed in
    priority order and the first one holding a non-empty value is
    returned. A param that is present but empty is treated as absent, so
    it cannot hide a populated lower-priority param or the message
    fallback.

    If no param yields a value and *msg* is non-empty, the free-text
    message is searched: first for an anchored address
    (``_MSG_IP_ANCHORED_RE``), then for the first bare IPv4
    (``_MSG_IP_BARE_RE``).
    """
    for fname in _IP_FIELDS:
        value = fields.get(fname)
        # Skip missing *and* empty values; keep probing lower priorities.
        if value:
            return value
    if msg:
        # Anchored match first so a local listener address mentioned later
        # in the line is not picked over the remote peer.
        match = _MSG_IP_ANCHORED_RE.search(msg) or _MSG_IP_BARE_RE.search(msg)
        if match:
            return match.group(1)
    return None
|
||||
|
||||
|
||||
@@ -99,7 +130,20 @@ def parse_line(line: str) -> LogEvent | None:
|
||||
return None
|
||||
|
||||
fields = _parse_sd_params(sd_rest)
|
||||
attacker_ip = _extract_attacker_ip(fields)
|
||||
if sd_rest.startswith("-"):
|
||||
msg = sd_rest[1:].lstrip()
|
||||
else:
|
||||
tail = re.search(r'\]\s+(.+)$', sd_rest)
|
||||
msg = tail.group(1).strip() if tail else ""
|
||||
attacker_ip = _extract_attacker_ip(fields, msg)
|
||||
|
||||
# Mutator-emitted transitions arrive on the same ingest stream but
|
||||
# belong in the substrate-state index, not the per-IP attacker one.
|
||||
kind: EventKind = (
|
||||
"mutation"
|
||||
if service == "mutator" and event_type == "decky_mutated"
|
||||
else "attacker"
|
||||
)
|
||||
|
||||
return LogEvent(
|
||||
timestamp=timestamp,
|
||||
@@ -109,4 +153,5 @@ def parse_line(line: str) -> LogEvent | None:
|
||||
attacker_ip=attacker_ip,
|
||||
fields=fields,
|
||||
raw=line,
|
||||
kind=kind,
|
||||
)
|
||||
|
||||
153
decnet/correlation/reuse_worker.py
Normal file
153
decnet/correlation/reuse_worker.py
Normal file
@@ -0,0 +1,153 @@
|
||||
"""Long-running credential-reuse correlator.
|
||||
|
||||
Loops :meth:`CorrelationEngine.correlate_credential_reuse` over the
|
||||
credentials table and publishes ``credential.reuse.detected`` for every
|
||||
new or grown ``CredentialReuse`` row. Mirrors the mutator's bus-wake +
|
||||
slow-tick pattern from :mod:`decnet.mutator.engine`: woken on
|
||||
``credential.captured`` and ``attacker.observed`` for sub-second latency,
|
||||
falls back to a 60s poll if the bus is unavailable.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
|
||||
from decnet.bus import topics as _topics
|
||||
from decnet.bus.base import BaseBus
|
||||
from decnet.bus.factory import get_bus
|
||||
from decnet.bus.publish import (
|
||||
publish_safely,
|
||||
run_control_listener_signal as _run_control_listener_signal,
|
||||
run_health_heartbeat as _run_health_heartbeat,
|
||||
)
|
||||
from decnet.correlation.engine import CorrelationEngine
|
||||
from decnet.logging import get_logger
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
|
||||
log = get_logger("correlation.reuse_worker")
|
||||
|
||||
_DEFAULT_POLL_SECS = 60.0
|
||||
_DEFAULT_MIN_TARGETS = 2
|
||||
|
||||
|
||||
async def run_reuse_loop(
    repo: BaseRepository,
    *,
    poll_interval_secs: float = _DEFAULT_POLL_SECS,
    min_targets: int = _DEFAULT_MIN_TARGETS,
    shutdown: asyncio.Event | None = None,
) -> None:
    """Run the credential-reuse correlator until cancelled.

    Each iteration runs one correlation pass over *repo*, publishes one
    ``credential.reuse.detected`` event per returned row, then sleeps
    until a bus wake-up arrives or *poll_interval_secs* elapses.

    *shutdown* is an optional external stop signal; the loop also exits
    cleanly on ``CancelledError`` and ``KeyboardInterrupt``. The
    *min_targets* threshold is the minimum number of distinct
    ``(decky, service)`` pairs a secret must touch before it's persisted
    as a reuse finding.

    If the bus cannot be set up the loop still runs, driven only by the
    poll interval.
    """
    log.info(
        "reuse correlator started poll_interval_secs=%s min_targets=%s",
        poll_interval_secs, min_targets,
    )

    bus: BaseBus | None = None
    wake = asyncio.Event()
    wake_tasks: list[asyncio.Task] = []
    heartbeat_task: asyncio.Task | None = None
    try:
        # ``bus`` is only assigned after connect() succeeds, so a failed
        # connect leaves it as None and the loop runs in poll-only mode.
        # NOTE(review): in that case ``candidate`` is not closed by the
        # ``finally`` below (it only closes ``bus``) — confirm whether a
        # failed connect needs explicit cleanup.
        candidate = get_bus(client_name="reuse-correlator")
        await candidate.connect()
        bus = candidate
        # Two subscribers share one ``wake`` event: either topic firing
        # cuts the sleep at the bottom of the loop short.
        wake_tasks.append(asyncio.create_task(
            _wake_on(bus, wake, _topics.credential(_topics.CREDENTIAL_CAPTURED)),
        ))
        wake_tasks.append(asyncio.create_task(
            _wake_on(bus, wake, _topics.attacker(_topics.ATTACKER_OBSERVED)),
        ))
        heartbeat_task = asyncio.create_task(
            _run_health_heartbeat(bus, "reuse-correlator"),
        )
        # Stored alongside the wake subscribers purely so it is cancelled
        # and awaited with them during teardown.
        wake_tasks.append(asyncio.create_task(
            _run_control_listener_signal(bus, "reuse-correlator"),
        ))
    except Exception as exc:  # noqa: BLE001
        log.warning(
            "reuse correlator: bus unavailable, running in poll-only mode: %s",
            exc,
        )

    engine = CorrelationEngine()
    if shutdown is None:
        # No external stop signal supplied: use a private event that is
        # never set, so only cancellation ends the loop.
        shutdown = asyncio.Event()

    try:
        while not shutdown.is_set():
            try:
                results = await engine.correlate_credential_reuse(
                    repo, min_targets=min_targets,
                )
            except Exception:  # noqa: BLE001
                # A failed pass is logged and treated as "no findings";
                # the loop carries on to the next tick.
                log.exception("reuse correlator: tick failed")
                results = []

            for row in results:
                # ``bus`` may be None here (poll-only mode); presumably
                # publish_safely tolerates that — TODO confirm.
                await publish_safely(
                    bus,
                    _topics.credential(_topics.CREDENTIAL_REUSE_DETECTED),
                    {
                        "id": row.get("id"),
                        "secret_kind": row.get("secret_kind"),
                        "target_count": row.get("target_count"),
                        "attacker_uuids": row.get("attacker_uuids"),
                        "attacker_ips": row.get("attacker_ips"),
                        "deckies": row.get("deckies"),
                        "services": row.get("services"),
                    },
                    event_type=_topics.CREDENTIAL_REUSE_DETECTED,
                )

            # Sleep until a subscriber sets ``wake`` or the poll interval
            # runs out, whichever comes first.
            # NOTE(review): ``shutdown`` is only re-checked at the top of
            # the loop, so a stop request raised during this wait is not
            # seen until ``wake`` fires or the timeout elapses.
            try:
                await asyncio.wait_for(
                    wake.wait(), timeout=float(poll_interval_secs),
                )
            except asyncio.TimeoutError:
                pass
            # Reset so the next iteration waits again.
            wake.clear()
    except (asyncio.CancelledError, KeyboardInterrupt):
        # Swallowed on purpose: cancellation is the normal way to stop.
        log.info("reuse correlator stopped")
    finally:
        # Teardown order: request cancellation of every background task,
        # wait for each to finish, then close the bus.
        for t in wake_tasks:
            t.cancel()
        if heartbeat_task is not None:
            heartbeat_task.cancel()
        for t in (*wake_tasks, heartbeat_task):
            if t is None:
                continue
            # Whatever a task raises while unwinding is irrelevant here.
            with contextlib.suppress(asyncio.CancelledError, Exception):
                await t
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
|
||||
|
||||
|
||||
async def _wake_on(bus: BaseBus, wake: asyncio.Event, pattern: str) -> None:
    """Set *wake* each time an event matching *pattern* arrives on *bus*.

    Runs until cancelled or until the subscription fails. Cancellation
    is re-raised untouched. Any other error is logged as a warning and
    ends this coroutine without propagating, leaving the caller to rely
    on its poll interval instead of bus wake-ups for this pattern.
    """
    try:
        subscription = bus.subscribe(pattern)
        async with subscription:
            async for _message in subscription:
                wake.set()
    except asyncio.CancelledError:
        # Let task cancellation propagate to whoever cancelled us.
        raise
    except Exception as exc:  # noqa: BLE001
        log.warning(
            "reuse correlator: subscriber for %s died (%s); falling back to poll",
            pattern, exc,
        )
|
||||
|
||||
|
||||
__all__ = ["run_reuse_loop"]
|
||||
Reference in New Issue
Block a user