"""
Behavioral and timing analysis for DECNET attacker profiles.

This module is the orchestrator: it composes the topical sub-modules
(`timing`, `classify`, `tools`, `phases`, `fingerprint`) into the single
`attacker_behavior` record persisted by the profiler worker.

The individual detectors live in sibling modules:

  - `timing.py`      — inter-arrival-time statistics
  - `classify.py`    — behavior bucket (beaconing / scanning / …)
  - `tools.py`       — C2 beacon cadence + HTTP-header tool attribution
  - `phases.py`      — recon → exfil phase sequencing
  - `fingerprint.py` — sniffer + prober TCP/OS fingerprint rollup

Their public symbols are re-exported here for backward compatibility with
callers and tests that import directly from `decnet.profiler.behavioral`.
"""

from __future__ import annotations

import json
from typing import Any

from decnet.correlation.parser import LogEvent
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer

from .classify import classify_behavior
from .fingerprint import sniffer_rollup
from .phases import phase_sequence
from .timing import timing_stats
from .tools import detect_tools_from_headers, guess_tool, guess_tools

__all__ = [
    "build_behavior_record",
    "classify_behavior",
    "detect_tools_from_headers",
    "guess_tool",
    "guess_tools",
    "phase_sequence",
    "sniffer_rollup",
    "timing_stats",
]


@_traced("profiler.build_behavior_record")
def build_behavior_record(events: list[LogEvent]) -> dict[str, Any]:
    """
    Assemble the row that gets persisted in the `attacker_behavior` table.

    The timing, classification, fingerprint, phase and tool detectors are
    each run over *events* and their results merged into one flat dict.
    JSON-typed columns (`tcp_fingerprint`, `kex_order_raw`,
    `ssh_client_banners`, `tool_guesses`, `timing_stats`, `phase_sequence`)
    are encoded to strings here so the repo layer can stay schema-agnostic.

    Args:
        events: Log events to profile.

    Returns:
        A dict keyed by `attacker_behavior` column name. `kex_order_raw`
        and `ssh_client_banners` are None when the rollup has nothing for
        them; `beacon_interval_s` and `beacon_jitter_pct` are None unless
        the behavior class is "beaconing".
    """
    # Timing is deliberately computed over the unfiltered event list: a C2
    # beacon often reuses the same "connection" event_type on every
    # check-in, and filtering would throw that signal away.
    stats = timing_stats(events)
    distinct_services = {event.service for event in events}
    behavior = classify_behavior(stats, len(distinct_services))
    rollup = sniffer_rollup(events)
    phase = phase_sequence(events)

    os_guess = rollup["os_guess"]
    mean_iat = stats.get("mean_iat_s")
    cv = stats.get("cv")

    # Merge beacon-timing matches with header-based detections; going
    # through dict keys drops duplicates while keeping first-seen order.
    from_timing = guess_tools(mean_iat, cv)
    from_headers = detect_tools_from_headers(events)
    tools: list[str] = list(dict.fromkeys(from_timing + from_headers))

    # Promote TCP-level scanner identification to the front of the list.
    # p0f fingerprints nmap from the TCP handshake alone — this fires even
    # when no HTTP service is present, making it far more reliable than
    # the header-based path for raw port scans.
    if os_guess == "nmap" and "nmap" not in tools:
        tools.insert(0, "nmap")

    # Interval/jitter are only surfaced for flows classified as beaconing;
    # for any other class these numbers are noise.
    beacon_interval_s: float | None = None
    beacon_jitter_pct: float | None = None
    if behavior == "beaconing":
        beacon_interval_s = mean_iat
        if cv is not None:
            beacon_jitter_pct = round(cv * 100, 2)

    tracer = _get_tracer("profiler")
    with tracer.start_as_current_span("profiler.behavior_summary") as span:
        summary: dict[str, Any] = {
            "behavior_class": behavior,
            "os_guess": os_guess or "unknown",
            "tool_count": len(tools),
            "event_count": stats.get("event_count", 0),
        }
        if tools:
            summary["tools"] = ",".join(tools)
        for attr_name, attr_value in summary.items():
            span.set_attribute(attr_name, attr_value)

    # Missing or empty SSH fields are stored as NULL, not as an encoded
    # empty list.
    kex_order = rollup.get("kex_order_raw")
    banners = rollup.get("ssh_client_banners")

    return {
        "os_guess": os_guess,
        "hop_distance": rollup["hop_distance"],
        "tcp_fingerprint": json.dumps(rollup["tcp_fingerprint"]),
        "kex_order_raw": json.dumps(kex_order) if kex_order else None,
        "ssh_client_banners": json.dumps(banners) if banners else None,
        "retransmit_count": rollup["retransmit_count"],
        "behavior_class": behavior,
        "beacon_interval_s": beacon_interval_s,
        "beacon_jitter_pct": beacon_jitter_pct,
        "tool_guesses": json.dumps(tools),
        "timing_stats": json.dumps(stats),
        "phase_sequence": json.dumps(phase),
    }