refactor(profiler): split behavioral.py into topical modules
Break the 603-line behavioral.py into timing/classify/tools/phases/fingerprint sibling modules plus a slim orchestrator. Public API unchanged: behavioral.py re-exports every previously-exposed symbol, so worker.py and existing tests keep working with zero import changes. No behavior change; all 64 profiler tests pass.
This commit is contained in:
162
decnet/profiler/fingerprint.py
Normal file
162
decnet/profiler/fingerprint.py
Normal file
@@ -0,0 +1,162 @@
|
||||
"""OS / TCP fingerprint rollup for DECNET attacker profiles.
|
||||
|
||||
Consumes sniffer-emitted `tcp_syn_fingerprint` / `tcp_flow_timing` events and
|
||||
active prober `tcpfp_fingerprint` events; derives a per-attacker summary
|
||||
(os_guess, hop_distance, tcp_fingerprint snapshot, retransmit_count).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import statistics
|
||||
from collections import Counter
|
||||
from typing import Any
|
||||
|
||||
from decnet.correlation.parser import LogEvent
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
# Sniffer-emitted packet events that feed into fingerprint rollup.
|
||||
_SNIFFER_SYN_EVENT: str = "tcp_syn_fingerprint"
|
||||
_SNIFFER_FLOW_EVENT: str = "tcp_flow_timing"
|
||||
# Prober-emitted active-probe result (SYN-ACK fingerprint of attacker machine).
|
||||
_PROBER_TCPFP_EVENT: str = "tcpfp_fingerprint"
|
||||
|
||||
# Canonical initial TTL for each coarse OS bucket. Used to derive hop
|
||||
# distance when only the observed TTL is available (prober path).
|
||||
_INITIAL_TTL: dict[str, int] = {
|
||||
"linux": 64,
|
||||
"windows": 128,
|
||||
"embedded": 255,
|
||||
}
|
||||
|
||||
|
||||
def _os_from_ttl(ttl_str: str | None) -> str | None:
|
||||
"""Derive a coarse OS guess from observed TTL when p0f has no match."""
|
||||
if not ttl_str:
|
||||
return None
|
||||
try:
|
||||
ttl = int(ttl_str)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
if 55 <= ttl <= 70:
|
||||
return "linux"
|
||||
if 115 <= ttl <= 135:
|
||||
return "windows"
|
||||
if 235 <= ttl <= 255:
|
||||
return "embedded"
|
||||
return None
|
||||
|
||||
|
||||
def _int_or_none(v: Any) -> int | None:
|
||||
if v is None or v == "":
|
||||
return None
|
||||
try:
|
||||
return int(v)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
@_traced("profiler.sniffer_rollup")
|
||||
def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
|
||||
"""
|
||||
Roll up sniffer-emitted `tcp_syn_fingerprint` and `tcp_flow_timing`
|
||||
events into a per-attacker summary.
|
||||
|
||||
OS guess priority:
|
||||
1. Modal p0f label from os_guess field (if not "unknown"/empty).
|
||||
2. TTL-based coarse bucket (linux / windows / embedded) as fallback.
|
||||
Hop distance: median of non-zero reported values only.
|
||||
"""
|
||||
os_guesses: list[str] = []
|
||||
ttl_values: list[str] = []
|
||||
hops: list[int] = []
|
||||
tcp_fp: dict[str, Any] | None = None
|
||||
retransmits = 0
|
||||
|
||||
for e in events:
|
||||
if e.event_type == _SNIFFER_SYN_EVENT:
|
||||
og = e.fields.get("os_guess")
|
||||
if og and og != "unknown":
|
||||
os_guesses.append(og)
|
||||
|
||||
# Collect raw TTL for fallback OS derivation.
|
||||
ttl_raw = e.fields.get("ttl") or e.fields.get("initial_ttl")
|
||||
if ttl_raw:
|
||||
ttl_values.append(ttl_raw)
|
||||
|
||||
# Only include hop distances that are valid and non-zero.
|
||||
hop_raw = e.fields.get("hop_distance")
|
||||
if hop_raw:
|
||||
try:
|
||||
hop_val = int(hop_raw)
|
||||
if hop_val > 0:
|
||||
hops.append(hop_val)
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
|
||||
# Keep the latest fingerprint snapshot.
|
||||
tcp_fp = {
|
||||
"window": _int_or_none(e.fields.get("window")),
|
||||
"wscale": _int_or_none(e.fields.get("wscale")),
|
||||
"mss": _int_or_none(e.fields.get("mss")),
|
||||
"options_sig": e.fields.get("options_sig", ""),
|
||||
"has_sack": e.fields.get("has_sack") == "true",
|
||||
"has_timestamps": e.fields.get("has_timestamps") == "true",
|
||||
}
|
||||
|
||||
elif e.event_type == _SNIFFER_FLOW_EVENT:
|
||||
try:
|
||||
retransmits += int(e.fields.get("retransmits", "0"))
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
|
||||
elif e.event_type == _PROBER_TCPFP_EVENT:
|
||||
# Active-probe result: prober sent SYN to attacker, got SYN-ACK back.
|
||||
# Field names differ from the passive sniffer (different emitter).
|
||||
ttl_raw = e.fields.get("ttl")
|
||||
if ttl_raw:
|
||||
ttl_values.append(ttl_raw)
|
||||
|
||||
# Derive hop distance from observed TTL vs canonical initial TTL.
|
||||
os_hint = _os_from_ttl(ttl_raw)
|
||||
if os_hint:
|
||||
initial = _INITIAL_TTL.get(os_hint)
|
||||
if initial:
|
||||
try:
|
||||
hop_val = initial - int(ttl_raw)
|
||||
if hop_val > 0:
|
||||
hops.append(hop_val)
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
|
||||
# Prober uses window_size/window_scale/options_order instead of
|
||||
# the sniffer's window/wscale/options_sig.
|
||||
tcp_fp = {
|
||||
"window": _int_or_none(e.fields.get("window_size")),
|
||||
"wscale": _int_or_none(e.fields.get("window_scale")),
|
||||
"mss": _int_or_none(e.fields.get("mss")),
|
||||
"options_sig": e.fields.get("options_order", ""),
|
||||
"has_sack": e.fields.get("sack_ok") == "1",
|
||||
"has_timestamps": e.fields.get("timestamp") == "1",
|
||||
}
|
||||
|
||||
# Mode for the OS bucket — most frequently observed label.
|
||||
os_guess: str | None = None
|
||||
if os_guesses:
|
||||
os_guess = Counter(os_guesses).most_common(1)[0][0]
|
||||
else:
|
||||
# TTL-based fallback: use the most common observed TTL value.
|
||||
if ttl_values:
|
||||
modal_ttl = Counter(ttl_values).most_common(1)[0][0]
|
||||
os_guess = _os_from_ttl(modal_ttl)
|
||||
|
||||
# Median hop distance (robust to the occasional weird TTL).
|
||||
hop_distance: int | None = None
|
||||
if hops:
|
||||
hop_distance = int(statistics.median(hops))
|
||||
|
||||
return {
|
||||
"os_guess": os_guess,
|
||||
"hop_distance": hop_distance,
|
||||
"tcp_fingerprint": tcp_fp or {},
|
||||
"retransmit_count": retransmits,
|
||||
}
|
||||
Reference in New Issue
Block a user