feat(profiler): wire p0f-v2 matcher into sniffer_rollup priority chain
The ~30-signature hand-rolled p0f-lite table in decnet/sniffer/p0f.py
misses most real-world attackers (yesterday's SLOW SCAN being a
textbook case — 9 hours of events, 19 hits, os_guess = NULL). The
375-sig vendored p0f v2 DB was already there; this commit actually
calls it.
New resolution chain in sniffer_rollup:
1. Enabled OS-fingerprint providers (p0f-v2 by default, configured via
DECNET_OSFP_PROVIDERS) are all queried in declared order. The
highest-confidence match across all enabled providers wins; on a tie
the earlier-declared provider is kept.
2. Modal os_guess label from the sniffer's hand-rolled p0f.py.
Kept as fallback because v2's DB predates post-2006 kernels.
3. TTL bucket (linux / windows / embedded). Coarse but never wrong.
Wiring details:
- _match_via_osfp_providers: never raises — factory / provider
failures collapse to None and the chain falls through to the
old modal-label / TTL path. A corrupt .fp file or misconfigured
DECNET_OSFP_PROVIDERS must never wedge a profile rebuild.
- tcp_fp_context tracks whether the LATEST tcp_fp snapshot came
from a passive SYN ('syn' → p0f.fp) or from a SYN-ACK captured by
the active prober ('synack' → p0fa.fp), so the match is routed to
the right sig list.
- initial-TTL normalisation via decnet.sniffer.p0f.initial_ttl.
Observation's TTL may be N hops below the OS's initial; v2
signatures match on the canonical bucket.
Soft-field semantics on Signature.score(): df and total_len are now
skip-checked when the observation is missing them. Sniffer doesn't
currently emit either SD field; a literal-constraint sig
shouldn't hard-reject a match solely because of upstream
incompleteness. Hard fields (window, ttl, options_sig, quirks)
still hard-reject on absent/mismatched input (absent quirks are read
as the empty set, so they only match no-quirk sigs) — those are the real
discriminators. Promote df / total_len back to hard the moment the
sniffer starts emitting them.
+2 integration tests on TestSnifferRollup, +2 soft-field tests on
test_signature. Full regression: 166 tests across tests/prober/osfp
+ tests/profiler all green.
This commit is contained in:
@@ -151,29 +151,38 @@ class Signature:
|
||||
|
||||
def score(self, obs: dict[str, Any]) -> Optional[float]:
|
||||
"""Return a confidence in [0, 1] on match, or None if any field
|
||||
rejects the observation."""
|
||||
rejects the observation.
|
||||
|
||||
Soft-field semantics: ``df`` and ``total_len`` are treated as
|
||||
"skip check when observation is missing" — the sniffer doesn't
|
||||
currently emit either, and a literal-constraint sig shouldn't
|
||||
reject a match solely because the observation is upstream-
|
||||
incomplete. Hard fields (``window``, ``ttl``, ``options_sig``,
|
||||
``quirks``) still hard-reject on absent or mismatched input —
|
||||
those are the real discriminators."""
|
||||
mss = obs.get("mss")
|
||||
# Window
|
||||
# Window (hard)
|
||||
if not self.wss.matches(obs.get("window"), mss):
|
||||
return None
|
||||
# TTL — initial-TTL bucket must match exactly. The profiler is
|
||||
# expected to have rounded the observed TTL up to the nearest
|
||||
# bucket already via decnet.sniffer.p0f.initial_ttl.
|
||||
# bucket already via decnet.sniffer.p0f.initial_ttl. (hard)
|
||||
obs_ttl = obs.get("ttl")
|
||||
if obs_ttl is None or obs_ttl != self.ttl:
|
||||
return None
|
||||
# DF (None on the sig side = wildcard)
|
||||
# DF (soft — skip when unknown)
|
||||
if self.df is not None:
|
||||
obs_df = obs.get("df")
|
||||
if obs_df is None or bool(obs_df) != self.df:
|
||||
if obs_df is not None and bool(obs_df) != self.df:
|
||||
return None
|
||||
# Total length
|
||||
if not self.total_len.matches(obs.get("total_len")):
|
||||
# Total length (soft — skip when unknown)
|
||||
obs_total = obs.get("total_len")
|
||||
if obs_total is not None and not self.total_len.matches(obs_total):
|
||||
return None
|
||||
# Options
|
||||
# Options (hard)
|
||||
if not _options_match(self.options, obs.get("options_sig")):
|
||||
return None
|
||||
# Quirks — must match as a set.
|
||||
# Quirks — must match as a set. (hard)
|
||||
obs_quirks = obs.get("quirks") or frozenset()
|
||||
if not isinstance(obs_quirks, frozenset):
|
||||
obs_quirks = frozenset(obs_quirks)
|
||||
|
||||
@@ -7,13 +7,18 @@ active prober `tcpfp_fingerprint` events; derives a per-attacker summary
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import statistics
|
||||
from collections import Counter
|
||||
from typing import Any
|
||||
from typing import Any, Optional
|
||||
|
||||
from decnet.correlation.parser import LogEvent
|
||||
from decnet.prober.osfp import OsMatch, get_all_providers
|
||||
from decnet.sniffer.p0f import initial_ttl as _initial_ttl_bucket
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
_log = logging.getLogger("decnet.profiler.fingerprint")
|
||||
|
||||
# Sniffer-emitted packet events that feed into fingerprint rollup.
|
||||
_SNIFFER_SYN_EVENT: str = "tcp_syn_fingerprint"
|
||||
_SNIFFER_FLOW_EVENT: str = "tcp_flow_timing"
|
||||
@@ -59,6 +64,70 @@ def _int_or_none(v: Any) -> int | None:
|
||||
return None
|
||||
|
||||
|
||||
def _match_via_osfp_providers(
|
||||
tcp_fp: dict[str, Any] | None,
|
||||
modal_ttl: str | None,
|
||||
context: str,
|
||||
) -> Optional[OsMatch]:
|
||||
"""Feed the current tcp_fp snapshot through every enabled OS-fingerprint
|
||||
provider and return the best match, or None.
|
||||
|
||||
Must never raise — factory / provider failures collapse to None so a
|
||||
corrupt .fp file or misconfigured DECNET_OSFP_PROVIDERS env var can't
|
||||
wedge the profile rebuild for an entire attacker. Worst case: the
|
||||
caller falls back to the modal-label / TTL-bucket path that existed
|
||||
before this wiring.
|
||||
"""
|
||||
if not tcp_fp:
|
||||
return None
|
||||
# Convert the observed TTL (which may be N hops below the initial TTL
|
||||
# the remote OS uses) to the canonical initial-TTL bucket the p0f v2
|
||||
# DB expects (32 / 64 / 128 / 255).
|
||||
try:
|
||||
ttl_int = int(modal_ttl) if modal_ttl is not None else None
|
||||
except (TypeError, ValueError):
|
||||
ttl_int = None
|
||||
initial_ttl_bucket = _initial_ttl_bucket(ttl_int) if ttl_int is not None else None
|
||||
|
||||
obs: dict[str, Any] = {
|
||||
"window": tcp_fp.get("window"),
|
||||
"wscale": tcp_fp.get("wscale"),
|
||||
"mss": tcp_fp.get("mss"),
|
||||
"options_sig": tcp_fp.get("options_sig"),
|
||||
"ttl": initial_ttl_bucket,
|
||||
# DF and total_len are not captured today — passed as None so
|
||||
# Signature.score treats them as soft fields (skip check when
|
||||
# missing). Promote to hard fields once the sniffer/prober
|
||||
# emit them on tcp_syn_fingerprint / tcpfp_fingerprint.
|
||||
"df": None,
|
||||
"total_len": None,
|
||||
# Sniffer doesn't yet emit a quirks SD field, so the matcher
|
||||
# sees an empty set — which matches signatures with no quirks
|
||||
# (the common case) but not signatures with specific quirks.
|
||||
# That's correct behaviour, not a bug.
|
||||
"quirks": frozenset(),
|
||||
"context": context,
|
||||
}
|
||||
|
||||
best: Optional[OsMatch] = None
|
||||
try:
|
||||
providers = get_all_providers()
|
||||
except Exception as exc: # noqa: BLE001 — must not propagate
|
||||
_log.warning("osfp: provider init failed, skipping match: %s", exc)
|
||||
return None
|
||||
for provider in providers:
|
||||
try:
|
||||
match = provider.match(obs)
|
||||
except Exception as exc: # noqa: BLE001 — must not propagate
|
||||
_log.warning("osfp: provider %s raised during match: %s", provider.name, exc)
|
||||
continue
|
||||
if match is None:
|
||||
continue
|
||||
if best is None or match.confidence > best.confidence:
|
||||
best = match
|
||||
return best
|
||||
|
||||
|
||||
@_traced("profiler.sniffer_rollup")
|
||||
def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
|
||||
"""
|
||||
@@ -74,6 +143,9 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
|
||||
ttl_values: list[str] = []
|
||||
hops: list[int] = []
|
||||
tcp_fp: dict[str, Any] | None = None
|
||||
# Tracks which event set tcp_fp last — picks the provider "context"
|
||||
# (syn vs synack) when we feed the p0f-v2 matcher below.
|
||||
tcp_fp_context: str = "syn"
|
||||
retransmits = 0
|
||||
kex_order_raw: list[str] = []
|
||||
_kex_seen: set[str] = set()
|
||||
@@ -110,6 +182,7 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
|
||||
"has_sack": e.fields.get("has_sack") == "true",
|
||||
"has_timestamps": e.fields.get("has_timestamps") == "true",
|
||||
}
|
||||
tcp_fp_context = "syn"
|
||||
|
||||
elif e.event_type == _SNIFFER_FLOW_EVENT:
|
||||
try:
|
||||
@@ -164,16 +237,30 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
|
||||
"has_sack": e.fields.get("sack_ok") == "1",
|
||||
"has_timestamps": e.fields.get("timestamp") == "1",
|
||||
}
|
||||
tcp_fp_context = "synack" # prober sent SYN, captured attacker's SYN-ACK
|
||||
|
||||
# Mode for the OS bucket — most frequently observed label.
|
||||
# OS-guess resolution chain:
|
||||
# 1. p0f-v2 (or whichever providers DECNET_OSFP_PROVIDERS enables)
|
||||
# matched against the latest tcp_fp snapshot — the 375-sig
|
||||
# vendored DB is far more discriminating than what follows.
|
||||
# 2. Modal sniffer-emitted label from the old ~10-sig hand-rolled
|
||||
# table in decnet/sniffer/p0f.py. Kept as fallback because the
|
||||
# vendored v2 DB predates post-2006 kernels.
|
||||
# 3. TTL bucket (linux / windows / embedded). Coarse but never
|
||||
# lies when at least one TCP packet was seen.
|
||||
os_guess: str | None = None
|
||||
if os_guesses:
|
||||
modal_ttl = Counter(ttl_values).most_common(1)[0][0] if ttl_values else None
|
||||
|
||||
osfp_match = _match_via_osfp_providers(tcp_fp, modal_ttl, tcp_fp_context)
|
||||
if osfp_match is not None:
|
||||
# Render "Linux" + "2.6.x kernel" as "Linux 2.6.x kernel" — a single
|
||||
# string fits the existing os_guess column contract. Flavor can be
|
||||
# empty for generic signatures, in which case we just emit the OS.
|
||||
os_guess = osfp_match.os if not osfp_match.flavor else f"{osfp_match.os} {osfp_match.flavor}"
|
||||
elif os_guesses:
|
||||
os_guess = Counter(os_guesses).most_common(1)[0][0]
|
||||
else:
|
||||
# TTL-based fallback: use the most common observed TTL value.
|
||||
if ttl_values:
|
||||
modal_ttl = Counter(ttl_values).most_common(1)[0][0]
|
||||
os_guess = _os_from_ttl(modal_ttl)
|
||||
elif modal_ttl is not None:
|
||||
os_guess = _os_from_ttl(modal_ttl)
|
||||
|
||||
# Median hop distance (robust to the occasional weird TTL).
|
||||
hop_distance: int | None = None
|
||||
|
||||
@@ -63,6 +63,21 @@ def test_score_df_wildcard_on_signature_matches_either() -> None:
|
||||
assert sig.score(_obs(df=False)) is not None
|
||||
|
||||
|
||||
def test_score_df_none_on_observation_is_soft_skip() -> None:
    """An observation with ``df=None`` must still match a signature that
    carries a specific df constraint.

    The sniffer does not emit df today, so the check is skipped instead of
    being treated as a hard reject.
    """
    signature = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:df-required")
    observation = _obs(df=None)
    confidence = signature.score(observation)
    assert confidence is not None
|
||||
|
||||
|
||||
def test_score_total_len_none_on_observation_is_soft_skip() -> None:
    """An observation with ``total_len=None`` must still match.

    total_len follows the same soft-field rule as df: the profiler adapter
    passes None when the sniffer / prober did not capture it, and the
    check is skipped.
    """
    signature = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:len-specific")
    observation = _obs(total_len=None)
    confidence = signature.score(observation)
    assert confidence is not None
|
||||
|
||||
|
||||
def test_score_options_order_mismatch_returns_none() -> None:
|
||||
sig = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:ordered")
|
||||
# Same tokens, different order — must NOT match.
|
||||
|
||||
@@ -510,6 +510,50 @@ class TestSnifferRollup:
|
||||
r = sniffer_rollup(events)
|
||||
assert r["ssh_client_banners"] == []
|
||||
|
||||
# ─── p0f v2 provider wiring (DEBT — unblocks SLOW SCAN attackers) ─────
|
||||
|
||||
def test_p0f_v2_provider_beats_ttl_fallback(self):
    """A vendored p0f v2 match must outrank the coarse TTL bucket.

    The sniffer reports os_guess='unknown' (its hand-rolled table had no
    match) while the TCP fingerprint fields DO match a vendored p0f v2
    signature, so the priority chain has to surface the richer v2 label.

    Target: Linux 2.6 sig with window=5840, ttl=64, options
    M1460,S,T,N,W7 — 262-sig p0f.fp has this explicitly.
    """
    syn_fields = {
        "os_guess": "unknown",  # hand-rolled had no match
        "ttl": "64",
        "window": "5840",
        "mss": "1460",
        "wscale": "7",
        "options_sig": "M1460,S,T,N,W7",
    }
    syn_event = _mk(0, event_type="tcp_syn_fingerprint", fields=syn_fields)
    r = sniffer_rollup([syn_event])
    # Old chain would collapse to the "linux" TTL bucket. New chain
    # must surface the Linux 2.6-specific match from p0f v2.
    assert r["os_guess"] is not None
    assert r["os_guess"].startswith("Linux")
    assert r["os_guess"] != "linux", (
        "resolved to the coarse TTL-bucket fallback; p0f-v2 match "
        f"should have taken priority. Got: {r['os_guess']!r}"
    )
|
||||
|
||||
def test_p0f_v2_match_falls_back_when_no_tcp_fp(self):
    """Without window / mss / options_sig the modal label must still win.

    Covers a non-fingerprint event or a malformed sniffer row: p0f-v2 is
    expected to yield no match, and the chain has to resolve through the
    modal label / TTL fallback the old code used.
    """
    sparse_event = _mk(
        0,
        event_type="tcp_syn_fingerprint",
        fields={"os_guess": "linux", "ttl": "64"},
    )
    r = sniffer_rollup([sparse_event])
    # Modal os_guess path: the label "linux" still wins.
    assert r["os_guess"] == "linux"
|
||||
|
||||
|
||||
# ─── build_behavior_record (composite) ──────────────────────────────────────
|
||||
|
||||
|
||||
Reference in New Issue
Block a user