feat(profiler): wire p0f-v2 matcher into sniffer_rollup priority chain

The ~30-signature hand-rolled p0f-lite table in decnet/sniffer/p0f.py
misses most real-world attackers (yesterday's SLOW SCAN being a
textbook case — 9 hours of events, 19 hits, os_guess = NULL). The
375-sig vendored p0f v2 DB was already there; this commit actually
calls it.

New resolution chain in sniffer_rollup:

  1. Enabled OS-fingerprint providers (p0f-v2 by default, selected via
     DECNET_OSFP_PROVIDERS) are all queried in declared order. The
     highest-confidence match across every enabled provider wins; on a
     tie the earlier-declared provider is kept.
  2. Modal os_guess label from the sniffer's hand-rolled p0f.py.
     Kept as fallback because v2's DB predates post-2006 kernels.
  3. TTL bucket (linux / windows / embedded). Coarse but never wrong.

Wiring details:

- _match_via_osfp_providers: never raises — factory / provider
  failures collapse to None and the chain falls through to the
  old modal-label / TTL path. A corrupt .fp file or misconfigured
  DECNET_OSFP_PROVIDERS must never wedge a profile rebuild.
- tcp_fp_context tracks whether the LATEST tcp_fp snapshot came
  from a passively sniffed SYN ('syn' → p0f.fp) or from the active
  prober's capture of the attacker's SYN-ACK ('synack' → p0fa.fp),
  so the matcher is routed to the right signature list.
- initial-TTL normalisation via decnet.sniffer.p0f.initial_ttl.
  Observation's TTL may be N hops below the OS's initial; v2
  signatures match on the canonical bucket.

Soft-field semantics on Signature.score(): df and total_len are now
skip-checked when the observation is missing them. Sniffer doesn't
currently emit either SD field; a literal-constraint sig
shouldn't hard-reject a match solely because of upstream
incompleteness. Hard fields (window, ttl, options_sig, quirks)
still hard-reject on absent/mismatched input — those are the real
discriminators. Promote df / total_len back to hard the moment the
sniffer starts emitting them.

+2 integration tests on TestSnifferRollup, +2 soft-field tests on
test_signature. Full regression: 166 tests across tests/prober/osfp
+ tests/profiler all green.
This commit is contained in:
2026-04-24 11:56:50 -04:00
parent 8a430bf725
commit ec1079e78b
4 changed files with 172 additions and 17 deletions

View File

@@ -151,29 +151,38 @@ class Signature:
def score(self, obs: dict[str, Any]) -> Optional[float]: def score(self, obs: dict[str, Any]) -> Optional[float]:
"""Return a confidence in [0, 1] on match, or None if any field """Return a confidence in [0, 1] on match, or None if any field
rejects the observation.""" rejects the observation.
Soft-field semantics: ``df`` and ``total_len`` are treated as
"skip check when observation is missing" — the sniffer doesn't
currently emit either, and a literal-constraint sig shouldn't
reject a match solely because the observation is upstream-
incomplete. Hard fields (``window``, ``ttl``, ``options_sig``,
``quirks``) still hard-reject on absent or mismatched input —
those are the real discriminators."""
mss = obs.get("mss") mss = obs.get("mss")
# Window # Window (hard)
if not self.wss.matches(obs.get("window"), mss): if not self.wss.matches(obs.get("window"), mss):
return None return None
# TTL — initial-TTL bucket must match exactly. The profiler is # TTL — initial-TTL bucket must match exactly. The profiler is
# expected to have rounded the observed TTL up to the nearest # expected to have rounded the observed TTL up to the nearest
# bucket already via decnet.sniffer.p0f.initial_ttl. # bucket already via decnet.sniffer.p0f.initial_ttl. (hard)
obs_ttl = obs.get("ttl") obs_ttl = obs.get("ttl")
if obs_ttl is None or obs_ttl != self.ttl: if obs_ttl is None or obs_ttl != self.ttl:
return None return None
# DF (None on the sig side = wildcard) # DF (soft — skip when unknown)
if self.df is not None: if self.df is not None:
obs_df = obs.get("df") obs_df = obs.get("df")
if obs_df is None or bool(obs_df) != self.df: if obs_df is not None and bool(obs_df) != self.df:
return None return None
# Total length # Total length (soft — skip when unknown)
if not self.total_len.matches(obs.get("total_len")): obs_total = obs.get("total_len")
if obs_total is not None and not self.total_len.matches(obs_total):
return None return None
# Options # Options (hard)
if not _options_match(self.options, obs.get("options_sig")): if not _options_match(self.options, obs.get("options_sig")):
return None return None
# Quirks — must match as a set. # Quirks — must match as a set. (hard)
obs_quirks = obs.get("quirks") or frozenset() obs_quirks = obs.get("quirks") or frozenset()
if not isinstance(obs_quirks, frozenset): if not isinstance(obs_quirks, frozenset):
obs_quirks = frozenset(obs_quirks) obs_quirks = frozenset(obs_quirks)

View File

@@ -7,13 +7,18 @@ active prober `tcpfp_fingerprint` events; derives a per-attacker summary
from __future__ import annotations from __future__ import annotations
import logging
import statistics import statistics
from collections import Counter from collections import Counter
from typing import Any from typing import Any, Optional
from decnet.correlation.parser import LogEvent from decnet.correlation.parser import LogEvent
from decnet.prober.osfp import OsMatch, get_all_providers
from decnet.sniffer.p0f import initial_ttl as _initial_ttl_bucket
from decnet.telemetry import traced as _traced from decnet.telemetry import traced as _traced
_log = logging.getLogger("decnet.profiler.fingerprint")
# Sniffer-emitted packet events that feed into fingerprint rollup. # Sniffer-emitted packet events that feed into fingerprint rollup.
_SNIFFER_SYN_EVENT: str = "tcp_syn_fingerprint" _SNIFFER_SYN_EVENT: str = "tcp_syn_fingerprint"
_SNIFFER_FLOW_EVENT: str = "tcp_flow_timing" _SNIFFER_FLOW_EVENT: str = "tcp_flow_timing"
@@ -59,6 +64,70 @@ def _int_or_none(v: Any) -> int | None:
return None return None
def _match_via_osfp_providers(
tcp_fp: dict[str, Any] | None,
modal_ttl: str | None,
context: str,
) -> Optional[OsMatch]:
"""Feed the current tcp_fp snapshot through every enabled OS-fingerprint
provider and return the best match, or None.
Must never raise — factory / provider failures collapse to None so a
corrupt .fp file or misconfigured DECNET_OSFP_PROVIDERS env var can't
wedge the profile rebuild for an entire attacker. Worst case: the
caller falls back to the modal-label / TTL-bucket path that existed
before this wiring.
"""
if not tcp_fp:
return None
# Convert the observed TTL (which may be N hops below the initial TTL
# the remote OS uses) to the canonical initial-TTL bucket the p0f v2
# DB expects (32 / 64 / 128 / 255).
try:
ttl_int = int(modal_ttl) if modal_ttl is not None else None
except (TypeError, ValueError):
ttl_int = None
initial_ttl_bucket = _initial_ttl_bucket(ttl_int) if ttl_int is not None else None
obs: dict[str, Any] = {
"window": tcp_fp.get("window"),
"wscale": tcp_fp.get("wscale"),
"mss": tcp_fp.get("mss"),
"options_sig": tcp_fp.get("options_sig"),
"ttl": initial_ttl_bucket,
# DF and total_len are not captured today — passed as None so
# Signature.score treats them as soft fields (skip check when
# missing). Promote to hard fields once the sniffer/prober
# emit them on tcp_syn_fingerprint / tcpfp_fingerprint.
"df": None,
"total_len": None,
# Sniffer doesn't yet emit a quirks SD field, so the matcher
# sees an empty set — which matches signatures with no quirks
# (the common case) but not signatures with specific quirks.
# That's correct behaviour, not a bug.
"quirks": frozenset(),
"context": context,
}
best: Optional[OsMatch] = None
try:
providers = get_all_providers()
except Exception as exc: # noqa: BLE001 — must not propagate
_log.warning("osfp: provider init failed, skipping match: %s", exc)
return None
for provider in providers:
try:
match = provider.match(obs)
except Exception as exc: # noqa: BLE001 — must not propagate
_log.warning("osfp: provider %s raised during match: %s", provider.name, exc)
continue
if match is None:
continue
if best is None or match.confidence > best.confidence:
best = match
return best
@_traced("profiler.sniffer_rollup") @_traced("profiler.sniffer_rollup")
def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]: def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
""" """
@@ -74,6 +143,9 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
ttl_values: list[str] = [] ttl_values: list[str] = []
hops: list[int] = [] hops: list[int] = []
tcp_fp: dict[str, Any] | None = None tcp_fp: dict[str, Any] | None = None
# Tracks which event set tcp_fp last — picks the provider "context"
# (syn vs synack) when we feed the p0f-v2 matcher below.
tcp_fp_context: str = "syn"
retransmits = 0 retransmits = 0
kex_order_raw: list[str] = [] kex_order_raw: list[str] = []
_kex_seen: set[str] = set() _kex_seen: set[str] = set()
@@ -110,6 +182,7 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
"has_sack": e.fields.get("has_sack") == "true", "has_sack": e.fields.get("has_sack") == "true",
"has_timestamps": e.fields.get("has_timestamps") == "true", "has_timestamps": e.fields.get("has_timestamps") == "true",
} }
tcp_fp_context = "syn"
elif e.event_type == _SNIFFER_FLOW_EVENT: elif e.event_type == _SNIFFER_FLOW_EVENT:
try: try:
@@ -164,16 +237,30 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
"has_sack": e.fields.get("sack_ok") == "1", "has_sack": e.fields.get("sack_ok") == "1",
"has_timestamps": e.fields.get("timestamp") == "1", "has_timestamps": e.fields.get("timestamp") == "1",
} }
tcp_fp_context = "synack" # prober sent SYN, captured attacker's SYN-ACK
# Mode for the OS bucket — most frequently observed label. # OS-guess resolution chain:
# 1. p0f-v2 (or whichever providers DECNET_OSFP_PROVIDERS enables)
# matched against the latest tcp_fp snapshot — the 375-sig
# vendored DB is far more discriminating than what follows.
# 2. Modal sniffer-emitted label from the old ~10-sig hand-rolled
# table in decnet/sniffer/p0f.py. Kept as fallback because the
# vendored v2 DB predates post-2006 kernels.
# 3. TTL bucket (linux / windows / embedded). Coarse but never
# lies when at least one TCP packet was seen.
os_guess: str | None = None os_guess: str | None = None
if os_guesses: modal_ttl = Counter(ttl_values).most_common(1)[0][0] if ttl_values else None
osfp_match = _match_via_osfp_providers(tcp_fp, modal_ttl, tcp_fp_context)
if osfp_match is not None:
# Render "Linux" + "2.6.x kernel" as "Linux 2.6.x kernel" — a single
# string fits the existing os_guess column contract. Flavor can be
# empty for generic signatures, in which case we just emit the OS.
os_guess = osfp_match.os if not osfp_match.flavor else f"{osfp_match.os} {osfp_match.flavor}"
elif os_guesses:
os_guess = Counter(os_guesses).most_common(1)[0][0] os_guess = Counter(os_guesses).most_common(1)[0][0]
else: elif modal_ttl is not None:
# TTL-based fallback: use the most common observed TTL value. os_guess = _os_from_ttl(modal_ttl)
if ttl_values:
modal_ttl = Counter(ttl_values).most_common(1)[0][0]
os_guess = _os_from_ttl(modal_ttl)
# Median hop distance (robust to the occasional weird TTL). # Median hop distance (robust to the occasional weird TTL).
hop_distance: int | None = None hop_distance: int | None = None

View File

@@ -63,6 +63,21 @@ def test_score_df_wildcard_on_signature_matches_either() -> None:
assert sig.score(_obs(df=False)) is not None assert sig.score(_obs(df=False)) is not None
def test_score_df_none_on_observation_is_soft_skip() -> None:
    """When the observation lacks df (sniffer doesn't emit it today),
    a signature with a specific df constraint must still match rather
    than hard-reject. Rationale in the score() docstring.

    "Soft" only means "skip when unknown": a df value that IS present
    and contradicts the signature must still reject, otherwise the
    constraint has silently turned into a wildcard."""
    sig = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:df-required")
    # Unknown DF — the check is skipped, so the signature still matches.
    assert sig.score(_obs(df=None)) is not None
    # Known, contradicting DF — the signature requires DF set.
    assert sig.score(_obs(df=False)) is None
def test_score_total_len_none_on_observation_is_soft_skip() -> None:
    """total_len follows the same soft-field rule as df: the profiler
    adapter hands over ``total_len=None`` when neither the sniffer nor
    the prober captured it, and that must not reject the signature."""
    signature = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:len-specific")
    observation = _obs(total_len=None)
    confidence = signature.score(observation)
    assert confidence is not None
def test_score_options_order_mismatch_returns_none() -> None: def test_score_options_order_mismatch_returns_none() -> None:
sig = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:ordered") sig = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:ordered")
# Same tokens, different order — must NOT match. # Same tokens, different order — must NOT match.

View File

@@ -510,6 +510,50 @@ class TestSnifferRollup:
r = sniffer_rollup(events) r = sniffer_rollup(events)
assert r["ssh_client_banners"] == [] assert r["ssh_client_banners"] == []
# ─── p0f v2 provider wiring (DEBT — unblocks SLOW SCAN attackers) ─────
def test_p0f_v2_provider_beats_ttl_fallback(self):
    """A vendored p0f v2 match must outrank the coarse TTL bucket.

    The sniffer reports os_guess='unknown' (its hand-rolled table had
    no match), but the SYN's window / TTL / option layout corresponds
    to a vendored p0f v2 signature, so the priority chain has to
    surface that richer match.

    Target: Linux 2.6 sig with window=5840, ttl=64, options
    M1460,S,T,N,W7 — 262-sig p0f.fp has this explicitly."""
    syn_fields = {
        "os_guess": "unknown",  # hand-rolled had no match
        "ttl": "64",
        "window": "5840",
        "mss": "1460",
        "wscale": "7",
        "options_sig": "M1460,S,T,N,W7",
    }
    syn_event = _mk(0, event_type="tcp_syn_fingerprint", fields=syn_fields)

    rollup = sniffer_rollup([syn_event])
    resolved = rollup["os_guess"]

    # The pre-wiring chain would have collapsed to the "linux" TTL
    # bucket; the new chain has to report the Linux 2.6-specific match.
    assert resolved is not None
    assert resolved.startswith("Linux")
    assert resolved != "linux", (
        "resolved to the coarse TTL-bucket fallback; p0f-v2 match "
        f"should have taken priority. Got: {resolved!r}"
    )
def test_p0f_v2_match_falls_back_when_no_tcp_fp(self):
    """Without window / mss / options_sig on the event (e.g. a
    non-fingerprint event or a malformed sniffer row) p0f-v2 has
    nothing to match, so the chain must still resolve through the
    modal label / TTL fallback that the old code used."""
    sparse_fields = {"os_guess": "linux", "ttl": "64"}
    sparse_event = _mk(0, event_type="tcp_syn_fingerprint", fields=sparse_fields)

    rollup = sniffer_rollup([sparse_event])

    # Modal os_guess path: the sniffer's own "linux" label is kept.
    assert rollup["os_guess"] == "linux"
# ─── build_behavior_record (composite) ────────────────────────────────────── # ─── build_behavior_record (composite) ──────────────────────────────────────