diff --git a/decnet/prober/osfp/p0f/signature.py b/decnet/prober/osfp/p0f/signature.py
index 5c08dec5..fed7e13e 100644
--- a/decnet/prober/osfp/p0f/signature.py
+++ b/decnet/prober/osfp/p0f/signature.py
@@ -151,29 +151,40 @@ class Signature:
 
     def score(self, obs: dict[str, Any]) -> Optional[float]:
         """Return a confidence in [0, 1] on match, or None if any field
-        rejects the observation."""
+        rejects the observation.
+
+        Soft-field semantics: ``df`` and ``total_len`` are treated as
+        "skip check when observation is missing" — the sniffer doesn't
+        currently emit either, and a literal-constraint sig shouldn't
+        reject a match solely because the observation is upstream-
+        incomplete. Hard fields (``window``, ``ttl``, ``options_sig``,
+        ``quirks``) are the real discriminators and are always checked:
+        a missing ``ttl`` rejects outright, a missing ``window`` or
+        ``options_sig`` is handed to its matcher as None, and missing
+        ``quirks`` are compared as the empty set (not skipped)."""
         mss = obs.get("mss")
-        # Window
+        # Window (hard)
         if not self.wss.matches(obs.get("window"), mss):
             return None
         # TTL — initial-TTL bucket must match exactly. The profiler is
         # expected to have rounded the observed TTL up to the nearest
-        # bucket already via decnet.sniffer.p0f.initial_ttl.
+        # bucket already via decnet.sniffer.p0f.initial_ttl. (hard)
         obs_ttl = obs.get("ttl")
         if obs_ttl is None or obs_ttl != self.ttl:
             return None
-        # DF (None on the sig side = wildcard)
+        # DF (soft — skip when unknown)
         if self.df is not None:
             obs_df = obs.get("df")
-            if obs_df is None or bool(obs_df) != self.df:
+            if obs_df is not None and bool(obs_df) != self.df:
                 return None
-        # Total length
-        if not self.total_len.matches(obs.get("total_len")):
+        # Total length (soft — skip when unknown)
+        obs_total = obs.get("total_len")
+        if obs_total is not None and not self.total_len.matches(obs_total):
             return None
-        # Options
+        # Options (hard)
         if not _options_match(self.options, obs.get("options_sig")):
             return None
-        # Quirks — must match as a set.
+        # Quirks — must match as a set. (hard)
         obs_quirks = obs.get("quirks") or frozenset()
         if not isinstance(obs_quirks, frozenset):
             obs_quirks = frozenset(obs_quirks)
diff --git a/decnet/profiler/fingerprint.py b/decnet/profiler/fingerprint.py
index 970f6090..12beb958 100644
--- a/decnet/profiler/fingerprint.py
+++ b/decnet/profiler/fingerprint.py
@@ -7,13 +7,18 @@ active prober `tcpfp_fingerprint` events; derives a per-attacker summary
 
 from __future__ import annotations
 
+import logging
 import statistics
 from collections import Counter
-from typing import Any
+from typing import Any, Optional
 
 from decnet.correlation.parser import LogEvent
+from decnet.prober.osfp import OsMatch, get_all_providers
+from decnet.sniffer.p0f import initial_ttl as _initial_ttl_bucket
 from decnet.telemetry import traced as _traced
 
+_log = logging.getLogger("decnet.profiler.fingerprint")
+
 # Sniffer-emitted packet events that feed into fingerprint rollup.
 _SNIFFER_SYN_EVENT: str = "tcp_syn_fingerprint"
 _SNIFFER_FLOW_EVENT: str = "tcp_flow_timing"
@@ -59,6 +64,72 @@ def _int_or_none(v: Any) -> int | None:
         return None
 
 
+def _match_via_osfp_providers(
+    tcp_fp: dict[str, Any] | None,
+    modal_ttl: str | None,
+    context: str,
+) -> Optional[OsMatch]:
+    """Feed the current tcp_fp snapshot through every enabled OS-fingerprint
+    provider and return the best match, or None.
+
+    Must never raise — factory / provider failures collapse to None so a
+    corrupt .fp file or misconfigured DECNET_OSFP_PROVIDERS env var can't
+    wedge the profile rebuild for an entire attacker. Worst case: the
+    caller falls back to the modal-label / TTL-bucket path that existed
+    before this wiring.
+    """
+    if not tcp_fp:
+        return None
+    # Convert the observed TTL (which may be N hops below the initial TTL
+    # the remote OS uses) to the canonical initial-TTL bucket the p0f v2
+    # DB expects (32 / 64 / 128 / 255). The bucket lookup sits inside the
+    # guard too, so a failure there degrades to "no TTL" instead of raising.
+    try:
+        ttl_int = int(modal_ttl) if modal_ttl is not None else None
+        initial_ttl_bucket = _initial_ttl_bucket(ttl_int) if ttl_int is not None else None
+    except Exception:  # noqa: BLE001 — must not propagate
+        initial_ttl_bucket = None
+
+    obs: dict[str, Any] = {
+        "window": tcp_fp.get("window"),
+        "wscale": tcp_fp.get("wscale"),
+        "mss": tcp_fp.get("mss"),
+        "options_sig": tcp_fp.get("options_sig"),
+        "ttl": initial_ttl_bucket,
+        # DF and total_len are not captured today — passed as None so
+        # Signature.score treats them as soft fields (skip check when
+        # missing). Promote to hard fields once the sniffer/prober
+        # emit them on tcp_syn_fingerprint / tcpfp_fingerprint.
+        "df": None,
+        "total_len": None,
+        # Sniffer doesn't yet emit a quirks SD field, so the matcher
+        # sees an empty set — which matches signatures with no quirks
+        # (the common case) but not signatures with specific quirks.
+        # That's correct behaviour, not a bug.
+        "quirks": frozenset(),
+        "context": context,
+    }
+
+    best: Optional[OsMatch] = None
+    try:
+        providers = get_all_providers()
+    except Exception as exc:  # noqa: BLE001 — must not propagate
+        _log.warning("osfp: provider init failed, skipping match: %s", exc)
+        return None
+    for provider in providers:
+        try:
+            match = provider.match(obs)
+            # Ranked inside the guard so a malformed match object (e.g. a
+            # non-comparable confidence) is skipped rather than propagated.
+            if match is not None and (best is None or match.confidence > best.confidence):
+                best = match
+        except Exception as exc:  # noqa: BLE001 — must not propagate
+            # getattr: the handler itself must not raise on a nameless provider.
+            provider_name = getattr(provider, "name", type(provider).__name__)
+            _log.warning("osfp: provider %s raised during match: %s", provider_name, exc)
+    return best
+
+
 @_traced("profiler.sniffer_rollup")
 def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
     """
@@ -74,6 +145,9 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
     ttl_values: list[str] = []
     hops: list[int] = []
     tcp_fp: dict[str, Any] | None = None
+    # Tracks which event set tcp_fp last — picks the provider "context"
+    # (syn vs synack) when we feed the p0f-v2 matcher below.
+    tcp_fp_context: str = "syn"
     retransmits = 0
     kex_order_raw: list[str] = []
     _kex_seen: set[str] = set()
@@ -110,6 +184,7 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
                 "has_sack": e.fields.get("has_sack") == "true",
                 "has_timestamps": e.fields.get("has_timestamps") == "true",
             }
+            tcp_fp_context = "syn"
 
         elif e.event_type == _SNIFFER_FLOW_EVENT:
             try:
@@ -164,16 +239,30 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
                 "has_sack": e.fields.get("sack_ok") == "1",
                 "has_timestamps": e.fields.get("timestamp") == "1",
             }
+            tcp_fp_context = "synack"  # prober sent SYN, captured attacker's SYN-ACK
 
-    # Mode for the OS bucket — most frequently observed label.
+    # OS-guess resolution chain:
+    #   1. p0f-v2 (or whichever providers DECNET_OSFP_PROVIDERS enables)
+    #      matched against the latest tcp_fp snapshot — the 375-sig
+    #      vendored DB is far more discriminating than what follows.
+    #   2. Modal sniffer-emitted label from the old ~10-sig hand-rolled
+    #      table in decnet/sniffer/p0f.py. Kept as fallback because the
+    #      vendored v2 DB predates post-2006 kernels.
+    #   3. TTL bucket (linux / windows / embedded). Coarse but never
+    #      lies when at least one TCP packet was seen.
     os_guess: str | None = None
-    if os_guesses:
+    modal_ttl = Counter(ttl_values).most_common(1)[0][0] if ttl_values else None
+
+    osfp_match = _match_via_osfp_providers(tcp_fp, modal_ttl, tcp_fp_context)
+    if osfp_match is not None:
+        # Render "Linux" + "2.6.x kernel" as "Linux 2.6.x kernel" — a single
+        # string fits the existing os_guess column contract. Flavor can be
+        # empty for generic signatures, in which case we just emit the OS.
+        os_guess = osfp_match.os if not osfp_match.flavor else f"{osfp_match.os} {osfp_match.flavor}"
+    elif os_guesses:
         os_guess = Counter(os_guesses).most_common(1)[0][0]
-    else:
-        # TTL-based fallback: use the most common observed TTL value.
-        if ttl_values:
-            modal_ttl = Counter(ttl_values).most_common(1)[0][0]
-            os_guess = _os_from_ttl(modal_ttl)
+    elif modal_ttl is not None:
+        os_guess = _os_from_ttl(modal_ttl)
 
     # Median hop distance (robust to the occasional weird TTL).
     hop_distance: int | None = None
diff --git a/tests/prober/osfp/test_signature.py b/tests/prober/osfp/test_signature.py
index 1ccc45ed..ee06ebae 100644
--- a/tests/prober/osfp/test_signature.py
+++ b/tests/prober/osfp/test_signature.py
@@ -63,6 +63,21 @@ def test_score_df_wildcard_on_signature_matches_either() -> None:
     assert sig.score(_obs(df=False)) is not None
 
 
+def test_score_df_none_on_observation_is_soft_skip() -> None:
+    """When the observation lacks df (sniffer doesn't emit it today),
+    a signature with a specific df constraint must still match rather
+    than hard-reject. Rationale in the score() docstring."""
+    sig = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:df-required")
+    assert sig.score(_obs(df=None)) is not None
+
+
+def test_score_total_len_none_on_observation_is_soft_skip() -> None:
+    """Same soft-field semantics for total_len — the profiler adapter
+    passes None when the sniffer / prober didn't capture it."""
+    sig = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:len-specific")
+    assert sig.score(_obs(total_len=None)) is not None
+
+
 def test_score_options_order_mismatch_returns_none() -> None:
     sig = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:ordered")
     # Same tokens, different order — must NOT match.
diff --git a/tests/profiler/test_profiler_behavioral.py b/tests/profiler/test_profiler_behavioral.py
index 981a63d8..725ea1e5 100644
--- a/tests/profiler/test_profiler_behavioral.py
+++ b/tests/profiler/test_profiler_behavioral.py
@@ -510,6 +510,50 @@ class TestSnifferRollup:
         r = sniffer_rollup(events)
         assert r["ssh_client_banners"] == []
 
+    # ─── p0f v2 provider wiring (DEBT — unblocks SLOW SCAN attackers) ─────
+
+    def test_p0f_v2_provider_beats_ttl_fallback(self):
+        """When the sniffer emits os_guess='unknown' (hand-rolled table
+        didn't match) but the TCP quirks DO match a vendored p0f v2
+        signature, the new priority chain must promote the richer
+        v2 match above the coarse TTL bucket.
+
+        Target: Linux 2.6 sig with window=5840, ttl=64, options
+        M1460,S,T,N,W7 — 262-sig p0f.fp has this explicitly."""
+        events = [
+            _mk(0, event_type="tcp_syn_fingerprint",
+                fields={
+                    "os_guess": "unknown",  # hand-rolled had no match
+                    "ttl": "64",
+                    "window": "5840",
+                    "mss": "1460",
+                    "wscale": "7",
+                    "options_sig": "M1460,S,T,N,W7",
+                }),
+        ]
+        r = sniffer_rollup(events)
+        # Old chain would collapse to the "linux" TTL bucket. New chain
+        # must surface the Linux 2.6-specific match from p0f v2.
+        assert r["os_guess"] is not None
+        assert r["os_guess"].startswith("Linux")
+        assert r["os_guess"] != "linux", (
+            "resolved to the coarse TTL-bucket fallback; p0f-v2 match "
+            f"should have taken priority. Got: {r['os_guess']!r}"
+        )
+
+    def test_p0f_v2_match_falls_back_when_no_tcp_fp(self):
+        """If the event has no window / mss / options_sig (e.g. a
+        non-fingerprint event or a malformed sniffer row), p0f-v2 must
+        return None and the chain must still resolve to the modal
+        label / TTL fallback the old code used."""
+        events = [
+            _mk(0, event_type="tcp_syn_fingerprint",
+                fields={"os_guess": "linux", "ttl": "64"}),
+        ]
+        r = sniffer_rollup(events)
+        # Modal os_guess path: the label "linux" still wins.
+        assert r["os_guess"] == "linux"
+
 # ─── build_behavior_record (composite) ──────────────────────────────────────
 
 