feat(prober/osfp): p0f v2 .fp parser + Signature scoring

First code layer of the OS-fingerprinting work on top of yesterday's
vendored p0f v2 database. Three new modules, all pure (no I/O outside
of the parser's file read):

- decnet/prober/osfp/base.py — Provider protocol + OsMatch dataclass
  matching the established Provider convention in decnet/geoip and
  decnet/bus. Docstring spells out the never-raise invariant: malformed
  input returns None, so a single bad event can't wedge a whole
  attacker-profile rebuild.

- decnet/prober/osfp/p0f/signature.py — Signature dataclass + three
  predicate helpers (WindowSpec / IntSpec / OptionToken) encoding the
  p0f v2 DSL's wildcard / modulo / MSS-multiple / MTU-multiple
  semantics. Scoring is our extension on top of upstream p0f's
  first-match-wins policy: each signature carries a precomputed
  specificity in [0, 1] so the factory can pick the most-specific
  match when multiple signatures fire against one observation.

- decnet/prober/osfp/p0f/format.py — .fp line parser. Every shipped
  field variant from the DSL spec at the top of p0f.fp is covered
  (Snn / Tnn / %nnn / * for window; T0 vs T; -/@/* os-genre prefixes;
  quirks as concatenated single-letter flags; '.' sentinels for
  no-options / no-quirks). Malformed lines log a warning and skip
  instead of aborting the whole file — 1 bad row must not cost the
  other 374.

19 parser tests + 15 scoring tests. Full vendored-DB smoke tests
confirm all 375 signatures parse round-trip (262 SYN + 61 SYN-ACK +
46 RST + 6 stray) and every computed specificity lands in [0, 1].
This commit is contained in:
2026-04-24 11:47:54 -04:00
parent 620e1f5b1d
commit 41ff6b4b03
6 changed files with 857 additions and 0 deletions

View File

View File

@@ -0,0 +1,152 @@
"""Tests for the p0f v2 .fp parser (decnet/prober/osfp/p0f/format.py)."""
from __future__ import annotations
from pathlib import Path
import pytest
from decnet.prober.osfp.p0f.format import P0fParseError, _parse_line, parse_p0f_v2
# ─── Line-parser unit tests ──────────────────────────────────────────────────
def test_parse_line_minimal_literal() -> None:
    """A fully literal line populates every parsed field as expected."""
    parsed = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:2.6.x kernel")

    # Label fields.
    assert parsed.os == "Linux"
    assert parsed.flavor == "2.6.x kernel"
    assert not parsed.is_userland

    # Header fields.
    assert parsed.ttl == 64
    assert parsed.df is True
    window, length = parsed.wss, parsed.total_len
    assert window.kind == "literal"
    assert window.value == 5840
    assert length.kind == "literal"
    assert length.value == 60

    # Options: five tokens, the first one carrying MSS=1460.
    assert len(parsed.options) == 5
    first = parsed.options[0]
    assert first.kind == "M"
    assert first.value is not None
    assert first.value.value == 1460

    # "." in the quirks column yields an empty quirk set.
    assert parsed.quirks == frozenset()
def test_parse_line_wildcard_window() -> None:
    """A ``*`` in the window, length and option-value slots parses as kind "any"."""
    parsed = _parse_line("*:128:1:*:M*,S,T,N,W*:.:Windows:XP SP1+")
    assert parsed.wss.kind == "any"
    assert parsed.total_len.kind == "any"

    leading = parsed.options[0]
    assert leading.kind == "M"
    assert leading.value is not None
    assert leading.value.kind == "any"
def test_parse_line_mss_multiple_window() -> None:
    """An ``Snn`` window token parses as kind "mss_mul" with the multiplier as value."""
    window = _parse_line("S4:64:1:60:M*,S,T,N,W*:.:Linux:generic").wss
    assert (window.kind, window.value) == ("mss_mul", 4)
def test_parse_line_mtu_multiple_window() -> None:
    """A ``Tnn`` window token parses as kind "mtu_mul" with the multiplier as value."""
    window = _parse_line("T3:64:1:60:M*,S,T,N,W*:.:Solaris:10").wss
    assert (window.kind, window.value) == ("mtu_mul", 3)
def test_parse_line_modulo_window() -> None:
    """A ``%nnn`` window token parses as kind "mod" with the modulus as value."""
    window = _parse_line("%8192:64:1:60:M*,S,T,N,W*:.:Linux:probe").wss
    assert (window.kind, window.value) == ("mod", 8192)
def test_parse_line_userland_prefix() -> None:
    """A leading ``-`` on the OS field sets is_userland and is not part of the name."""
    parsed = _parse_line("5840:64:1:60:M*,S,T,N,W*:.:-nmap:syn stealth")
    assert parsed.os == "nmap"
    assert parsed.is_userland is True
def test_parse_line_combined_prefixes() -> None:
    """``-@`` on the OS field sets both flags and leaves the bare OS name."""
    parsed = _parse_line("5840:64:1:60:M*:.:-@Windows:fuzzy match")
    assert parsed.os == "Windows"
    assert parsed.is_userland is True
    assert parsed.is_approximate is True
def test_parse_line_quirks_non_empty() -> None:
    """Concatenated quirk letters are split into a set of single-letter flags."""
    parsed = _parse_line("5840:64:1:60:M*,S,T,N,W*:PZ:Linux:with quirks")
    wanted = frozenset({"P", "Z"})
    assert parsed.quirks == wanted
def test_parse_line_no_options_sentinel() -> None:
    """A ``.`` options field parses to a single option token of kind "."."""
    options = _parse_line("5840:64:1:60:.:.:Linux:barebones").options
    assert len(options) == 1
    assert options[0].kind == "."
def test_parse_line_t0_timestamp_distinct_from_t() -> None:
    """``T0`` keeps its own option kind rather than being folded into ``T``."""
    parsed = _parse_line("5840:64:1:60:M*,T0:.:Linux:broken timestamps")
    second = parsed.options[1]
    assert second.kind == "T0"
def test_parse_line_unknown_option_number() -> None:
    """``?nn`` parses as kind "?" carrying the option number as its value."""
    parsed = _parse_line("5840:64:1:60:M*,?47:.:Weird:stack")
    token = parsed.options[1]
    assert token.kind == "?"
    assert token.value is not None
    assert token.value.value == 47
def test_parse_line_rejects_too_few_fields() -> None:
    """A line with only four colon-separated fields raises P0fParseError."""
    truncated = "5840:64:1:60"
    with pytest.raises(P0fParseError):
        _parse_line(truncated)
def test_parse_line_rejects_bad_df() -> None:
    """An ``X`` in the DF column raises P0fParseError."""
    bad_df_line = "5840:64:X:60:M*:.:Linux:bad"
    with pytest.raises(P0fParseError):
        _parse_line(bad_df_line)
def test_parse_line_rejects_bad_window_token() -> None:
    """An unrecognised window token (``Kfoo``) raises P0fParseError."""
    bad_window_line = "Kfoo:64:1:60:M*:.:Linux:bad"
    with pytest.raises(P0fParseError):
        _parse_line(bad_window_line)
def test_parse_line_rejects_malformed_option() -> None:
    """An options field that is not a valid token raises P0fParseError."""
    bad_option_line = "5840:64:1:60:!!!wat:.:Linux:bad"
    with pytest.raises(P0fParseError):
        _parse_line(bad_option_line)
# ─── File-level tests ────────────────────────────────────────────────────────
def test_parse_file_skips_comments_blanks_bad_lines(tmp_path: Path) -> None:
    """Comments, blank lines and unparseable rows are dropped; valid rows survive."""
    fixture_lines = (
        "# comment\n",
        "\n",
        "5840:64:1:60:M1460,S,T,N,W7:.:Linux:2.6.x\n",
        "# another comment\n",
        "garbage line that should skip\n",
        "8192:128:1:48:M1460,N,W0,N,N,S:.:Windows:XP\n",
    )
    db_path = tmp_path / "test.fp"
    db_path.write_text("".join(fixture_lines))

    loaded = parse_p0f_v2(db_path)

    # Only the two well-formed signature rows make it through.
    assert len(loaded) == 2
    seen_os = {entry.os for entry in loaded}
    assert seen_os == {"Linux", "Windows"}
def test_parse_vendored_syn_db_fully_loads() -> None:
    """Guard against the parser silently dropping rows from the vendored p0f.fp.

    The expected inventory is 262 SYN signatures; any other count means
    OS-fingerprint coverage has regressed.
    """
    # Three directory levels above this test file, then down into the package data.
    root = Path(__file__).resolve().parents[3]
    data = root / "decnet/prober/osfp/p0f/data/p0f.fp"
    sigs = parse_p0f_v2(data)
    assert len(sigs) == 262, f"expected 262 SYN sigs, parser returned {len(sigs)}"
def test_parse_vendored_all_four_dbs_fully_load() -> None:
    """Each of the four vendored databases yields its expected signature count."""
    root = Path(__file__).resolve().parents[3]
    base = root / "decnet/prober/osfp/p0f/data"
    expected = (
        ("p0f.fp", 262),
        ("p0fa.fp", 61),
        ("p0fr.fp", 46),
        ("p0fo.fp", 6),
    )
    for name, want in expected:
        sigs = parse_p0f_v2(base / name)
        assert len(sigs) == want, f"{name}: expected {want}, got {len(sigs)}"
def test_parse_vendored_specificity_in_range() -> None:
    """No signature in the vendored p0f.fp has a specificity outside [0, 1]."""
    root = Path(__file__).resolve().parents[3]
    data = root / "decnet/prober/osfp/p0f/data/p0f.fp"
    for sig in parse_p0f_v2(data):
        in_range = 0.0 <= sig.specificity <= 1.0
        assert in_range, (
            f"{sig.os}/{sig.flavor}: specificity out of range ({sig.specificity})"
        )

View File

@@ -0,0 +1,125 @@
"""Tests for signature matching + scoring."""
from __future__ import annotations
import pytest
from decnet.prober.osfp.p0f.format import _parse_line
def _obs(**overrides: object) -> dict[str, object]:
    """Build the baseline observation (Linux 2.6 on Ethernet), overridable.

    Args:
        **overrides: Field values that replace (or extend) the baseline
            entries. Passing ``None`` for a field lets a test simulate a
            missing measurement.

    Returns:
        A freshly built dict on every call, so one test's overrides can
        never leak into another through a shared baseline.
    """
    return {
        "window": 5840,
        "ttl": 64,
        "df": True,
        "total_len": 60,
        "options_sig": "M1460,S,T,N,W7",
        "quirks": frozenset(),
        "mss": 1460,
        "wscale": 7,
        # Unpacked last so caller-supplied values win over the baseline.
        **overrides,
    }
# ─── Match / no-match ────────────────────────────────────────────────────────
def test_score_exact_match_is_high() -> None:
    """An all-literal signature scores at least 0.9 against the baseline observation."""
    sig = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:2.6.x literal")
    observation = _obs()
    score = sig.score(observation)
    assert score is not None
    assert score >= 0.9, f"literal-fields signature should score high, got {score}"
def test_score_wildcard_match_is_lower_than_literal() -> None:
    """Both signatures match the baseline, but the literal one scores strictly higher."""
    obs = _obs()
    ls = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:literal").score(obs)
    ws = _parse_line("*:64:1:*:M*,S,T,N,W*:.:Linux:wildcard").score(obs)

    # Both must match before their scores can be compared.
    assert ls is not None
    assert ws is not None
    assert ls > ws, f"literal ({ls}) should outscore wildcard ({ws})"
def test_score_window_mismatch_returns_none() -> None:
    """A literal window of 5840 does not match an observed window of 64240."""
    fixed = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:fixed")
    result = fixed.score(_obs(window=64240))
    assert result is None
def test_score_ttl_mismatch_returns_none() -> None:
    """A TTL-64 signature does not match an observation with TTL 128."""
    ttl64 = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:ttl64")
    result = ttl64.score(_obs(ttl=128))
    assert result is None
def test_score_df_mismatch_returns_none() -> None:
    """A signature with DF=1 does not match an observation with DF unset."""
    df_required = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:df-required")
    result = df_required.score(_obs(df=False))
    assert result is None
def test_score_df_wildcard_on_signature_matches_either() -> None:
    """A ``*`` in the DF column matches observations with DF set and with DF unset."""
    any_df = _parse_line("5840:64:*:60:M1460,S,T,N,W7:.:Linux:any-df")
    with_df = any_df.score(_obs(df=True))
    assert with_df is not None
    without_df = any_df.score(_obs(df=False))
    assert without_df is not None
def test_score_options_order_mismatch_returns_none() -> None:
    """Option order is significant: the same tokens reshuffled must not match."""
    ordered = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:ordered")
    reshuffled = _obs(options_sig="S,T,M1460,N,W7")
    assert ordered.score(reshuffled) is None
def test_score_options_missing_token_returns_none() -> None:
    """A five-option signature does not match an observation carrying only four."""
    five_opts = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:5opts")
    four_only = _obs(options_sig="M1460,S,T,N")
    assert five_opts.score(four_only) is None
def test_score_quirks_must_match_as_set() -> None:
    """Quirks match only on set equality: no missing and no extra flags."""
    with_pz = _parse_line("5840:64:1:60:M*,S,T,N,W*:PZ:Linux:with PZ")

    exact = _obs(quirks=frozenset({"P", "Z"}))
    assert with_pz.score(exact) is not None

    # Z is absent from the observation.
    subset = _obs(quirks=frozenset({"P"}))
    assert with_pz.score(subset) is None

    # I is present in the observation but not in the signature.
    superset = _obs(quirks=frozenset({"P", "Z", "I"}))
    assert with_pz.score(superset) is None
def test_score_mss_multiple_window() -> None:
    """An ``S4`` window matches only when the observed window equals 4 * MSS."""
    s4 = _parse_line("S4:64:1:60:M1460,S,T,N,W7:.:Linux:S4")

    # 4 * 1460 == 5840.
    ethernet = _obs(window=5840, mss=1460)
    assert s4.score(ethernet) is not None

    # 4 * 536 == 2144.
    small_mss = _obs(window=2144, mss=536)
    assert s4.score(small_mss) is not None

    # 5840 is not 4 * 536, so this pairing must be rejected.
    inconsistent = _obs(window=5840, mss=536)
    assert s4.score(inconsistent) is None
def test_score_modulo_window() -> None:
    """A ``%8192`` window matches multiples of 8192 and rejects anything else."""
    mod8192 = _parse_line("%8192:64:1:60:M1460,S,T,N,W7:.:Linux:mod8192")

    four_times = mod8192.score(_obs(window=32768))
    assert four_times is not None
    five_times = mod8192.score(_obs(window=40960))
    assert five_times is not None

    off_by_one = mod8192.score(_obs(window=32769))
    assert off_by_one is None
def test_score_no_options_sentinel() -> None:
    """A ``.`` options signature matches empty or absent options and nothing else."""
    no_opts = _parse_line("5840:64:1:60:.:.:Linux:no-opts")

    empty_string = no_opts.score(_obs(options_sig=""))
    assert empty_string is not None
    absent = no_opts.score(_obs(options_sig=None))
    assert absent is not None

    has_option = no_opts.score(_obs(options_sig="M1460"))
    assert has_option is None
def test_score_missing_observation_fields_returns_none() -> None:
    """Safety invariant: score() returns None when the observation lacks data.

    sniffer_rollup may call score() with partial data, so a signature that
    pins a specific window or TTL must not match an observation where that
    field is None.
    """
    strict = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:strict")
    no_window = _obs(window=None)
    assert strict.score(no_window) is None
    no_ttl = _obs(ttl=None)
    assert strict.score(no_ttl) is None
def test_score_option_value_wildcard_matches_any_literal() -> None:
    """``M*`` and ``W*`` accept any concrete MSS and window-scale value."""
    wild = _parse_line("5840:64:1:60:M*,S,T,N,W*:.:Linux:wild-mss-wscale")
    baseline = wild.score(_obs(options_sig="M1460,S,T,N,W7"))
    assert baseline is not None
    other_values = wild.score(_obs(options_sig="M536,S,T,N,W2"))
    assert other_values is not None
def test_score_option_value_modulo() -> None:
    """``M%4`` matches an MSS divisible by 4 and rejects one that is not."""
    mss_mod_4 = _parse_line("5840:64:1:60:M%4,S,T,N,W7:.:Linux:mss-mod-4")

    # 1460 % 4 == 0
    divisible = mss_mod_4.score(_obs(options_sig="M1460,S,T,N,W7"))
    assert divisible is not None

    # 1461 leaves a remainder of 1.
    not_divisible = mss_mod_4.score(_obs(options_sig="M1461,S,T,N,W7"))
    assert not_divisible is None