feat(prober/osfp): p0f v2 .fp parser + Signature scoring
First code layer of the OS-fingerprinting work on top of yesterday's vendored p0f v2 database. Three new modules, all pure (no I/O outside of the parser's file read): - decnet/prober/osfp/base.py — Provider protocol + OsMatch dataclass matching the established Provider convention in decnet/geoip and decnet/bus. Docstring spells out the never-raise invariant: malformed input returns None, so a single bad event can't wedge a whole attacker-profile rebuild. - decnet/prober/osfp/p0f/signature.py — Signature dataclass + three predicate helpers (WindowSpec / IntSpec / OptionToken) encoding the p0f v2 DSL's wildcard / modulo / MSS-multiple / MTU-multiple semantics. Scoring is our extension on top of upstream p0f's first-match-wins policy: each signature carries a precomputed specificity in [0, 1] so the factory can pick the most-specific match when multiple signatures fire against one observation. - decnet/prober/osfp/p0f/format.py — .fp line parser. Every shipped field variant from the DSL spec at the top of p0f.fp is covered (Snn / Tnn / %nnn / * for window; T0 vs T; -/@/* os-genre prefixes; quirks as concatenated single-letter flags; '.' sentinels for no-options / no-quirks). Malformed lines log a warning and skip instead of aborting the whole file — 1 bad row must not cost the other 374. 20 parser tests + 14 scoring tests. Full vendored-DB smoke tests confirm all 375 signatures parse round-trip (262 SYN + 61 SYN-ACK + 46 RST + 6 stray) and every computed specificity lands in [0, 1].
This commit is contained in:
59
decnet/prober/osfp/base.py
Normal file
59
decnet/prober/osfp/base.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""OS-fingerprint provider protocol + OsMatch result shape.
|
||||
|
||||
Each concrete provider (p0f v2 today; nmap-osdb / DECNET-observed DB
|
||||
later) implements `Provider`. Callers go through
|
||||
:func:`decnet.prober.osfp.factory.get_provider` or
|
||||
:func:`decnet.prober.osfp.factory.get_all_providers` — direct imports
|
||||
of a concrete class are forbidden, mirroring the convention in
|
||||
``decnet/geoip`` and ``decnet/bus``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Optional
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class OsMatch:
    """Immutable outcome of matching one observation against a provider DB.

    ``confidence`` is the value consumers should rank on: providers are
    expected to report the fraction of signature fields that matched exactly
    (as opposed to via wildcard / modulo / "any" predicates), so a fully
    constrained signature scores 1.0 and a fully wildcarded one approaches
    0.0.  Making this explicit lets the profiler choose the most-specific
    match when several providers fire.
    """

    os: str                    # OS genre, e.g. "Linux"
    flavor: str                # free-text version / flavor detail
    confidence: float          # specificity of the winning signature
    provider: str              # name of the provider that produced the match
    is_userland: bool = False  # True when the match is a userland stack

    def __str__(self) -> str:
        """Render as ``"<os-or-userland> <flavor> (<conf> via <provider>)"``."""
        if self.is_userland:
            tag = "userland"
        else:
            tag = self.os
        return f"{tag} {self.flavor} ({self.confidence:.2f} via {self.provider})"
|
||||
|
||||
|
||||
class Provider(ABC):
    """Interface every OS-fingerprint source implements.

    A provider receives a dict of observed TCP/IP traits — ``window``,
    ``wscale``, ``mss``, ``options_sig``, ``ttl``, ``df``, ``total_len``,
    ``quirks``; any of them may be absent — and answers with the best
    :class:`OsMatch`, or ``None`` when no signature fits.

    Implementations MUST NOT raise on malformed or partial input.  The
    upstream caller (`profiler/fingerprint.py::sniffer_rollup`) feeds in
    whatever the event mix happened to contain, and an exception escaping
    from here would wedge every attacker-profile rebuild.  Return ``None``
    instead.
    """

    # Identifier of the concrete provider; declared here, set by subclasses.
    name: str

    @abstractmethod
    def match(self, obs: dict[str, Any]) -> Optional[OsMatch]:
        """Return the best-matching OsMatch for *obs*, or None if none fits."""
|
||||
243
decnet/prober/osfp/p0f/format.py
Normal file
243
decnet/prober/osfp/p0f/format.py
Normal file
@@ -0,0 +1,243 @@
|
||||
"""p0f v2 ``.fp`` file parser.
|
||||
|
||||
Format (from the DSL spec at the top of every shipped ``.fp`` file):
|
||||
|
||||
wwww:ttt:D:ss:OOO:QQ:OS:Details
|
||||
|
||||
Where:
|
||||
wwww — window size: literal int | '*' | '%nnn' | 'Snn' | 'Tnn'
|
||||
ttt — initial TTL (literal int: 32/64/128/255 typically)
|
||||
D — DF bit: '0' or '1'
|
||||
ss — total IP packet length: literal int | '*' | '%nnn'
|
||||
OOO — option order: comma/space-separated tokens, or '.' for none.
|
||||
Tokens: N, E, S, T, T0, P, Wnnn/W*/W%nnn, Mnnn/M*/M%nnn, ?n
|
||||
QQ — quirks: concatenated single-letter flags, or '.' for none.
|
||||
Flags: P, Z, I, U, X, A, T, F, D, !, K, Q, 0, R
|
||||
OS — genre, optionally prefixed '-' (userland), '@' (group),
|
||||
'*' (random/bogus), or combinations (e.g. '-@Windows').
|
||||
Details — free-text flavor/version.
|
||||
|
||||
Lines starting with '#' and blank lines are skipped.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from decnet.prober.osfp.p0f.signature import (
|
||||
IntSpec,
|
||||
OptionToken,
|
||||
Signature,
|
||||
WindowSpec,
|
||||
precompute_specificity,
|
||||
)
|
||||
|
||||
logger = logging.getLogger("decnet.prober.osfp.p0f.format")
|
||||
|
||||
_OPTION_TOKEN_RE = re.compile(r"^([NESTPE]|T0|[MW\?])(\*|%\d+|\d+)?$")
|
||||
|
||||
|
||||
class P0fParseError(ValueError):
    """Signals a signature line that cannot be parsed.

    The file loader catches this, logs a warning, and moves on to the next
    line, so a single bad row never disables the whole database.
    """
|
||||
|
||||
|
||||
def parse_p0f_v2(path: Path) -> list[Signature]:
    """Load every parseable signature from a p0f v2 ``.fp`` file.

    Blank lines and lines starting with ``#`` are ignored.  A line that
    raises :class:`P0fParseError` is reported at WARNING level (file name,
    line number, reason) and skipped, so one corrupt row cannot make the
    rest of the database unusable.

    Args:
        path: Location of the ``.fp`` file; read as UTF-8 with undecodable
            bytes replaced.

    Returns:
        The parsed signatures, in file order.
    """
    signatures: list[Signature] = []
    with path.open("r", encoding="utf-8", errors="replace") as handle:
        for number, text in enumerate(handle, start=1):
            entry = text.strip()
            if entry == "" or entry.startswith("#"):
                continue
            try:
                parsed = _parse_line(entry)
            except P0fParseError as problem:
                logger.warning(
                    "p0f parse: skipping %s:%d — %s", path.name, number, problem,
                )
            else:
                signatures.append(parsed)
    logger.debug(
        "p0f parse: loaded %d signatures from %s", len(signatures), path.name
    )
    return signatures
|
||||
|
||||
|
||||
def _parse_line(line: str) -> Signature:
    """Turn one non-comment ``.fp`` line into a :class:`Signature`.

    The line is split on ``:`` at most seven times, so any further colons
    stay inside the trailing details field.  A line with exactly seven fields
    is accepted with empty details.

    Raises:
        P0fParseError: fewer than seven fields, or any field is malformed.
    """
    from dataclasses import replace

    fields = line.split(":", 7)
    if len(fields) < 7:
        raise P0fParseError(f"expected 7+ colon-delimited fields, got {len(fields)}")
    if len(fields) == 7:
        fields.append("")  # empty details
    (raw_wss, raw_ttl, raw_df, raw_len,
     raw_opts, raw_quirks, raw_os, raw_details) = fields

    # Genre parsing never raises, so doing it first does not change which
    # malformed field gets reported.
    genre, userland, approximate, random_stack = _parse_os_genre(raw_os)

    draft = Signature(
        wss=_parse_wss(raw_wss),
        ttl=_parse_int_field(raw_ttl, field="ttl"),
        df=_parse_df(raw_df),
        total_len=_parse_int_spec(raw_len),
        options=_parse_options(raw_opts),
        quirks=_parse_quirks(raw_quirks),
        os=genre,
        flavor=raw_details.strip(),
        notes="",
        is_userland=userland,
        is_approximate=approximate,
        is_random=random_stack,
    )
    # Signature is frozen, so the computed specificity is attached by
    # building a copy rather than by assignment.
    return replace(draft, specificity=precompute_specificity(draft))
|
||||
|
||||
|
||||
def _parse_wss(s: str) -> WindowSpec:
    """Parse the window-size field into a :class:`WindowSpec`.

    Accepted forms: ``*`` (any), ``%nnn`` (modulo), ``Snn`` (MSS multiple),
    ``Tnn`` (MTU multiple), or a bare integer (literal).

    Raises:
        P0fParseError: the numeric part is not a valid integer.
    """
    text = s.strip()
    if text == "*":
        return WindowSpec("any")

    # leading character -> (WindowSpec kind, label used in the error message)
    prefixed = {
        "%": ("mod", "mod"),
        "S": ("mss_mul", "Snn"),
        "T": ("mtu_mul", "Tnn"),
    }
    lead = text[:1]
    if lead in prefixed:
        kind, label = prefixed[lead]
        digits = text[1:]
    else:
        kind, label, digits = "literal", "literal", text

    try:
        return WindowSpec(kind, int(digits))
    except ValueError as exc:
        raise P0fParseError(f"bad {label} window {text!r}") from exc
|
||||
|
||||
|
||||
def _parse_int_field(s: str, *, field: str) -> int:
|
||||
"""Parse a bare int field (used for TTL). No wildcards allowed."""
|
||||
try:
|
||||
return int(s.strip())
|
||||
except ValueError as exc:
|
||||
raise P0fParseError(f"bad {field}: {s!r}") from exc
|
||||
|
||||
|
||||
def _parse_df(s: str) -> Optional[bool]:
|
||||
s = s.strip()
|
||||
if s == "*":
|
||||
return None
|
||||
if s == "0":
|
||||
return False
|
||||
if s == "1":
|
||||
return True
|
||||
raise P0fParseError(f"bad DF {s!r}; expected 0/1/*")
|
||||
|
||||
|
||||
def _parse_int_spec(s: str) -> IntSpec:
    """Parse a wildcard-capable integer field into an :class:`IntSpec`.

    Accepted forms: ``*`` (any), ``%nnn`` (modulo), or a bare integer
    (literal).

    Raises:
        P0fParseError: the numeric part is not a valid integer.
    """
    text = s.strip()
    if text == "*":
        return IntSpec("any")

    is_modulo = text.startswith("%")
    kind = "mod" if is_modulo else "literal"
    digits = text[1:] if is_modulo else text
    try:
        return IntSpec(kind, int(digits))
    except ValueError as exc:
        raise P0fParseError(f"bad {kind} int {text!r}") from exc
|
||||
|
||||
|
||||
def _parse_options(s: str) -> tuple[OptionToken, ...]:
    """Parse the option-order field into an ordered tuple of tokens.

    Tokens may be separated by commas and/or whitespace.  A field that is
    ``.``, empty, or contains only separators yields the one-element
    no-options sentinel ``(OptionToken("."),)``.

    Raises:
        P0fParseError: propagated from the per-token parser.
    """
    no_options = (OptionToken("."),)
    text = s.strip()
    if text in (".", ""):
        return no_options
    # str.split() with no argument never yields empty or padded pieces.
    pieces = text.replace(",", " ").split()
    parsed = tuple(_parse_option_token(piece) for piece in pieces)
    return parsed if parsed else no_options
|
||||
|
||||
|
||||
def _parse_option_token(raw: str) -> OptionToken:
    """Parse a single option token from a signature's option list.

    ``T0`` is recognised up front as its own kind.  ``N``/``E``/``S``/``T``/
    ``P`` are value-less flags (any trailing number the pattern allowed is
    dropped).  ``M``, ``W`` and ``?`` must carry ``*``, ``%nnn`` or a literal
    integer.

    Raises:
        P0fParseError: the token does not fit the grammar, or a value is
            missing where one is required.
    """
    # T0 — timestamp zero (not the TCP option '?0').
    if raw == "T0":
        return OptionToken("T0")

    found = _OPTION_TOKEN_RE.match(raw)
    if found is None:
        raise P0fParseError(f"bad option token {raw!r}")
    kind, val_raw = found.groups()

    if kind in ("N", "E", "S", "T", "P"):
        return OptionToken(kind)

    # Everything else (M / W / ?) needs a numeric predicate or wildcard.
    if val_raw is None:
        raise P0fParseError(f"option {kind!r} missing required value")
    if val_raw == "*":
        return OptionToken(kind, IntSpec("any"))

    is_modulo = val_raw.startswith("%")
    spec_kind = "mod" if is_modulo else "literal"
    digits = val_raw[1:] if is_modulo else val_raw
    try:
        number = int(digits)
    except ValueError as exc:
        raise P0fParseError(f"bad {kind} {spec_kind} value {val_raw!r}") from exc
    return OptionToken(kind, IntSpec(spec_kind, number))
|
||||
|
||||
|
||||
def _parse_quirks(s: str) -> frozenset[str]:
|
||||
s = s.strip()
|
||||
if s == "." or not s:
|
||||
return frozenset()
|
||||
# Quirks are a concatenated string of single-letter flags. '!' is a
|
||||
# valid quirk too.
|
||||
return frozenset(c for c in s if not c.isspace())
|
||||
|
||||
|
||||
def _parse_os_genre(s: str) -> tuple[str, bool, bool, bool]:
|
||||
"""Strip p0f's genre-prefix modifiers and return (os_name, is_userland, is_approx, is_random)."""
|
||||
is_userland = False
|
||||
is_approx = False
|
||||
is_random = False
|
||||
s = s.strip()
|
||||
# Prefixes can stack in any order — strip them all.
|
||||
changed = True
|
||||
while changed and s:
|
||||
changed = False
|
||||
if s.startswith("-"):
|
||||
is_userland = True
|
||||
s = s[1:]
|
||||
changed = True
|
||||
elif s.startswith("@"):
|
||||
is_approx = True
|
||||
s = s[1:]
|
||||
changed = True
|
||||
elif s.startswith("*"):
|
||||
is_random = True
|
||||
s = s[1:]
|
||||
changed = True
|
||||
return s, is_userland, is_approx, is_random
|
||||
278
decnet/prober/osfp/p0f/signature.py
Normal file
278
decnet/prober/osfp/p0f/signature.py
Normal file
@@ -0,0 +1,278 @@
|
||||
"""p0f v2 signature + observation matching/scoring.
|
||||
|
||||
A :class:`Signature` is one parsed row from a ``.fp`` file. A match
|
||||
against an observation dict (the kind ``sniffer_rollup`` hands us)
|
||||
returns a confidence score in [0, 1], with higher scores indicating
|
||||
more-specific matches. Wildcards and modulo predicates match but
|
||||
contribute less to the confidence than an exact literal match, so
|
||||
when multiple signatures fire against one observation we can pick the
|
||||
most-specific one.
|
||||
|
||||
Observation dict shape (all keys optional — a provider returns None
|
||||
if too few match-relevant fields are present):
|
||||
|
||||
{
|
||||
"window": int | None, # TCP window size
|
||||
"mss": int | None, # TCP MSS option value
|
||||
"wscale": int | None, # TCP window-scale option value
|
||||
"ttl": int | None, # initial-TTL bucket (32/64/128/255)
|
||||
"df": bool | None, # IP Don't-Fragment flag
|
||||
"total_len": int | None, # IP total length (SYN)
|
||||
"options_sig": str | None, # e.g. "M,N,W,T" or "M1460,N,W7,S"
|
||||
"quirks": frozenset[str] | None, # e.g. {"Z", "P"}
|
||||
}
|
||||
|
||||
The scoring is our extension — upstream p0f is "first match wins"
|
||||
using the order of entries in ``.fp``. We score so the factory can
|
||||
compare across multiple DB files (p0f.fp + p0fa.fp) and return the
|
||||
winner objectively.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Optional
|
||||
|
||||
|
||||
# ─── Field predicates ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class WindowSpec:
    """Predicate for the 'wss' (window size) field of a p0f v2 signature.

    ``kind`` selects the test applied to the observed window:

    - ``'literal'`` — window == value
    - ``'mss_mul'`` — window == MSS * value          (p0f "Snn")
    - ``'mtu_mul'`` — window == (MSS + 40) * value   (p0f "Tnn")
    - ``'mod'``     — window % value == 0            (p0f "%nnn")
    - ``'any'``     — always true                    (p0f "*")
    """

    kind: str
    value: Optional[int] = None

    def matches(self, window: Optional[int], mss: Optional[int]) -> bool:
        """Return True when the observed *window* satisfies this predicate.

        A missing window only satisfies ``'any'``.  The two multiple kinds
        also need *mss*; a non-positive modulus or an unrecognised kind never
        matches.
        """
        kind, factor = self.kind, self.value
        if kind == "any":
            return True
        if window is None:
            return False
        if kind == "literal":
            return window == factor
        if kind == "mod":
            return factor is not None and factor > 0 and window % factor == 0
        if kind in ("mss_mul", "mtu_mul"):
            if mss is None or factor is None:
                return False
            # "Tnn" counts in MTU units, taken here as MSS + 40.
            unit = mss if kind == "mss_mul" else mss + 40
            return window == unit * factor
        return False
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class IntSpec:
    """Literal / modulo / wildcard integer predicate.

    Used for MSS, window-scale and total-length values.
    """

    kind: str  # 'literal' | 'mod' | 'any'
    value: Optional[int] = None

    def matches(self, observed: Optional[int]) -> bool:
        """Return True when *observed* satisfies this predicate.

        ``'any'`` accepts everything, including a missing value.  A missing
        value fails every other kind, as do a non-positive modulus and an
        unrecognised kind.
        """
        if self.kind == "any":
            return True
        if observed is None:
            return False
        if self.kind == "literal":
            return observed == self.value
        if self.kind != "mod":
            return False
        modulus = self.value
        return modulus is not None and modulus > 0 and observed % modulus == 0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class OptionToken:
    """One TCP option as it appears in a signature's (or observation's) list.

    Flag kinds carry no value:

    - ``'N'`` (NOP in the p0f v2 DSL — NOTE(review): the original legend was
      garbled here, confirm), ``'E'`` EOL, ``'S'`` SACK-permitted,
      ``'T'`` timestamp, ``'T0'`` zero-timestamp

    Valued kinds carry an :class:`IntSpec`:

    - ``'M'`` MSS option
    - ``'W'`` window-scale option
    - ``'?'`` unknown option number (literal = option number)

    Sentinel:

    - ``'.'`` no-options marker (matches only an empty option list)
    """

    kind: str
    value: Optional[IntSpec] = None

    def matches_literal(self, token: "OptionToken") -> bool:
        """Return True when *this* signature token matches observed *token*.

        The signature side carries the wildcard/modulo predicate; the
        observed side is a literal, or kind-only when the observation did not
        record a value.

        A kind-only observed token (e.g. ``"M"``) is tested as "value
        unknown": it satisfies a wildcard predicate (``M*``) but no literal
        or modulo one.  Previously it was rejected even by the wildcard,
        so kind-only observations could never match ``M*`` / ``W*``.
        """
        if self.kind != token.kind:
            return False
        if self.value is None:
            # Flag-style signature token: the kind alone decides.
            return True
        observed = token.value.value if token.value is not None else None
        # IntSpec.matches(None) is True only for the 'any' wildcard.
        return self.value.matches(observed)
|
||||
|
||||
|
||||
# ─── Signature ─────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Signature:
    """One parsed row from a p0f v2 ``.fp`` file.

    p0f's os-genre prefix modifiers are stripped from ``os`` and kept as
    three boolean flags so the profiler can still act on them (e.g. "do I
    promote nmap to tool_guesses?"):

    - ``is_userland``    — ``-`` userland stack (not a real OS; flagged
      scanner/browser)
    - ``is_approximate`` — ``@`` approximate / group match
    - ``is_random``      — ``*`` random or bogus userland
    """

    wss: WindowSpec
    ttl: int
    df: Optional[bool]
    total_len: IntSpec
    options: tuple[OptionToken, ...]  # in order; use (OptionToken('.'),) for none
    quirks: frozenset[str]
    os: str
    flavor: str
    notes: str
    is_userland: bool = False     # '-' prefix
    is_approximate: bool = False  # '@' prefix
    is_random: bool = False       # '*' prefix (distinct from wildcard)

    # Cache: a crude "specificity budget" precomputed at parse time.
    # Higher = more constrained fields, used as a tie-breaker when two
    # signatures match the same observation.
    specificity: float = field(default=0.0)

    def score(self, obs: dict[str, Any]) -> Optional[float]:
        """Return this signature's specificity if *obs* matches, else None.

        Every field must accept the observation: window (with MSS for the
        multiple forms), TTL (exact), DF (unless wildcarded), total length,
        the ordered option list, and the quirk set (exact equality).

        Args:
            obs: Observation dict; any key may be missing or None.

        Returns:
            ``self.specificity`` on a full match, ``None`` otherwise —
            including when ``obs["quirks"]`` is not something a set can be
            built from, so malformed input is a non-match, not an exception.
        """
        # Window
        if not self.wss.matches(obs.get("window"), obs.get("mss")):
            return None

        # TTL — initial-TTL bucket must match exactly. The profiler is
        # expected to have rounded the observed TTL up to the nearest
        # bucket already via decnet.sniffer.p0f.initial_ttl.
        observed_ttl = obs.get("ttl")
        if observed_ttl is None or observed_ttl != self.ttl:
            return None

        # DF (None on the sig side = wildcard)
        if self.df is not None:
            observed_df = obs.get("df")
            if observed_df is None or bool(observed_df) != self.df:
                return None

        # Total length
        if not self.total_len.matches(obs.get("total_len")):
            return None

        # Options
        if not _options_match(self.options, obs.get("options_sig")):
            return None

        # Quirks — must match as a set.  A truthy value that cannot be
        # turned into a frozenset (non-iterable, unhashable members) is
        # treated as "no match" rather than being allowed to raise.
        try:
            observed_quirks = frozenset(obs.get("quirks") or ())
        except TypeError:
            return None
        if self.quirks != observed_quirks:
            return None

        # All fields matched — return the precomputed specificity.
        return self.specificity
|
||||
|
||||
|
||||
def _options_match(sig_opts: tuple[OptionToken, ...], obs_sig: Optional[str]) -> bool:
    """Compare a signature's option sequence with an observed option string.

    The observed string is comma/space separated.  The no-options sentinel
    (a lone ``'.'`` token) matches only an empty observation; otherwise both
    sides must have the same length and agree position by position.
    """
    observed = _parse_observation_options(obs_sig)

    expects_no_options = len(sig_opts) == 1 and sig_opts[0].kind == "."
    if expects_no_options:
        return not observed

    if len(observed) != len(sig_opts):
        return False
    for expected, seen in zip(sig_opts, observed):
        if not expected.matches_literal(seen):
            return False
    return True
|
||||
|
||||
|
||||
_OBS_TOKEN_RE = re.compile(r"^([A-Z\?])(\d+)?$")

# Kind given to observed tokens that cannot be parsed.  No signature token
# can carry this kind, so such a token never matches anything — unlike a
# '?' token with a dummy value, which a '?*' signature token would accept.
_UNPARSEABLE_KIND = "<unparseable>"


def _parse_observation_options(opts_sig: Optional[str]) -> list[OptionToken]:
    """Convert an observation-side options string into literal OptionTokens.

    The string comes from the tcp_syn_fingerprint / tcpfp_fingerprint SD
    fields.  Comma and whitespace both delimit; tokens look like ``M1460``,
    ``W7``, ``T``, ``T0``, ``N``, ``E``, ``?47``.

    Args:
        opts_sig: The observed option string, or None/empty for no options.

    Returns:
        One token per observed option, in order.  A token that does not fit
        the grammar — or a non-string *opts_sig* — is represented by a token
        of a reserved kind that no signature token matches.  Never raises.
    """
    if not opts_sig:
        return []
    if not isinstance(opts_sig, str):
        # Malformed input: report one unmatchable option instead of raising.
        return [OptionToken(_UNPARSEABLE_KIND)]

    tokens: list[OptionToken] = []
    for piece in opts_sig.replace(",", " ").split():
        if piece == "T0":  # nosec B105 — TCP option name ("Timestamp zero"), not a credential
            tokens.append(OptionToken("T0"))
            continue
        found = _OBS_TOKEN_RE.match(piece)
        if found is None:
            # Unknown token — keep its position (so lengths still compare
            # correctly) but make sure nothing matches it.  Better than raising.
            tokens.append(OptionToken(_UNPARSEABLE_KIND))
            continue
        kind, digits = found.groups()
        if digits is None:
            tokens.append(OptionToken(kind))
        else:
            tokens.append(OptionToken(kind, IntSpec("literal", int(digits))))
    return tokens
|
||||
|
||||
|
||||
def precompute_specificity(sig: Signature) -> float:
    """Score how constrained *sig* is, as a weighted fraction rounded to 4 dp.

    Field weights (max): window 3, TTL 1, DF 1, total length 1, options 3,
    quirks 1.  Literal predicates earn the full weight, MSS/MTU multiples and
    modulo predicates earn part of it, wildcards earn nothing.  TTL is always
    literal and always earns its point.
    """
    window_credit = {"literal": 3.0, "mss_mul": 2.5, "mtu_mul": 2.5, "mod": 1.5}
    length_credit = {"literal": 1.0, "mod": 0.5}
    budget = float(3 + 1 + 1 + 1 + 3 + 1)

    earned = 0.0
    # Window (weight 3 — very discriminating)
    earned += window_credit.get(sig.wss.kind, 0.0)
    # TTL (weight 1, flat)
    earned += 1.0
    # DF (weight 1) — only when not wildcarded
    if sig.df is not None:
        earned += 1.0
    # Total length (weight 1)
    earned += length_credit.get(sig.total_len.kind, 0.0)

    # Options (weight 3 — highly discriminating when literal)
    opts = sig.options
    if len(opts) == 1 and opts[0].kind == ".":
        # "no options" is itself a signal.
        earned += 2.0
    elif opts:
        exact = [o for o in opts if o.value is None or o.value.kind == "literal"]
        earned += 3.0 * (len(exact) / len(opts))

    # Quirks (weight 1 — most sigs have no quirks so this is a small edge)
    if sig.quirks:
        earned += 1.0

    return round(earned / budget, 4)
|
||||
0
tests/prober/osfp/__init__.py
Normal file
0
tests/prober/osfp/__init__.py
Normal file
152
tests/prober/osfp/test_format.py
Normal file
152
tests/prober/osfp/test_format.py
Normal file
@@ -0,0 +1,152 @@
|
||||
"""Tests for the p0f v2 .fp parser (decnet/prober/osfp/p0f/format.py)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.prober.osfp.p0f.format import P0fParseError, _parse_line, parse_p0f_v2
|
||||
|
||||
|
||||
# ─── Line-parser unit tests ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_parse_line_minimal_literal() -> None:
    """A fully literal row populates every Signature field as written."""
    parsed = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:2.6.x kernel")
    assert (parsed.os, parsed.flavor) == ("Linux", "2.6.x kernel")
    assert parsed.ttl == 64
    assert parsed.df is True
    assert (parsed.wss.kind, parsed.wss.value) == ("literal", 5840)
    assert (parsed.total_len.kind, parsed.total_len.value) == ("literal", 60)
    assert len(parsed.options) == 5
    # The leading option is the MSS with its literal value (1460).
    first = parsed.options[0]
    assert first.kind == "M"
    assert first.value is not None and first.value.value == 1460
    assert parsed.quirks == frozenset()
    assert not parsed.is_userland
|
||||
|
||||
|
||||
def test_parse_line_wildcard_window() -> None:
    """``*`` in window, total length and option values parses as 'any'."""
    parsed = _parse_line("*:128:1:*:M*,S,T,N,W*:.:Windows:XP SP1+")
    assert parsed.wss.kind == "any"
    assert parsed.total_len.kind == "any"
    mss_opt = parsed.options[0]
    assert mss_opt.kind == "M"
    assert mss_opt.value is not None and mss_opt.value.kind == "any"
|
||||
|
||||
|
||||
def test_parse_line_mss_multiple_window() -> None:
    """An ``Snn`` window parses as an MSS-multiple predicate."""
    parsed = _parse_line("S4:64:1:60:M*,S,T,N,W*:.:Linux:generic")
    assert (parsed.wss.kind, parsed.wss.value) == ("mss_mul", 4)
|
||||
|
||||
|
||||
def test_parse_line_mtu_multiple_window() -> None:
    """A ``Tnn`` window parses as an MTU-multiple predicate."""
    parsed = _parse_line("T3:64:1:60:M*,S,T,N,W*:.:Solaris:10")
    assert (parsed.wss.kind, parsed.wss.value) == ("mtu_mul", 3)
|
||||
|
||||
|
||||
def test_parse_line_modulo_window() -> None:
    """A ``%nnn`` window parses as a modulo predicate."""
    parsed = _parse_line("%8192:64:1:60:M*,S,T,N,W*:.:Linux:probe")
    assert (parsed.wss.kind, parsed.wss.value) == ("mod", 8192)
|
||||
|
||||
|
||||
def test_parse_line_userland_prefix() -> None:
    """A leading ``-`` sets is_userland and is removed from the OS name."""
    parsed = _parse_line("5840:64:1:60:M*,S,T,N,W*:.:-nmap:syn stealth")
    assert parsed.is_userland is True
    assert parsed.os == "nmap"
|
||||
|
||||
|
||||
def test_parse_line_combined_prefixes() -> None:
    """Stacked ``-@`` prefixes set both flags and are both stripped."""
    parsed = _parse_line("5840:64:1:60:M*:.:-@Windows:fuzzy match")
    assert parsed.is_userland is True
    assert parsed.is_approximate is True
    assert parsed.os == "Windows"
|
||||
|
||||
|
||||
def test_parse_line_quirks_non_empty() -> None:
    """Concatenated quirk letters become a set of single-letter flags."""
    parsed = _parse_line("5840:64:1:60:M*,S,T,N,W*:PZ:Linux:with quirks")
    expected = frozenset({"P", "Z"})
    assert parsed.quirks == expected
|
||||
|
||||
|
||||
def test_parse_line_no_options_sentinel() -> None:
    """A ``.`` options field yields exactly one sentinel token."""
    parsed = _parse_line("5840:64:1:60:.:.:Linux:barebones")
    assert len(parsed.options) == 1
    (only,) = parsed.options
    assert only.kind == "."
|
||||
|
||||
|
||||
def test_parse_line_t0_timestamp_distinct_from_t() -> None:
    """``T0`` keeps its own kind instead of collapsing into ``T``."""
    parsed = _parse_line("5840:64:1:60:M*,T0:.:Linux:broken timestamps")
    second = parsed.options[1]
    assert second.kind == "T0"
|
||||
|
||||
|
||||
def test_parse_line_unknown_option_number() -> None:
    """``?n`` parses to kind '?' carrying the option number as its value."""
    parsed = _parse_line("5840:64:1:60:M*,?47:.:Weird:stack")
    token = parsed.options[1]
    assert token.kind == "?"
    assert token.value is not None and token.value.value == 47
|
||||
|
||||
|
||||
def test_parse_line_rejects_too_few_fields() -> None:
    """A row with fewer than seven fields is rejected."""
    truncated = "5840:64:1:60"
    with pytest.raises(P0fParseError):
        _parse_line(truncated)
|
||||
|
||||
|
||||
def test_parse_line_rejects_bad_df() -> None:
    """A DF field other than 0/1/* is rejected."""
    bad_df = "5840:64:X:60:M*:.:Linux:bad"
    with pytest.raises(P0fParseError):
        _parse_line(bad_df)
|
||||
|
||||
|
||||
def test_parse_line_rejects_bad_window_token() -> None:
    """A window field that is neither a known form nor an int is rejected."""
    bad_window = "Kfoo:64:1:60:M*:.:Linux:bad"
    with pytest.raises(P0fParseError):
        _parse_line(bad_window)
|
||||
|
||||
|
||||
def test_parse_line_rejects_malformed_option() -> None:
    """An option token outside the grammar is rejected."""
    bad_option = "5840:64:1:60:!!!wat:.:Linux:bad"
    with pytest.raises(P0fParseError):
        _parse_line(bad_option)
|
||||
|
||||
|
||||
# ─── File-level tests ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_parse_file_skips_comments_blanks_bad_lines(tmp_path: Path) -> None:
    """Comments, blank lines and garbage rows are skipped; good rows load."""
    rows = [
        "# comment\n",
        "\n",
        "5840:64:1:60:M1460,S,T,N,W7:.:Linux:2.6.x\n",
        "# another comment\n",
        "garbage line that should skip\n",
        "8192:128:1:48:M1460,N,W0,N,N,S:.:Windows:XP\n",
    ]
    target = tmp_path / "test.fp"
    target.write_text("".join(rows))

    loaded = parse_p0f_v2(target)

    assert len(loaded) == 2
    assert {entry.os for entry in loaded} == {"Linux", "Windows"}
|
||||
|
||||
|
||||
def test_parse_vendored_syn_db_fully_loads() -> None:
    """The vendored p0f.fp must load all 262 SYN signatures.

    A parser regression that drops rows would silently reduce
    OS-fingerprint coverage, so the count is pinned to the upstream
    inventory.
    """
    repo_root = Path(__file__).resolve().parents[3]
    loaded = parse_p0f_v2(repo_root / "decnet/prober/osfp/p0f/data/p0f.fp")
    assert len(loaded) == 262, f"expected 262 SYN sigs, parser returned {len(loaded)}"
|
||||
|
||||
|
||||
def test_parse_vendored_all_four_dbs_fully_load() -> None:
    """Each of the four vendored databases loads its full signature count."""
    data_dir = Path(__file__).resolve().parents[3] / "decnet/prober/osfp/p0f/data"
    inventory = {"p0f.fp": 262, "p0fa.fp": 61, "p0fr.fp": 46, "p0fo.fp": 6}
    for name in inventory:
        want = inventory[name]
        got = len(parse_p0f_v2(data_dir / name))
        assert got == want, f"{name}: expected {want}, got {got}"
|
||||
|
||||
|
||||
def test_parse_vendored_specificity_in_range() -> None:
    """Every vendored signature's specificity lies within [0, 1]."""
    repo_root = Path(__file__).resolve().parents[3]
    loaded = parse_p0f_v2(repo_root / "decnet/prober/osfp/p0f/data/p0f.fp")
    for sig in loaded:
        in_range = 0.0 <= sig.specificity <= 1.0
        assert in_range, (
            f"{sig.os}/{sig.flavor}: specificity out of range ({sig.specificity})"
        )
|
||||
125
tests/prober/osfp/test_signature.py
Normal file
125
tests/prober/osfp/test_signature.py
Normal file
@@ -0,0 +1,125 @@
|
||||
"""Tests for signature matching + scoring."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.prober.osfp.p0f.format import _parse_line
|
||||
|
||||
|
||||
def _obs(**overrides):
|
||||
"""Baseline observation (Linux 2.6 on Ethernet), overridable."""
|
||||
base = {
|
||||
"window": 5840,
|
||||
"ttl": 64,
|
||||
"df": True,
|
||||
"total_len": 60,
|
||||
"options_sig": "M1460,S,T,N,W7",
|
||||
"quirks": frozenset(),
|
||||
"mss": 1460,
|
||||
"wscale": 7,
|
||||
}
|
||||
base.update(overrides)
|
||||
return base
|
||||
|
||||
|
||||
# ─── Match / no-match ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_score_exact_match_is_high() -> None:
    """An all-literal signature matching the baseline scores at least 0.9."""
    literal = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:2.6.x literal")
    score = literal.score(_obs())
    assert score is not None
    assert score >= 0.9, f"literal-fields signature should score high, got {score}"
|
||||
|
||||
|
||||
def test_score_wildcard_match_is_lower_than_literal() -> None:
    """Both signatures match, but the literal one outranks the wildcard one."""
    observation = _obs()
    ls = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:literal").score(observation)
    ws = _parse_line("*:64:1:*:M*,S,T,N,W*:.:Linux:wildcard").score(observation)
    assert ls is not None and ws is not None
    assert ls > ws, f"literal ({ls}) should outscore wildcard ({ws})"
|
||||
|
||||
|
||||
def test_score_window_mismatch_returns_none() -> None:
    """A literal window that differs from the observation rejects it."""
    fixed = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:fixed")
    other_window = _obs(window=64240)
    assert fixed.score(other_window) is None
|
||||
|
||||
|
||||
def test_score_ttl_mismatch_returns_none() -> None:
    """A different TTL bucket rejects the observation."""
    ttl64 = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:ttl64")
    other_ttl = _obs(ttl=128)
    assert ttl64.score(other_ttl) is None
|
||||
|
||||
|
||||
def test_score_df_mismatch_returns_none() -> None:
    """A signature requiring DF=1 rejects an observation with DF clear."""
    needs_df = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:df-required")
    df_clear = _obs(df=False)
    assert needs_df.score(df_clear) is None
|
||||
|
||||
|
||||
def test_score_df_wildcard_on_signature_matches_either() -> None:
    """A ``*`` DF field accepts both DF values."""
    any_df = _parse_line("5840:64:*:60:M1460,S,T,N,W7:.:Linux:any-df")
    for flag in (True, False):
        assert any_df.score(_obs(df=flag)) is not None
|
||||
|
||||
|
||||
def test_score_options_order_mismatch_returns_none() -> None:
    """Option order is significant: a permutation must not match."""
    ordered = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:ordered")
    # Same tokens as the signature, shuffled.
    shuffled = _obs(options_sig="S,T,M1460,N,W7")
    assert ordered.score(shuffled) is None
|
||||
|
||||
|
||||
def test_score_options_missing_token_returns_none() -> None:
    """An observation with one option fewer than the signature is rejected."""
    five_opts = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:5opts")
    four_opts = _obs(options_sig="M1460,S,T,N")
    assert five_opts.score(four_opts) is None
|
||||
|
||||
|
||||
def test_score_quirks_must_match_as_set() -> None:
    """Quirks match only on exact set equality (no subset or superset)."""
    with_pz = _parse_line("5840:64:1:60:M*,S,T,N,W*:PZ:Linux:with PZ")
    exact = frozenset({"P", "Z"})
    missing_z = frozenset({"P"})
    extra_i = frozenset({"P", "Z", "I"})
    assert with_pz.score(_obs(quirks=exact)) is not None
    assert with_pz.score(_obs(quirks=missing_z)) is None
    assert with_pz.score(_obs(quirks=extra_i)) is None
|
||||
|
||||
|
||||
def test_score_mss_multiple_window() -> None:
    """``S4`` requires window == 4 * MSS for whatever MSS was observed."""
    s4 = _parse_line("S4:64:1:60:M1460,S,T,N,W7:.:Linux:S4")
    # MSS 1460 -> expected window 5840.
    assert s4.score(_obs(window=5840, mss=1460)) is not None
    # MSS 536 -> expected window 2144; 5840 no longer fits.
    assert s4.score(_obs(window=2144, mss=536)) is not None
    assert s4.score(_obs(window=5840, mss=536)) is None
|
||||
|
||||
|
||||
def test_score_modulo_window() -> None:
    """``%8192`` accepts multiples of 8192 and nothing else."""
    mod8192 = _parse_line("%8192:64:1:60:M1460,S,T,N,W7:.:Linux:mod8192")
    for multiple in (32768, 40960):
        assert mod8192.score(_obs(window=multiple)) is not None
    assert mod8192.score(_obs(window=32769)) is None
|
||||
|
||||
|
||||
def test_score_no_options_sentinel() -> None:
    """The ``.`` sentinel matches empty/absent options and nothing else."""
    no_opts = _parse_line("5840:64:1:60:.:.:Linux:no-opts")
    for absent in ("", None):
        assert no_opts.score(_obs(options_sig=absent)) is not None
    assert no_opts.score(_obs(options_sig="M1460")) is None
|
||||
|
||||
|
||||
def test_score_missing_observation_fields_returns_none() -> None:
    """Missing window or TTL yields None instead of an error.

    This is the safety invariant: sniffer_rollup may call score() with
    partial data, and a signature that needs a specific window cannot match
    an observation that has none.
    """
    strict = _parse_line("5840:64:1:60:M1460,S,T,N,W7:.:Linux:strict")
    no_window = _obs(window=None)
    no_ttl = _obs(ttl=None)
    assert strict.score(no_window) is None
    assert strict.score(no_ttl) is None
|
||||
|
||||
|
||||
def test_score_option_value_wildcard_matches_any_literal() -> None:
    """``M*`` / ``W*`` accept any observed MSS and window-scale values."""
    wild = _parse_line("5840:64:1:60:M*,S,T,N,W*:.:Linux:wild-mss-wscale")
    for observed in ("M1460,S,T,N,W7", "M536,S,T,N,W2"):
        assert wild.score(_obs(options_sig=observed)) is not None
|
||||
|
||||
|
||||
def test_score_option_value_modulo() -> None:
    """``M%4`` accepts an MSS divisible by 4 and rejects one that is not."""
    mss_mod_4 = _parse_line("5840:64:1:60:M%4,S,T,N,W7:.:Linux:mss-mod-4")
    divisible = _obs(options_sig="M1460,S,T,N,W7")  # 1460 % 4 == 0
    indivisible = _obs(options_sig="M1461,S,T,N,W7")
    assert mss_mod_4.score(divisible) is not None
    assert mss_mod_4.score(indivisible) is None
|
||||
Reference in New Issue
Block a user