feat(prober/osfp): p0f v2 .fp parser + Signature scoring
First code layer of the OS-fingerprinting work on top of yesterday's vendored p0f v2 database. Three new modules, all pure (no I/O outside of the parser's file read): - decnet/prober/osfp/base.py — Provider protocol + OsMatch dataclass matching the established Provider convention in decnet/geoip and decnet/bus. Docstring spells out the never-raise invariant: malformed input returns None, so a single bad event can't wedge a whole attacker-profile rebuild. - decnet/prober/osfp/p0f/signature.py — Signature dataclass + three predicate helpers (WindowSpec / IntSpec / OptionToken) encoding the p0f v2 DSL's wildcard / modulo / MSS-multiple / MTU-multiple semantics. Scoring is our extension on top of upstream p0f's first-match-wins policy: each signature carries a precomputed specificity in [0, 1] so the factory can pick the most-specific match when multiple signatures fire against one observation. - decnet/prober/osfp/p0f/format.py — .fp line parser. Every shipped field variant from the DSL spec at the top of p0f.fp is covered (Snn / Tnn / %nnn / * for window; T0 vs T; -/@/* os-genre prefixes; quirks as concatenated single-letter flags; '.' sentinels for no-options / no-quirks). Malformed lines log a warning and skip instead of aborting the whole file — 1 bad row must not cost the other 374. 20 parser tests + 14 scoring tests. Full vendored-DB smoke tests confirm all 375 signatures parse round-trip (262 SYN + 61 SYN-ACK + 46 RST + 6 stray) and every computed specificity lands in [0, 1].
This commit is contained in:
243
decnet/prober/osfp/p0f/format.py
Normal file
243
decnet/prober/osfp/p0f/format.py
Normal file
@@ -0,0 +1,243 @@
|
||||
"""p0f v2 ``.fp`` file parser.
|
||||
|
||||
Format (from the DSL spec at the top of every shipped ``.fp`` file):
|
||||
|
||||
wwww:ttt:D:ss:OOO:QQ:OS:Details
|
||||
|
||||
Where:
|
||||
wwww — window size: literal int | '*' | '%nnn' | 'Snn' | 'Tnn'
|
||||
ttt — initial TTL (literal int: 32/64/128/255 typically)
|
||||
D — DF bit: '0' or '1' ('*' is also accepted and parsed as None)
|
||||
ss — total IP packet length: literal int | '*' | '%nnn'
|
||||
OOO — option order: comma/space-separated tokens, or '.' for none.
|
||||
Tokens: N, E, S, T, T0, P, Wnnn/W*/W%nnn, Mnnn/M*/M%nnn, ?n
|
||||
QQ — quirks: concatenated single-letter flags, or '.' for none.
|
||||
Flags: P, Z, I, U, X, A, T, F, D, !, K, Q, 0, R
|
||||
OS — genre, optionally prefixed '-' (userland), '@' (group),
|
||||
'*' (random/bogus), or combinations (e.g. '-@Windows').
|
||||
Details — free-text flavor/version.
|
||||
|
||||
Lines starting with '#' and blank lines are skipped.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from decnet.prober.osfp.p0f.signature import (
|
||||
IntSpec,
|
||||
OptionToken,
|
||||
Signature,
|
||||
WindowSpec,
|
||||
precompute_specificity,
|
||||
)
|
||||
|
||||
logger = logging.getLogger("decnet.prober.osfp.p0f.format")

# One option-order token. Group 1 is the option kind (N, E, S, T, P, T0,
# M, W or ?); group 2 is the optional value predicate: '*', '%nnn' or a
# literal integer. The character classes list each letter exactly once and
# '?' needs no escape inside a class.
_OPTION_TOKEN_RE = re.compile(r"^([NESTP]|T0|[MW?])(\*|%\d+|\d+)?$")
|
||||
|
||||
|
||||
class P0fParseError(ValueError):
    """Signal that a single ``.fp`` signature line could not be parsed.

    The file loader catches this exception, logs a warning for the
    offending line and carries on, so one bad row never disables the
    whole database.
    """
|
||||
|
||||
|
||||
def parse_p0f_v2(path: Path) -> list[Signature]:
    """Load every parseable signature from a p0f v2 ``.fp`` file.

    Blank lines and lines beginning with ``#`` are ignored. A line that
    raises ``P0fParseError`` is reported at WARNING level (file name and
    1-based line number) and skipped, so a single corrupt row does not
    abort the whole load.

    Args:
        path: Location of the ``.fp`` file. It is read as UTF-8 with
            undecodable bytes replaced.

    Returns:
        The successfully parsed signatures, in file order.
    """
    signatures: list[Signature] = []
    with path.open("r", encoding="utf-8", errors="replace") as handle:
        for number, text in enumerate(handle, start=1):
            entry = text.strip()
            # Nothing to parse on blank or comment lines.
            if entry == "" or entry.startswith("#"):
                continue
            try:
                parsed = _parse_line(entry)
            except P0fParseError as err:
                logger.warning(
                    "p0f parse: skipping %s:%d — %s", path.name, number, err,
                )
            else:
                signatures.append(parsed)
    logger.debug(
        "p0f parse: loaded %d signatures from %s", len(signatures), path.name
    )
    return signatures
|
||||
|
||||
|
||||
def _parse_line(line: str) -> Signature:
    """Turn one non-comment ``.fp`` line into a ``Signature``.

    The line is split on ':' into at most eight fields
    (window, TTL, DF, total length, options, quirks, OS genre, details).
    The details field may be missing, in which case it is treated as an
    empty string; because the split is capped, any further ':' stays
    inside the details text.

    Raises:
        P0fParseError: If fewer than seven fields are present, or if any
            field parser rejects its input.
    """
    from dataclasses import replace

    fields = line.split(":", 7)
    count = len(fields)
    if count < 7:
        raise P0fParseError(f"expected 7+ colon-delimited fields, got {count}")
    if count == 7:
        # Details were omitted entirely; normalise to an empty string.
        fields.append("")
    (
        window_text,
        ttl_text,
        df_text,
        length_text,
        options_text,
        quirks_text,
        genre_text,
        details_text,
    ) = fields

    # Field parsers run in file-column order so the first bad field is
    # the one reported.
    window = _parse_wss(window_text)
    initial_ttl = _parse_int_field(ttl_text, field="ttl")
    dont_fragment = _parse_df(df_text)
    packet_len = _parse_int_spec(length_text)
    option_tokens = _parse_options(options_text)
    quirk_flags = _parse_quirks(quirks_text)
    genre, userland, approximate, randomised = _parse_os_genre(genre_text)

    base = Signature(
        wss=window,
        ttl=initial_ttl,
        df=dont_fragment,
        total_len=packet_len,
        options=option_tokens,
        quirks=quirk_flags,
        os=genre,
        flavor=details_text.strip(),
        notes="",
        is_userland=userland,
        is_approximate=approximate,
        is_random=randomised,
    )
    # The specificity score is derived from the finished signature, so it
    # is filled in afterwards on a copy rather than passed to the
    # constructor.
    return replace(base, specificity=precompute_specificity(base))
|
||||
|
||||
|
||||
def _parse_wss(s: str) -> WindowSpec:
    """Parse the window-size field of a signature line.

    Accepted forms (after stripping surrounding whitespace):

    * ``*``    -> ``WindowSpec("any")``
    * ``%nnn`` -> ``WindowSpec("mod", nnn)``; ``nnn`` must be >= 1
    * ``Snn``  -> ``WindowSpec("mss_mul", nn)``
    * ``Tnn``  -> ``WindowSpec("mtu_mul", nn)``
    * ``nnn``  -> ``WindowSpec("literal", nnn)``

    Raises:
        P0fParseError: If the numeric part is not an integer, or if a
            ``%`` modulus is zero or negative.
    """
    s = s.strip()
    if s == "*":
        return WindowSpec("any")

    # Leading character -> (WindowSpec kind, label used in error text).
    prefixed = {
        "%": ("mod", "mod"),
        "S": ("mss_mul", "Snn"),
        "T": ("mtu_mul", "Tnn"),
    }
    kind, label = prefixed.get(s[:1], ("literal", "literal"))
    digits = s if kind == "literal" else s[1:]
    try:
        value = int(digits)
    except ValueError as exc:
        raise P0fParseError(f"bad {label} window {s!r}") from exc

    # A modulus below 1 is meaningless, and a zero modulus would
    # presumably divide by zero when the predicate is later evaluated
    # (the matcher is not visible here), so reject it at load time.
    if kind == "mod" and value < 1:
        raise P0fParseError(f"bad mod window {s!r}; modulus must be >= 1")
    return WindowSpec(kind, value)
|
||||
|
||||
|
||||
def _parse_int_field(s: str, *, field: str) -> int:
    """Convert a plain integer field (the TTL column) to ``int``.

    Wildcards are not understood here. Surrounding whitespace is ignored.

    Args:
        s: Raw field text.
        field: Field name, used only in the error message.

    Raises:
        P0fParseError: If the text is not an integer.
    """
    text = s.strip()
    try:
        value = int(text)
    except ValueError as exc:
        raise P0fParseError(f"bad {field}: {s!r}") from exc
    return value
|
||||
|
||||
|
||||
def _parse_df(s: str) -> Optional[bool]:
    """Parse the DF column.

    Returns ``False`` for ``"0"``, ``True`` for ``"1"`` and ``None`` for
    ``"*"``. Surrounding whitespace is ignored.

    Raises:
        P0fParseError: For any other value.
    """
    known = {"*": None, "0": False, "1": True}
    token = s.strip()
    if token in known:
        return known[token]
    raise P0fParseError(f"bad DF {token!r}; expected 0/1/*")
|
||||
|
||||
|
||||
def _parse_int_spec(s: str) -> IntSpec:
    """Parse an integer predicate field (used for the total-length column).

    Accepted forms (after stripping surrounding whitespace):

    * ``*``    -> ``IntSpec("any")``
    * ``%nnn`` -> ``IntSpec("mod", nnn)``; ``nnn`` must be >= 1
    * ``nnn``  -> ``IntSpec("literal", nnn)``

    Raises:
        P0fParseError: If the numeric part is not an integer, or if a
            ``%`` modulus is zero or negative.
    """
    s = s.strip()
    if s == "*":
        return IntSpec("any")
    if s.startswith("%"):
        try:
            modulus = int(s[1:])
        except ValueError as exc:
            raise P0fParseError(f"bad mod int {s!r}") from exc
        # A modulus below 1 is meaningless, and a zero modulus would
        # presumably divide by zero when the predicate is later evaluated
        # (the matcher is not visible here), so reject it at load time.
        if modulus < 1:
            raise P0fParseError(f"bad mod int {s!r}; modulus must be >= 1")
        return IntSpec("mod", modulus)
    try:
        return IntSpec("literal", int(s))
    except ValueError as exc:
        raise P0fParseError(f"bad literal int {s!r}") from exc
|
||||
|
||||
|
||||
def _parse_options(s: str) -> tuple[OptionToken, ...]:
    """Parse the option-order field into a tuple of ``OptionToken``.

    Tokens may be separated by commas, whitespace or both. A field that
    is empty, is the ``.`` sentinel, or contains only separators yields
    the single-element tuple ``(OptionToken("."),)``.

    Raises:
        P0fParseError: Propagated from the per-token parser.
    """
    text = s.strip()
    if text == "" or text == ".":
        return (OptionToken("."),)
    # Commas and whitespace are interchangeable separators; split()
    # without arguments never yields empty pieces.
    pieces = text.replace(",", " ").split()
    parsed = tuple(_parse_option_token(piece) for piece in pieces)
    if parsed:
        return parsed
    return (OptionToken("."),)
|
||||
|
||||
|
||||
def _parse_option_token(raw: str) -> OptionToken:
    """Parse one option-order token.

    Flag options (``N``, ``E``, ``S``, ``T``, ``P``, ``T0``) take no
    value. ``M``, ``W`` and ``?`` require one: ``*`` (any), ``%nnn``
    (modulo, ``nnn`` >= 1) or a literal integer.

    Raises:
        P0fParseError: If the token does not match the token grammar, a
            flag option carries a value (e.g. ``N5``, ``T7``, ``T0*``),
            a valued option has none, or a modulus is below 1.
    """
    # T0 — timestamp zero (not the TCP option '?0'). Handled before the
    # regex, which would otherwise read it as 'T' followed by value '0'.
    if raw == "T0":
        return OptionToken("T0")
    m = _OPTION_TOKEN_RE.match(raw)
    if not m:
        raise P0fParseError(f"bad option token {raw!r}")
    kind, val_raw = m.group(1), m.group(2)
    if kind in ("N", "E", "S", "T", "T0", "P"):
        # The regex lets a value trail any kind; on a flag option that is
        # a malformed token, not something to discard silently.
        if val_raw is not None:
            raise P0fParseError(
                f"option {kind!r} does not take a value: {raw!r}"
            )
        return OptionToken(kind)
    # M / W / ? expect a numeric predicate (or wildcard).
    if val_raw is None:
        raise P0fParseError(f"option {kind!r} missing required value")
    if val_raw == "*":
        spec = IntSpec("any")
    elif val_raw.startswith("%"):
        try:
            modulus = int(val_raw[1:])
        except ValueError as exc:
            raise P0fParseError(f"bad {kind} mod value {val_raw!r}") from exc
        # A zero modulus can never be evaluated sensibly; reject it here.
        if modulus < 1:
            raise P0fParseError(
                f"bad {kind} mod value {val_raw!r}; modulus must be >= 1"
            )
        spec = IntSpec("mod", modulus)
    else:
        try:
            spec = IntSpec("literal", int(val_raw))
        except ValueError as exc:
            raise P0fParseError(f"bad {kind} literal value {val_raw!r}") from exc
    return OptionToken(kind, spec)
|
||||
|
||||
|
||||
def _parse_quirks(s: str) -> frozenset[str]:
    """Parse the quirks field into a set of single-character flags.

    The ``.`` sentinel and an empty field both mean "no quirks".
    Otherwise every non-whitespace character of the field (including
    ``!``) becomes one flag; the characters are not validated.
    """
    flags = s.strip()
    if flags in ("", "."):
        return frozenset()
    return frozenset(filter(lambda ch: not ch.isspace(), flags))
|
||||
|
||||
|
||||
def _parse_os_genre(s: str) -> tuple[str, bool, bool, bool]:
    """Split the OS-genre field into its name and prefix modifiers.

    Leading ``-``, ``@`` and ``*`` characters may appear in any order and
    any number; each sets the corresponding flag and is removed.

    Returns:
        ``(os_name, is_userland, is_approx, is_random)`` where the flags
        record whether ``-``, ``@`` and ``*`` respectively were present.
    """
    text = s.strip()
    seen = set()
    cut = 0
    # Walk over the run of modifier characters at the front of the field.
    while cut < len(text) and text[cut] in ("-", "@", "*"):
        seen.add(text[cut])
        cut += 1
    return text[cut:], "-" in seen, "@" in seen, "*" in seen
|
||||
Reference in New Issue
Block a user