feat(sniffer): IP-ID sequence classifier (random/incremental/zero/constant)

Adds a per-source-IP rolling sample buffer (deque, maxlen=8) for IP-ID
values seen on attacker SYNs and a stdlib-only classifier in
decnet/sniffer/seq_class.py. Each new SYN appends ip.id and re-classifies
the buffer; the result is logged on tcp_syn_fingerprint events alongside
sample count.

The dedup key now folds in ipid_class (and isn_class) so a transition from
'unknown' to a definitive verdict emits exactly one fresh event instead of
being suppressed by the old (os|options) key. The profiler rollup carries
the latest non-'unknown' label into attacker.tcp_fingerprint.

UI surfaces it as a colour-coded tag in the TCP STACK panel: random
neutral, incremental amber, zero/constant green (the strong signal).
This commit is contained in:
2026-04-26 20:28:32 -04:00
parent b0b08754d0
commit 0e40cc8ae1
6 changed files with 204 additions and 3 deletions

View File

@@ -143,6 +143,7 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
ttl_values: list[str] = []
hops: list[int] = []
tcp_fp: dict[str, Any] | None = None
ipid_latest: str | None = None
# Tracks which event set tcp_fp last — picks the provider "context"
# (syn vs synack) when we feed the p0f-v2 matcher below.
tcp_fp_context: str = "syn"
@@ -185,6 +186,13 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
"dscp": _int_or_none(e.fields.get("dscp")),
"ecn": _int_or_none(e.fields.get("ecn")),
}
# Sequence classifications converge as samples accumulate; the
# most recent non-"unknown" label wins so a later "unknown" event
# (e.g. a deque reset) doesn't overwrite a confident verdict.
ipid_class = e.fields.get("ipid_class")
if ipid_class and ipid_class != "unknown":
ipid_latest = ipid_class
tcp_fp["ipid_class"] = ipid_latest
tcp_fp_context = "syn"
elif e.event_type == _SNIFFER_FLOW_EVENT:

View File

@@ -12,10 +12,12 @@ from __future__ import annotations
import hashlib
import struct
import time
from collections import deque
from typing import Any, Callable
from decnet.prober.tcpfp import _extract_options_order
from decnet.sniffer.p0f import guess_os, hop_distance, initial_ttl
from decnet.sniffer.seq_class import classify_sequence
from decnet.sniffer.syslog import SEVERITY_INFO, SEVERITY_WARNING, syslog_line
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer
@@ -745,6 +747,12 @@ class SnifferEngine:
self._tcp_syn: dict[tuple[str, int, str, int], dict[str, Any]] = {}
self._tcp_rtt: dict[tuple[str, int, str, int], dict[str, Any]] = {}
# Per-source-IP rolling samples for sequence-pattern classification.
# IP-ID and TCP ISN need multiple SYNs from the same attacker before
# we can label them random/incremental/zero/constant.
self._SEQ_SAMPLE_SIZE = 8
self._ipid_samples: dict[str, deque[int]] = {}
# Per-flow timing aggregator. Key: (src_ip, src_port, dst_ip, dst_port).
# Flow direction is client→decky; reverse packets are associated back
# to the forward flow so we can track retransmits and inter-arrival.
@@ -791,9 +799,16 @@ class SnifferEngine:
if event_type == "tls_certificate":
return fields.get("subject_cn", "") + "|" + fields.get("issuer", "")
if event_type == "tcp_syn_fingerprint":
# Dedupe per (OS signature, options layout). One event per unique
# stack profile from this attacker IP per dedup window.
return fields.get("os_guess", "") + "|" + fields.get("options_sig", "")
# Dedupe per (OS signature, options layout, sequence-pattern
# classification). Including ipid_class/isn_class lets each
# transition (unknown → random/incremental/zero/constant) emit
# exactly one fresh event as samples accumulate.
return (
fields.get("os_guess", "")
+ "|" + fields.get("options_sig", "")
+ "|" + fields.get("ipid_class", "")
+ "|" + fields.get("isn_class", "")
)
if event_type == "tcp_flow_timing":
# Dedup per (attacker_ip, decky_port) — src_port is deliberately
# excluded so a port scanner rotating source ports only produces
@@ -1031,6 +1046,12 @@ class SnifferEngine:
_span.set_attribute("attacker_ip", src_ip)
_span.set_attribute("dst_port", dst_port)
tcp_fp = _extract_tcp_fingerprint(list(tcp.options or []))
ipid_buf = self._ipid_samples.setdefault(
src_ip, deque(maxlen=self._SEQ_SAMPLE_SIZE)
)
ipid_buf.append(int(ip.id))
ipid_class = classify_sequence(list(ipid_buf))
os_label = guess_os(
ttl=ip.ttl,
window=int(tcp.window),
@@ -1059,6 +1080,8 @@ class SnifferEngine:
tos=str(int(getattr(ip, "tos", 0))),
dscp=str((int(getattr(ip, "tos", 0)) >> 2) & 0x3F),
ecn=str(int(getattr(ip, "tos", 0)) & 0x3),
ipid_class=ipid_class,
ipid_samples=str(len(ipid_buf)),
os_guess=os_label,
)

View File

@@ -0,0 +1,63 @@
"""
Sequence-pattern classifier for TCP/IP fields that are useful as a tooling
fingerprint when sampled across multiple packets from the same source.
Two callers today:
- IP-ID sequence per attacker (random/incremental/zero/constant).
- TCP ISN sequence per attacker; modern stacks randomise, so a non-random
result is itself a strong signal (legacy stacks, custom raw-socket tools).
Pure stdlib so it stays trivially unit-testable.
"""
from __future__ import annotations
import statistics
# Minimum samples needed for a meaningful classification. Below this we
# return "unknown" rather than guess from 1-3 noisy values.
_MIN_SAMPLES = 4

# Max plausible delta for an "incremental" classification. The IP-ID field
# is 16-bit so kernel-emitted increments wrap rapidly under load — anything
# over 4096 between consecutive SYNs from the same host is almost certainly
# random rather than a counter we just happen to be sampling sparsely.
_INCREMENTAL_MAX_DELTA = 0x1000

# Coefficient-of-variation threshold (stddev/mean) once used to gate the
# "random" label. Kept so the module's names stay stable, but the verdict
# does not depend on it: any sequence that is not zero/constant/incremental
# is labelled "random" regardless of its spread.
_RANDOM_CV_THRESHOLD = 0.5


def classify_sequence(samples: list[int], *, modulus: int = 0) -> str:
    """
    Classify an integer sequence given in arrival order (oldest first).

    Returns one of:
      - "unknown":     fewer than _MIN_SAMPLES samples
      - "zero":        every sample is 0
      - "constant":    every sample is the same non-zero value
      - "incremental": every consecutive delta lies in
                       (0, _INCREMENTAL_MAX_DELTA]
      - "random":      anything else (non-monotonic, repeated values mixed
                       with changes, or jumps larger than the max delta)

    Args:
        samples: observed values, oldest first.
        modulus: size of the counter space, e.g. 0x10000 for a 16-bit
            field. When positive, deltas are computed modulo this value so
            a counter that wraps inside the sample window still counts as
            "incremental". The default 0 disables wrap handling and uses
            plain subtraction.

    Raises:
        ValueError: if modulus is negative.
    """
    if modulus < 0:
        raise ValueError("modulus must be non-negative")

    if len(samples) < _MIN_SAMPLES:
        return "unknown"

    first = samples[0]
    if all(s == first for s in samples):
        return "zero" if first == 0 else "constant"

    deltas = [b - a for a, b in zip(samples, samples[1:])]
    if modulus:
        # Python's % always yields a non-negative result for a positive
        # modulus, so a wrapped step such as 65534 -> 3 becomes +5.
        deltas = [d % modulus for d in deltas]
    if all(0 < d <= _INCREMENTAL_MAX_DELTA for d in deltas):
        return "incremental"

    # Everything left is "random". The former stddev/mean computation was
    # dropped because both of its outcomes produced this same label.
    return "random"