feat(sniffer): IP-ID sequence classifier (random/incremental/zero/constant)
Adds a per-source-IP rolling sample buffer (deque, maxlen=8) for IP-ID values seen on attacker SYNs and a stdlib-only classifier in decnet/sniffer/seq_class.py. Each new SYN appends ip.id and re-classifies the buffer; the result is logged on tcp_syn_fingerprint events alongside sample count. The dedup key now folds in ipid_class so a transition from 'unknown' to a definitive verdict emits exactly one fresh event instead of being suppressed by the old (os|options) key. Profiler rollup carries the latest non-'unknown' label into attacker.tcp_fingerprint. UI surfaces it as a colour-coded tag in the TCP STACK panel: random neutral, incremental amber, zero/constant green (the strong signal).
This commit is contained in:
@@ -143,6 +143,7 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
|
||||
ttl_values: list[str] = []
|
||||
hops: list[int] = []
|
||||
tcp_fp: dict[str, Any] | None = None
|
||||
ipid_latest: str | None = None
|
||||
# Tracks which event set tcp_fp last — picks the provider "context"
|
||||
# (syn vs synack) when we feed the p0f-v2 matcher below.
|
||||
tcp_fp_context: str = "syn"
|
||||
@@ -185,6 +186,13 @@ def sniffer_rollup(events: list[LogEvent]) -> dict[str, Any]:
|
||||
"dscp": _int_or_none(e.fields.get("dscp")),
|
||||
"ecn": _int_or_none(e.fields.get("ecn")),
|
||||
}
|
||||
# Sequence classifications converge as samples accumulate; the
|
||||
# most recent non-"unknown" label wins so a later "unknown" event
|
||||
# (e.g. a deque reset) doesn't overwrite a confident verdict.
|
||||
ipid_class = e.fields.get("ipid_class")
|
||||
if ipid_class and ipid_class != "unknown":
|
||||
ipid_latest = ipid_class
|
||||
tcp_fp["ipid_class"] = ipid_latest
|
||||
tcp_fp_context = "syn"
|
||||
|
||||
elif e.event_type == _SNIFFER_FLOW_EVENT:
|
||||
|
||||
@@ -12,10 +12,12 @@ from __future__ import annotations
|
||||
import hashlib
|
||||
import struct
|
||||
import time
|
||||
from collections import deque
|
||||
from typing import Any, Callable
|
||||
|
||||
from decnet.prober.tcpfp import _extract_options_order
|
||||
from decnet.sniffer.p0f import guess_os, hop_distance, initial_ttl
|
||||
from decnet.sniffer.seq_class import classify_sequence
|
||||
from decnet.sniffer.syslog import SEVERITY_INFO, SEVERITY_WARNING, syslog_line
|
||||
from decnet.telemetry import traced as _traced, get_tracer as _get_tracer
|
||||
|
||||
@@ -745,6 +747,12 @@ class SnifferEngine:
|
||||
self._tcp_syn: dict[tuple[str, int, str, int], dict[str, Any]] = {}
|
||||
self._tcp_rtt: dict[tuple[str, int, str, int], dict[str, Any]] = {}
|
||||
|
||||
# Per-source-IP rolling samples for sequence-pattern classification.
|
||||
# IP-ID and TCP ISN need multiple SYNs from the same attacker before
|
||||
# we can label them random/incremental/zero/constant.
|
||||
self._SEQ_SAMPLE_SIZE = 8
|
||||
self._ipid_samples: dict[str, deque[int]] = {}
|
||||
|
||||
# Per-flow timing aggregator. Key: (src_ip, src_port, dst_ip, dst_port).
|
||||
# Flow direction is client→decky; reverse packets are associated back
|
||||
# to the forward flow so we can track retransmits and inter-arrival.
|
||||
@@ -791,9 +799,16 @@ class SnifferEngine:
|
||||
if event_type == "tls_certificate":
|
||||
return fields.get("subject_cn", "") + "|" + fields.get("issuer", "")
|
||||
if event_type == "tcp_syn_fingerprint":
|
||||
# Dedupe per (OS signature, options layout). One event per unique
|
||||
# stack profile from this attacker IP per dedup window.
|
||||
return fields.get("os_guess", "") + "|" + fields.get("options_sig", "")
|
||||
# Dedupe per (OS signature, options layout, sequence-pattern
|
||||
# classification). Including ipid_class/isn_class lets each
|
||||
# transition (unknown → random/incremental/zero/constant) emit
|
||||
# exactly one fresh event as samples accumulate.
|
||||
return (
|
||||
fields.get("os_guess", "")
|
||||
+ "|" + fields.get("options_sig", "")
|
||||
+ "|" + fields.get("ipid_class", "")
|
||||
+ "|" + fields.get("isn_class", "")
|
||||
)
|
||||
if event_type == "tcp_flow_timing":
|
||||
# Dedup per (attacker_ip, decky_port) — src_port is deliberately
|
||||
# excluded so a port scanner rotating source ports only produces
|
||||
@@ -1031,6 +1046,12 @@ class SnifferEngine:
|
||||
_span.set_attribute("attacker_ip", src_ip)
|
||||
_span.set_attribute("dst_port", dst_port)
|
||||
tcp_fp = _extract_tcp_fingerprint(list(tcp.options or []))
|
||||
|
||||
ipid_buf = self._ipid_samples.setdefault(
|
||||
src_ip, deque(maxlen=self._SEQ_SAMPLE_SIZE)
|
||||
)
|
||||
ipid_buf.append(int(ip.id))
|
||||
ipid_class = classify_sequence(list(ipid_buf))
|
||||
os_label = guess_os(
|
||||
ttl=ip.ttl,
|
||||
window=int(tcp.window),
|
||||
@@ -1059,6 +1080,8 @@ class SnifferEngine:
|
||||
tos=str(int(getattr(ip, "tos", 0))),
|
||||
dscp=str((int(getattr(ip, "tos", 0)) >> 2) & 0x3F),
|
||||
ecn=str(int(getattr(ip, "tos", 0)) & 0x3),
|
||||
ipid_class=ipid_class,
|
||||
ipid_samples=str(len(ipid_buf)),
|
||||
os_guess=os_label,
|
||||
)
|
||||
|
||||
|
||||
63
decnet/sniffer/seq_class.py
Normal file
63
decnet/sniffer/seq_class.py
Normal file
@@ -0,0 +1,63 @@
|
||||
"""
|
||||
Sequence-pattern classifier for TCP/IP fields that are useful as a tooling
|
||||
fingerprint when sampled across multiple packets from the same source.
|
||||
|
||||
Two callers today:
|
||||
- IP-ID sequence per attacker (random/incremental/zero/constant).
|
||||
- TCP ISN sequence per attacker; modern stacks randomise, so a non-random
|
||||
result is itself a strong signal (legacy stacks, custom raw-socket tools).
|
||||
|
||||
Pure stdlib so it stays trivially unit-testable.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import statistics
|
||||
|
||||
# Minimum samples needed for a meaningful classification. Below this we
# return "unknown" rather than guess from 1-3 noisy values.
_MIN_SAMPLES = 4

# Max plausible delta for an "incremental" classification. The IP-ID field
# is 16-bit so kernel-emitted increments wrap rapidly under load — anything
# over 4096 between consecutive SYNs from the same host is almost certainly
# random rather than a counter we just happen to be sampling sparsely.
_INCREMENTAL_MAX_DELTA = 0x1000

# Coefficient-of-variation threshold (stddev/mean) that was meant to gate the
# "random" label. classify_sequence() does not consult it: every sequence
# that is not zero/constant/incremental is labelled "random" whatever its
# spread, which is what the previous implementation returned on both sides
# of this threshold. The name is kept so existing importers keep working.
_RANDOM_CV_THRESHOLD = 0.5


def classify_sequence(samples: list[int], *, modulus: int = 0) -> str:
    """
    Classify an integer sequence by the pattern of its consecutive values.

    Returns one of:
      - "unknown":     fewer than _MIN_SAMPLES samples
      - "zero":        every sample is 0
      - "constant":    every sample is the same non-zero value
      - "incremental": every consecutive delta is in
                       (0, _INCREMENTAL_MAX_DELTA]
      - "random":      anything else

    Args:
        samples: Values in arrival order. Any finite iterable of ints is
            accepted (list, tuple, deque); it is copied, never mutated.
        modulus: Size of the counter space for fields that wrap. When
            positive, deltas are taken modulo this value so a counter that
            rolls over (e.g. 65535 -> 0 with modulus=0x10000 for the 16-bit
            IP-ID field) still counts as "incremental". The default 0
            disables wrap handling and uses plain subtraction.

    Order is preserved — pass the deque/list in arrival order.
    """
    # Copy once so slicing below works for deques and other non-list inputs.
    values = list(samples)
    if len(values) < _MIN_SAMPLES:
        return "unknown"

    first = values[0]
    if all(v == first for v in values):
        return "zero" if first == 0 else "constant"

    pairs = zip(values, values[1:])
    if modulus > 0:
        # Wrap-aware: a small step backwards becomes a huge positive delta
        # and is rejected by the upper bound, a roll-over becomes a small one.
        deltas = [(nxt - cur) % modulus for cur, nxt in pairs]
    else:
        deltas = [nxt - cur for cur, nxt in pairs]

    if all(0 < d <= _INCREMENTAL_MAX_DELTA for d in deltas):
        return "incremental"

    # No spread statistic is computed here: the label is "random" for every
    # remaining sequence, so the calculation would not change the result.
    return "random"
|
||||
Reference in New Issue
Block a user