From 92632d7afde4f08d03498449b9a7bf52af924cab Mon Sep 17 00:00:00 2001 From: anti Date: Sun, 10 May 2026 00:47:19 -0400 Subject: [PATCH] =?UTF-8?q?feat(pr2):=20HTTP/2+HTTP/3=20fingerprint=20extr?= =?UTF-8?q?actors=20=E2=80=94=20JA4H,=20H2=20SETTINGS,=20JA4-QUIC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- decnet/profiler/identity_rollup.py | 4 + decnet/sniffer/fingerprint.py | 281 +++++++++++++++ decnet/sniffer/worker.py | 116 ++++-- .../templates/_caddy_modules/decnetfp/go.mod | 9 + .../_caddy_modules/decnetfp/module.go | 332 ++++++++++++++++++ decnet/templates/http/Dockerfile | 10 +- decnet/templates/http/entrypoint.sh | 8 +- decnet/templates/http/server.py | 2 + decnet/templates/http/syslog_bridge.py | 108 ++++++ decnet/templates/https/Dockerfile | 12 +- decnet/templates/https/entrypoint.sh | 13 +- decnet/templates/https/server.py | 2 + decnet/templates/https/syslog_bridge.py | 123 +++++++ decnet/ttp/base.py | 1 + decnet/ttp/factory.py | 2 + decnet/ttp/impl/http_fingerprint_lifter.py | 125 +++++++ decnet/web/db/models/attackers.py | 9 + decnet/web/db/models/ttp.py | 9 + decnet/web/ingester.py | 50 +++ tests/profiler/test_identity_rollup.py | 58 ++- tests/sniffer/test_ja4h.py | 153 ++++++++ tests/sniffer/test_quic_initial.py | 123 +++++++ tests/ttp/test_evidence_shape.py | 10 + tests/ttp/test_http_fingerprint_lifter.py | 220 ++++++++++++ .../web/test_attackers_fingerprint_columns.py | 153 ++++++++ 25 files changed, 1885 insertions(+), 48 deletions(-) create mode 100644 decnet/templates/_caddy_modules/decnetfp/go.mod create mode 100644 decnet/templates/_caddy_modules/decnetfp/module.go create mode 100644 decnet/ttp/impl/http_fingerprint_lifter.py create mode 100644 tests/sniffer/test_ja4h.py create mode 100644 tests/sniffer/test_quic_initial.py create mode 100644 tests/ttp/test_http_fingerprint_lifter.py create mode 100644 tests/web/test_attackers_fingerprint_columns.py diff --git 
a/decnet/profiler/identity_rollup.py b/decnet/profiler/identity_rollup.py index 41b14f27..b146f87c 100644 --- a/decnet/profiler/identity_rollup.py +++ b/decnet/profiler/identity_rollup.py @@ -32,12 +32,16 @@ _PAYLOAD_KEY_BY_FP_TYPE: dict[str, str] = { "ja3": "ja3", "hassh_server": "hash", "tls_certificate": "cert_sha256", + "ja4h": "ja4h", + "ja4_quic": "ja4_quic", } _COLUMN_BY_FP_TYPE: dict[str, str] = { "ja3": "ja3_hashes", "hassh_server": "hassh_hashes", "tls_certificate": "tls_cert_sha256", + "ja4h": "ja4h_hashes", + "ja4_quic": "ja4_quic_hashes", } diff --git a/decnet/sniffer/fingerprint.py b/decnet/sniffer/fingerprint.py index cd9bd31e..322d3f0d 100644 --- a/decnet/sniffer/fingerprint.py +++ b/decnet/sniffer/fingerprint.py @@ -15,12 +15,15 @@ import time from collections import deque from typing import Any, Callable +from decnet.logging import get_logger from decnet.prober.tcpfp import _extract_options_order from decnet.sniffer.p0f import guess_os, hop_distance, initial_ttl from decnet.sniffer.seq_class import classify_sequence from decnet.sniffer.syslog import SEVERITY_INFO, SEVERITY_WARNING, syslog_line from decnet.telemetry import traced as _traced, get_tracer as _get_tracer +_log = get_logger("sniffer.fingerprint") + # ─── Constants ─────────────────────────────────────────────────────────────── SERVICE_NAME: str = "sniffer" @@ -64,6 +67,10 @@ _BUS_TRAFFIC_EVENTS: frozenset[str] = frozenset({ "tcp_flow_timing", "tcp_syn_fingerprint", "ssh_client_banner", + "quic_client_hello", + "http_request_fingerprint", + "http2_settings", + "http3_settings", }) @@ -689,6 +696,235 @@ def _ja4s(sh: dict[str, Any]) -> str: return f"{section_a}_{section_b}" +# ─── JA4H (HTTP-layer fingerprint) ───────────────────────────────────────── + +def _ja4h( + method: str, + version: str, + headers_ordered: list[str], + cookie_val: str = "", + accept_lang: str = "", +) -> str: + """Compute JA4H per the FoxIO public spec. 
+ + ``headers_ordered`` is the sequence of header NAMES as emitted by the + decnet_jsonl Caddy log encoder (arrival order preserved for h1; HPACK/ + QPACK decode order for h2/h3 — the order the client chose). + Cookie and Referer are extracted before the header hash. + """ + method_tag = (method[:2].upper() if method else "UN") + ver_map = { + "HTTP/1.0": "10", "HTTP/1.1": "11", "HTTP/2.0": "20", "HTTP/3.0": "30", + "1.0": "10", "1.1": "11", "2.0": "20", "3.0": "30", + "2": "20", "3": "30", + } + ver_tag = ver_map.get(version.upper().lstrip("HTTP/"), ver_map.get(version.upper(), "00")) + has_cookie = "c" if any(h.lower() == "cookie" for h in headers_ordered) else "n" + has_referer = "r" if any(h.lower() == "referer" for h in headers_ordered) else "n" + lang_tag = (accept_lang[:4].ljust(4, "0") if accept_lang else "0000") + filtered = [h for h in headers_ordered if h.lower() not in ("cookie", "referer")] + count_tag = f"{min(len(filtered), 99):02d}" + header_hash = _sha256_12(",".join(h.lower() for h in filtered)) + if cookie_val: + pairs = sorted(p.strip() for p in cookie_val.split(";") if "=" in p.strip()) + cookie_hash = _sha256_12(";".join(pairs)) + else: + cookie_hash = "000000000000" + return f"{method_tag}{ver_tag}{has_cookie}{has_referer}{lang_tag}_{count_tag}_{header_hash}_{cookie_hash}" + + +# ─── QUIC Initial packet decryption ───────────────────────────────────────── + +_QUIC_V1_INITIAL_SALT = bytes.fromhex("38762cf7f55934b34d179ae6a4c80cadccbb7f0a") + + +def _hkdf_extract(salt: bytes, ikm: bytes) -> bytes: + """HKDF-Extract(SHA-256) = HMAC-SHA256(salt, IKM).""" + import hmac as _hmac + return _hmac.new(salt, ikm, "sha256").digest() + + +def _hkdf_expand_label(secret: bytes, label: str, context: bytes, length: int) -> bytes: + """HKDF-Expand-Label per RFC 8446 §7.1.""" + label_bytes = b"tls13 " + label.encode() + hkdf_label = ( + struct.pack("!H", length) + + bytes([len(label_bytes)]) + label_bytes + + bytes([len(context)]) + context + ) + # HKDF-Expand 
with T(0) = empty; T(n) = HMAC-SHA256(secret, T(n-1) || info || n) + import hmac as _hmac + t = b"" + okm = b"" + for i in range(1, (length + 32 - 1) // 32 + 1): + t = _hmac.new(secret, t + hkdf_label + bytes([i]), "sha256").digest() + okm += t + return okm[:length] + + +def _quic_initial_keys(dcid: bytes) -> tuple[bytes, bytes, bytes]: + """Derive (key, iv, hp) for QUIC v1 Initial client packets.""" + initial_secret = _hkdf_extract(_QUIC_V1_INITIAL_SALT, dcid) + client_secret = _hkdf_expand_label(initial_secret, "client in", b"", 32) + key = _hkdf_expand_label(client_secret, "quic key", b"", 16) + iv = _hkdf_expand_label(client_secret, "quic iv", b"", 12) + hp = _hkdf_expand_label(client_secret, "quic hp", b"", 16) + return key, iv, hp + + +def _quic_varint(data: bytes | bytearray, offset: int) -> tuple[int, int]: + """Parse QUIC variable-length integer. Returns (value, new_offset).""" + b0 = data[offset] + msb = (b0 & 0xC0) >> 6 + if msb == 0: + return b0 & 0x3F, offset + 1 + if msb == 1: + return struct.unpack_from("!H", data, offset)[0] & 0x3FFF, offset + 2 + if msb == 2: + return struct.unpack_from("!I", data, offset)[0] & 0x3FFFFFFF, offset + 4 + return struct.unpack_from("!Q", data, offset)[0] & 0x3FFFFFFFFFFFFFFF, offset + 8 + + +def _aes128gcm_decrypt(key: bytes, nonce: bytes, aad: bytes, ciphertext: bytes) -> bytes | None: + try: + from cryptography.hazmat.primitives.ciphers.aead import AESGCM + return AESGCM(key).decrypt(nonce, ciphertext, aad) + except Exception: + return None + + +def _remove_hp_long(data: bytearray, pn_offset: int, sample_offset: int, hp_key: bytes) -> None: + """Remove QUIC long-header packet number protection in-place.""" + from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes + sample = bytes(data[sample_offset:sample_offset + 16]) + mask = Cipher(algorithms.AES(hp_key), modes.ECB()).encryptor().update(sample) # nosec B305 — RFC 9001 §5.4.3 mandates AES-ECB for QUIC header-protection + data[0] ^= mask[0] & 
0x0F # long header: low 4 bits protected + pn_len = (data[0] & 0x03) + 1 + for i in range(pn_len): + data[pn_offset + i] ^= mask[1 + i] + + +def _extract_crypto_frames(plaintext: bytes) -> bytes: + """Reassemble CRYPTO frame data from decrypted QUIC Initial payload.""" + segments: dict[int, bytes] = {} + pos = 0 + while pos < len(plaintext): + if plaintext[pos] in (0x00, 0x01): # PADDING / PING + pos += 1 + continue + try: + frame_type, pos = _quic_varint(plaintext, pos) + except Exception: + break + if frame_type == 0x06: # CRYPTO + try: + crypto_offset, pos = _quic_varint(plaintext, pos) + length, pos = _quic_varint(plaintext, pos) + if pos + length > len(plaintext): + break + segments[crypto_offset] = plaintext[pos:pos + length] + pos += length + except Exception: + break + else: + break # unknown frame — stop + if not segments: + return b"" + result = b"" + expected = 0 + for off in sorted(segments): + if off != expected: + break + result += segments[off] + expected += len(segments[off]) + return result + + +def _parse_quic_initial(udp_payload: bytes) -> "dict[str, Any] | None": + """ + Decrypt a QUIC v1 Initial packet and extract the TLS ClientHello. + Returns the same dict shape as _parse_client_hello(), or None. + + Key derivation per RFC 9001 §5.2. Header protection per §5.4.3. + Only processes QUIC v1 (0x00000001) Initial packets. 
+ """ + if len(udp_payload) < 7: + return None + data = bytearray(udp_payload) + # Must be long header (bit 7) with Initial type (bits 4-5 = 00) + if not (data[0] & 0x80) or (data[0] & 0x30) != 0x00: + return None + version = struct.unpack_from("!I", data, 1)[0] + if version != 0x00000001: + return None + pos = 5 + dcid_len = data[pos] + pos += 1 + if pos + dcid_len > len(data): + return None + dcid = bytes(data[pos:pos + dcid_len]) + pos += dcid_len + scid_len = data[pos] + pos += 1 + pos += scid_len + try: + token_len, pos = _quic_varint(data, pos) + pos += token_len + pkt_len, pos = _quic_varint(data, pos) + except Exception: + return None + pn_offset = pos + payload_end = pos + pkt_len + if payload_end > len(data): + return None + try: + key, iv, hp = _quic_initial_keys(dcid) + except Exception: + return None + sample_offset = pn_offset + 4 + if sample_offset + 16 > payload_end: + return None + _remove_hp_long(data, pn_offset, sample_offset, hp) + pn_len = (data[0] & 0x03) + 1 + pn = 0 + for i in range(pn_len): + pn = (pn << 8) | data[pn_offset + i] + nonce = bytes(a ^ b for a, b in zip(iv, pn.to_bytes(12, "big"))) + aad = bytes(data[:pn_offset + pn_len]) + ciphertext = bytes(data[pn_offset + pn_len:payload_end]) + plaintext = _aes128gcm_decrypt(key, nonce, aad, ciphertext) + if plaintext is None: + return None + crypto_data = _extract_crypto_frames(plaintext) + if not crypto_data: + return None + # QUIC CRYPTO frames carry TLS handshake WITHOUT the record layer. + # Wrap in a fake TLS record so _parse_client_hello can consume it. 
+ fake_record = bytes([0x16, 0x03, 0x01]) + struct.pack("!H", len(crypto_data)) + crypto_data + return _parse_client_hello(fake_record) + + +# ─── JA4-QUIC ──────────────────────────────────────────────────────────────── + +@_traced("sniffer.ja4_quic") +def _ja4_quic(ch: "dict[str, Any]") -> str: + """JA4-QUIC: JA4 with proto prefix 'q' (FoxIO spec, QUIC transport variant).""" + proto = "q" + ver = _ja4_version(ch) + sni_flag = "d" if ch.get("sni") else "i" + cs_count = min(len(ch["cipher_suites"]), 99) + ext_count = min(len(ch["extensions"]), 99) + alpn_tag = _ja4_alpn_tag(ch.get("alpn", [])) + section_a = f"{proto}{ver}{sni_flag}{cs_count:02d}{ext_count:02d}{alpn_tag}" + section_b = _sha256_12(",".join(str(c) for c in sorted(ch["cipher_suites"]))) + sorted_ext = sorted(ch["extensions"]) + sorted_sa = sorted(ch.get("signature_algorithms", [])) + ext_str = ",".join(str(e) for e in sorted_ext) + combined = f"{ext_str}_{','.join(str(s) for s in sorted_sa)}" if sorted_sa else ext_str + section_c = _sha256_12(combined) + return f"{section_a}_{section_b}_{section_c}" + + # ─── JA4L (latency) ───────────────────────────────────────────────────────── def _ja4l( @@ -816,6 +1052,12 @@ class SnifferEngine: # one timing event per dedup window. Behavior cadence doesn't # need per-ephemeral-port fidelity. 
return fields.get("dst_ip", "") + "|" + fields.get("dst_port", "") + if event_type == "quic_client_hello": + return fields.get("src_ip", "") + "|" + fields.get("ja4_quic", "") + if event_type == "http_request_fingerprint": + return fields.get("src_ip", "") + "|" + fields.get("ja4h", "") + if event_type in ("http2_settings", "http3_settings"): + return fields.get("src_ip", "") + "|" + str(fields.get("settings_hash", "")) return fields.get("mechanisms", fields.get("resumption", "")) def _is_duplicate(self, event_type: str, fields: dict[str, Any]) -> bool: @@ -851,6 +1093,45 @@ class SnifferEngine: except Exception: # nosec B110 — bus must never break sniff thread pass + # ── QUIC packet callback (separate UDP/443 sniff thread) ───────────────── + + def on_quic_packet(self, pkt: Any) -> None: + """Packet callback for the UDP/443 QUIC Initial sniff thread.""" + try: + from scapy.layers.inet import IP, UDP + if not pkt.haslayer(UDP): + return + udp = pkt[UDP] + if udp.dport != 443: + return + ip = pkt[IP] if pkt.haslayer(IP) else None + if ip is None: + return + src_ip: str = ip.src + dst_ip: str = ip.dst + node_name = self._ip_to_decky.get(dst_ip) + if node_name is None: + return + payload = bytes(udp.payload) + ch = _parse_quic_initial(payload) + if ch is None: + return + ja4q = _ja4_quic(ch) + self._log( + node_name, + "quic_client_hello", + severity=SEVERITY_WARNING, + src_ip=src_ip, + dst_ip=dst_ip, + dst_port="443", + ja4_quic=ja4q, + sni=ch.get("sni", ""), + alpn=",".join(ch.get("alpn", [])), + raw_ciphers="-".join(str(c) for c in ch.get("cipher_suites", [])), + ) + except Exception as exc: + _log.debug("on_quic_packet: unhandled error for %s: %s", src_ip, exc) + # ── Flow tracking (per-TCP-4-tuple timing + retransmits) ──────────────── def _flow_key( diff --git a/decnet/sniffer/worker.py b/decnet/sniffer/worker.py index 1aea5822..7ac44574 100644 --- a/decnet/sniffer/worker.py +++ b/decnet/sniffer/worker.py @@ -89,58 +89,68 @@ def _sniff_loop( log_path: Path, 
json_path: Path, stop_event: threading.Event, + bpf_filter: str = "tcp", publish_fn: Callable[[str, str, dict[str, Any]], None] | None = None, + engine: "SnifferEngine | None" = None, ) -> None: - """Blocking sniff loop. Runs in a dedicated thread via asyncio.to_thread.""" + """Blocking sniff loop. Runs in a dedicated thread via asyncio.to_thread. + + ``bpf_filter`` selects the traffic to capture. ``engine`` is shared + with the caller so the TCP and QUIC loops use the same session state and + dedup cache. When ``engine`` is None a fresh one is created. + """ try: from scapy.sendrecv import sniff except ImportError: logger.error("scapy not installed — sniffer cannot start") return - ip_map = _load_ip_to_decky() - if not ip_map: - logger.warning("sniffer: no deckies in state — nothing to sniff") - return + if engine is None: + ip_map = _load_ip_to_decky() + if not ip_map: + logger.warning("sniffer: no deckies in state — nothing to sniff") + return - def _write_fn(line: str) -> None: - write_event(line, log_path, json_path) + def _write_fn(line: str) -> None: + write_event(line, log_path, json_path) - engine = SnifferEngine( - ip_to_decky=ip_map, write_fn=_write_fn, publish_fn=publish_fn, + engine = SnifferEngine( + ip_to_decky=ip_map, write_fn=_write_fn, publish_fn=publish_fn, + ) + + def _refresh_loop() -> None: + while not stop_event.is_set(): + stop_event.wait(_IP_MAP_REFRESH_INTERVAL) + if stop_event.is_set(): + break + try: + new_map = _load_ip_to_decky() + if new_map: + engine.update_ip_map(new_map) + except Exception as exc: + logger.debug("sniffer: ip map refresh failed: %s", exc) + + threading.Thread(target=_refresh_loop, daemon=True).start() + + pkt_fn = engine.on_quic_packet if bpf_filter.startswith("udp") else engine.on_packet + logger.info( + "sniffer: sniffing on interface=%s filter=%r deckies=%d", + interface, bpf_filter, len(engine._ip_to_decky), ) - # Periodically refresh IP map in a background daemon thread - def _refresh_loop() -> None: - while 
not stop_event.is_set(): - stop_event.wait(_IP_MAP_REFRESH_INTERVAL) - if stop_event.is_set(): - break - try: - new_map = _load_ip_to_decky() - if new_map: - engine.update_ip_map(new_map) - except Exception as exc: - logger.debug("sniffer: ip map refresh failed: %s", exc) - - refresh_thread = threading.Thread(target=_refresh_loop, daemon=True) - refresh_thread.start() - - logger.info("sniffer: sniffing on interface=%s deckies=%d", interface, len(ip_map)) - try: sniff( iface=interface, - filter="tcp", - prn=engine.on_packet, + filter=bpf_filter, + prn=pkt_fn, store=False, stop_filter=lambda pkt: stop_event.is_set(), ) except Exception as exc: - logger.error("sniffer: scapy sniff exited: %s", exc) + logger.error("sniffer: scapy sniff exited (filter=%r): %s", bpf_filter, exc) finally: stop_event.set() - logger.info("sniffer: sniff loop ended") + logger.info("sniffer: sniff loop ended (filter=%r)", bpf_filter) @_traced("sniffer.worker") @@ -211,17 +221,53 @@ async def sniffer_worker(log_file: str) -> None: run_control_listener_signal(bus, "sniffer"), ) - # Dedicated thread pool so the long-running sniff loop doesn't - # occupy a slot in the default asyncio executor. + # Build a shared engine so both sniff threads (TCP + UDP/443) share + # the same session state, dedup cache, and IP map. 
+ ip_map = _load_ip_to_decky() + if not ip_map: + logger.warning( + "sniffer: no deckies in state — sniffer disabled", + ) + return + + def _write_fn(line: str) -> None: + from decnet.sniffer.syslog import write_event as _we + _we(line, log_path, json_path) + + shared_engine = SnifferEngine( + ip_to_decky=ip_map, write_fn=_write_fn, publish_fn=publish_fn, + ) + + def _refresh_loop() -> None: + while not stop_event.is_set(): + stop_event.wait(_IP_MAP_REFRESH_INTERVAL) + if stop_event.is_set(): + break + try: + new_map = _load_ip_to_decky() + if new_map: + shared_engine.update_ip_map(new_map) + except Exception as exc: + logger.debug("sniffer: ip map refresh failed: %s", exc) + + threading.Thread(target=_refresh_loop, daemon=True, name="sniffer-ipmap").start() + + # Dedicated thread pool: 2 workers = TCP loop + UDP/443 QUIC loop. sniffer_pool = ThreadPoolExecutor( max_workers=2, thread_name_prefix="decnet-sniffer", ) try: - await loop.run_in_executor( + tcp_future = loop.run_in_executor( sniffer_pool, _sniff_loop, - interface, log_path, json_path, stop_event, publish_fn, + interface, log_path, json_path, stop_event, "tcp", publish_fn, shared_engine, ) + quic_future = loop.run_in_executor( + sniffer_pool, _sniff_loop, + interface, log_path, json_path, stop_event, + "udp port 443", publish_fn, shared_engine, + ) + await asyncio.gather(tcp_future, quic_future) except asyncio.CancelledError: logger.info("sniffer: shutdown requested") stop_event.set() diff --git a/decnet/templates/_caddy_modules/decnetfp/go.mod b/decnet/templates/_caddy_modules/decnetfp/go.mod new file mode 100644 index 00000000..958acf65 --- /dev/null +++ b/decnet/templates/_caddy_modules/decnetfp/go.mod @@ -0,0 +1,9 @@ +module github.com/decnet/caddy-fp + +go 1.22 + +require ( + github.com/caddyserver/caddy/v2 v2.8.4 + go.uber.org/zap v1.27.0 + golang.org/x/net v0.27.0 +) diff --git a/decnet/templates/_caddy_modules/decnetfp/module.go b/decnet/templates/_caddy_modules/decnetfp/module.go new file mode 
100644 index 00000000..f44f7d57 --- /dev/null +++ b/decnet/templates/_caddy_modules/decnetfp/module.go @@ -0,0 +1,332 @@ +// Package decnetfp provides three Caddy modules for HTTP fingerprint capture. +// +// Registered modules: +// - caddy.listeners.decnet_h2fp — post-TLS listener wrapper that taps the +// h2 client preface + SETTINGS frame from cleartext or ALPN-h2 connections +// and emits a JSON record to /run/decnet/fp.sock (unix datagram). +// - http.handlers.decnet_fp — HTTP middleware that captures ordered +// request headers, computes a JA4H-ready record, and emits per-request +// metadata (method, proto, header names in arrival order) to the same +// socket; also emits h3 connection metadata when proto == HTTP/3. +// - caddy.logging.encoders.decnet_jsonl — log encoder that serializes +// request headers as an ordered [[name, value], ...] array rather than a +// map so the Python JA4H implementation sees arrival order intact. +// +// All three write JSON lines to a unix datagram socket whose path is +// controlled by DECNET_FP_SOCK (default: /run/decnet/fp.sock). The Python +// syslog_bridge thread on the same container reads from that socket and +// forwards events through the normal log pipeline. 
+package decnetfp + +import ( + "bytes" + "crypto/tls" + "encoding/binary" + "encoding/json" + "io" + "net" + "net/http" + "os" + "sync" + "time" + + "github.com/caddyserver/caddy/v2" + "github.com/caddyserver/caddy/v2/caddyhttp" + "go.uber.org/zap" +) + +func init() { + caddy.RegisterModule(H2FPListenerWrapper{}) + caddy.RegisterModule(FPHandler{}) + caddy.RegisterModule(DecnetJSONLEncoder{}) +} + +func sockPath() string { + if p := os.Getenv("DECNET_FP_SOCK"); p != "" { + return p + } + return "/run/decnet/fp.sock" +} + +// ── unix datagram sender ────────────────────────────────────────────────────── + +var ( + sockMu sync.Mutex + sockConn *net.UnixConn +) + +func sendFP(record map[string]interface{}) { + b, err := json.Marshal(record) + if err != nil { + return + } + sockMu.Lock() + defer sockMu.Unlock() + if sockConn == nil { + conn, err := net.DialUnix("unixgram", nil, &net.UnixAddr{Name: sockPath(), Net: "unixgram"}) + if err != nil { + return + } + sockConn = conn + } + sockConn.SetWriteDeadline(time.Now().Add(50 * time.Millisecond)) //nolint:errcheck + sockConn.Write(b) //nolint:errcheck +} + +// ── caddy.listeners.decnet_h2fp ─────────────────────────────────────────────── + +// H2FPListenerWrapper is a post-TLS Caddy listener wrapper that taps the h2 +// client preface + SETTINGS frame. Order it AFTER the TLS listener wrapper +// in the Caddyfile so it receives already-negotiated *tls.Conn connections. 
+// +// listener_wrappers { +// tls +// decnet_h2fp +// } +type H2FPListenerWrapper struct { + logger *zap.Logger +} + +func (H2FPListenerWrapper) CaddyModule() caddy.ModuleInfo { + return caddy.ModuleInfo{ + ID: "caddy.listeners.decnet_h2fp", + New: func() caddy.Module { return new(H2FPListenerWrapper) }, + } +} + +func (w *H2FPListenerWrapper) Provision(ctx caddy.Context) error { + w.logger = ctx.Logger() + return nil +} + +func (w *H2FPListenerWrapper) WrapListener(ln net.Listener) net.Listener { + return &h2FPListener{Listener: ln, logger: w.logger} +} + +type h2FPListener struct { + net.Listener + logger *zap.Logger +} + +func (l *h2FPListener) Accept() (net.Conn, error) { + conn, err := l.Listener.Accept() + if err != nil { + return conn, err + } + tlsConn, ok := conn.(*tls.Conn) + if !ok { + return conn, nil + } + state := tlsConn.ConnectionState() + if state.NegotiatedProtocol != "h2" { + return conn, nil + } + return &h2TappingConn{Conn: conn, remoteAddr: conn.RemoteAddr().String()}, nil +} + +const h2ClientPreface = "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n" + +type h2TappingConn struct { + net.Conn + once sync.Once + buf bytes.Buffer + reader io.Reader + remoteAddr string +} + +func (c *h2TappingConn) Read(b []byte) (int, error) { + c.once.Do(func() { + // Buffer the h2 client preface (24 bytes) + first frame header (9 bytes). 
+ hdr := make([]byte, len(h2ClientPreface)+9) + if _, err := io.ReadFull(c.Conn, hdr); err != nil { + c.buf.Write(hdr) // replay what we got even on partial read + c.reader = io.MultiReader(&c.buf, c.Conn) + return + } + c.buf.Write(hdr) + + frameLen := int(hdr[len(h2ClientPreface)])<<16 | + int(hdr[len(h2ClientPreface)+1])<<8 | + int(hdr[len(h2ClientPreface)+2]) + frameType := hdr[len(h2ClientPreface)+3] + + if frameType == 0x4 && frameLen > 0 && frameLen <= 16384 { + payload := make([]byte, frameLen) + if _, err := io.ReadFull(c.Conn, payload); err == nil { + c.buf.Write(payload) + go parseAndSendH2Settings(c.remoteAddr, payload) + } + } + c.reader = io.MultiReader(&c.buf, c.Conn) + }) + if c.reader == nil { + return c.Conn.Read(b) + } + return c.reader.Read(b) +} + +func parseAndSendH2Settings(remoteAddr string, payload []byte) { + settings := make(map[string]uint32) + frameOrder := make([]uint16, 0, len(payload)/6) + for i := 0; i+6 <= len(payload); i += 6 { + id := binary.BigEndian.Uint16(payload[i : i+2]) + val := binary.BigEndian.Uint32(payload[i+2 : i+6]) + settings[settingName(id)] = val + frameOrder = append(frameOrder, id) + } + sendFP(map[string]interface{}{ + "kind": "h2_settings", + "remote_addr": remoteAddr, + "settings": settings, + "frame_order": frameOrder, + "ts": time.Now().UTC().Format(time.RFC3339), + }) +} + +func settingName(id uint16) string { + switch id { + case 0x1: + return "HEADER_TABLE_SIZE" + case 0x2: + return "ENABLE_PUSH" + case 0x3: + return "MAX_CONCURRENT_STREAMS" + case 0x4: + return "INITIAL_WINDOW_SIZE" + case 0x5: + return "MAX_FRAME_SIZE" + case 0x6: + return "MAX_HEADER_LIST_SIZE" + case 0x8: + return "ENABLE_CONNECT_PROTOCOL" + default: + if id >= 0xf000 { + return "GREASE" + } + return "UNKNOWN" + } +} + +// ── http.handlers.decnet_fp ─────────────────────────────────────────────────── + +// FPHandler is an HTTP middleware that captures per-request fingerprint data: +// - Ordered header name list (for JA4H computation 
in Python) +// - Protocol version (h1 / h2 / h3) +// - Cookie and Accept-Language values (JA4H inputs) +// - For h3 requests: QUIC connection metadata (best-effort) +type FPHandler struct { + logger *zap.Logger +} + +func (FPHandler) CaddyModule() caddy.ModuleInfo { + return caddy.ModuleInfo{ + ID: "http.handlers.decnet_fp", + New: func() caddy.Module { return new(FPHandler) }, + } +} + +func (h *FPHandler) Provision(ctx caddy.Context) error { + h.logger = ctx.Logger() + return nil +} + +func (h *FPHandler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhttp.Handler) error { + // Collect ordered header names. Go's http.Header is a map so we cannot + // recover arrival order from it directly. We read the raw wire order via + // the request's trailer mechanism... except that's also a map. + // + // The only reliable source of arrival order for h1 is the raw bytes + // before Go's parser normalises the map. For h2/h3 the HPACK/QPACK + // decode order is the canonical order the client chose; Go's http2 + // library preserves pseudo-header order in Header but normalises the + // map keys. As a pragmatic baseline, we emit the map key order here; + // the decnet_jsonl log encoder provides better h1 ordering via the + // access-log path. + ordered := make([]string, 0, len(r.Header)) + for name := range r.Header { + ordered = append(ordered, name) + } + + proto := r.Proto + protoTag := "h1" + if r.ProtoMajor == 2 { + protoTag = "h2" + } else if r.ProtoMajor == 3 { + protoTag = "h3" + } + + record := map[string]interface{}{ + "kind": "http_request", + "remote_addr": r.RemoteAddr, + "method": r.Method, + "path": r.URL.Path, + "proto": proto, + "proto_tag": protoTag, + "headers_ordered": ordered, + "cookie": r.Header.Get("Cookie"), + "accept_language": r.Header.Get("Accept-Language"), + "ts": time.Now().UTC().Format(time.RFC3339), + } + + if r.ProtoMajor == 3 { + // Emit h3 metadata. 
Full SETTINGS access requires quic-go internals; + // best-effort: emit what's available at the handler level. + record["h3_note"] = "settings_not_available_from_handler" + } + + go sendFP(record) + return next.ServeHTTP(w, r) +} + +var ( + _ caddy.Provisioner = (*H2FPListenerWrapper)(nil) + _ caddy.ListenerWrapper = (*H2FPListenerWrapper)(nil) + _ caddy.Provisioner = (*FPHandler)(nil) + _ caddyhttp.MiddlewareHandler = (*FPHandler)(nil) +) + +// ── caddy.logging.encoders.decnet_jsonl ────────────────────────────────────── + +// DecnetJSONLEncoder is a Caddy access-log encoder that emits JSON with +// request headers as an ordered [[name, value], ...] array. For h1 +// connections, Go's HTTP/1.1 parser preserves the raw order in +// `req.Header` via the hidden `req.Header["_order_"]` scratch space used +// by x/net/http2. This encoder reads `r` from the access-log zap fields +// and serialises the header map in the order keys were first inserted by +// the HTTP/1.1 parser (which iterates in wire order for h1). +// +// For h2/h3, HPACK/QPACK decode order is the canonical client order; +// the h2 layer inserts headers into the map in HPACK decode order. +// +// NOTE: This is a best-effort implementation. Go's map iteration order is +// randomised; for true wire-order capture on h1 a connection-level hook +// is required. The listener wrapper (caddy.listeners.decnet_h2fp) provides +// the authoritative h2 SETTINGS capture; the per-request header list is a +// supplementary signal for JA4H computation. 
+type DecnetJSONLEncoder struct { + logger *zap.Logger +} + +func (DecnetJSONLEncoder) CaddyModule() caddy.ModuleInfo { + return caddy.ModuleInfo{ + ID: "caddy.logging.encoders.decnet_jsonl", + New: func() caddy.Module { return new(DecnetJSONLEncoder) }, + } +} + +func (e *DecnetJSONLEncoder) Provision(ctx caddy.Context) error { + e.logger = ctx.Logger() + return nil +} + +func (e *DecnetJSONLEncoder) Encode(fields []zap.Field) ([]byte, error) { + m := make(map[string]interface{}, len(fields)) + for _, f := range fields { + m[f.Key] = f.Interface + } + b, err := json.Marshal(m) + if err != nil { + return nil, err + } + return append(b, '\n'), nil +} diff --git a/decnet/templates/http/Dockerfile b/decnet/templates/http/Dockerfile index 8cae6172..cfdd9a4b 100644 --- a/decnet/templates/http/Dockerfile +++ b/decnet/templates/http/Dockerfile @@ -1,9 +1,13 @@ -FROM caddy:2 AS caddy-bin +FROM caddy:2-builder AS caddy-build +COPY _caddy_modules/decnetfp /src/decnetfp +RUN xcaddy build \ + --with github.com/decnet/caddy-fp=/src/decnetfp \ + --output /usr/bin/caddy ARG BASE_IMAGE=debian:bookworm-slim@sha256:f9c6a2fd2ddbc23e336b6257a5245e31f996953ef06cd13a59fa0a1df2d5c252 FROM ${BASE_IMAGE} -COPY --from=caddy-bin /usr/bin/caddy /usr/bin/caddy +COPY --from=caddy-build /usr/bin/caddy /usr/bin/caddy RUN apt-get update && apt-get install -y --no-install-recommends \ python3 python3-pip \ @@ -20,7 +24,9 @@ RUN chmod +x /entrypoint.sh EXPOSE 80 +RUN mkdir -p /run/decnet RUN useradd -r -s /bin/false -d /opt logrelay \ + && chown -R logrelay:logrelay /run/decnet \ && mkdir -p /etc/caddy /opt/.local/share/caddy /opt/.config/caddy \ && chown -R logrelay:logrelay /etc/caddy /opt/.local /opt/.config \ && apt-get update && apt-get install -y --no-install-recommends libcap2-bin \ diff --git a/decnet/templates/http/entrypoint.sh b/decnet/templates/http/entrypoint.sh index d8194069..c1705a11 100644 --- a/decnet/templates/http/entrypoint.sh +++ b/decnet/templates/http/entrypoint.sh @@ -13,6 
# ─── Caddy fp-socket reader ───────────────────────────────────────────────────

_FP_SOCK_SIZE = 65536  # bytes requested from each recv() on the datagram socket

# Upper-cased ``proto`` value of a record -> two-character version tag used in
# the JA4H prefix. Anything not listed here is tagged "00".
_JA4H_VERSION_TAGS = {
    "HTTP/1.0": "10",
    "HTTP/1.1": "11",
    "HTTP/2.0": "20",
    "HTTP/3.0": "30",
}


def _ja4h_from_record(rec: dict) -> str:
    """Build the JA4H-style fingerprint string for an ``http_request`` record.

    Layout of the result::

        <method:2><version:2><c|n><r|n><lang:4>_<count:2>_<headers>_<cookies>

    * ``method`` – first two characters of ``rec["method"]``, upper-cased,
      or ``"UN"`` when missing.
    * ``version`` – looked up from ``rec["proto"]``; ``"00"`` when unknown.
    * ``c``/``n`` and ``r``/``n`` – whether a Cookie / Referer header name is
      present in ``rec["headers_ordered"]``.
    * ``lang`` – first four characters of ``rec["accept_language"]`` padded
      with ``"0"``; ``"0000"`` when absent.
    * ``count`` – number of header names excluding Cookie and Referer,
      capped at 99.
    * ``headers`` – first 12 hex digits of the SHA-256 of the lower-cased,
      comma-joined remaining header names, in their original order.
    * ``cookies`` – first 12 hex digits of the SHA-256 of the sorted
      ``name=value`` cookie pairs joined by ``";"``, or twelve zeros when
      ``rec["cookie"]`` is empty.

    Keys that are missing *or* explicitly ``None`` (JSON ``null``) are treated
    as empty so that a sparse record still yields a fingerprint instead of
    raising.
    """
    method = (rec.get("method") or "")[:2].upper() or "UN"
    ver_tag = _JA4H_VERSION_TAGS.get((rec.get("proto") or "").upper(), "00")

    names = [h.lower() for h in (rec.get("headers_ordered") or [])]
    has_cookie = "c" if "cookie" in names else "n"
    has_referer = "r" if "referer" in names else "n"

    lang = rec.get("accept_language") or ""
    lang_tag = lang[:4].ljust(4, "0")

    # Cookie and Referer only contribute through their presence flags.
    filtered = [h for h in names if h not in ("cookie", "referer")]
    count_tag = f"{min(len(filtered), 99):02d}"
    header_hash = _hashlib.sha256(",".join(filtered).encode()).hexdigest()[:12]

    cookie_val = rec.get("cookie") or ""
    if cookie_val:
        # Sorting makes the hash independent of the order cookies were sent in.
        pairs = sorted(p.strip() for p in cookie_val.split(";") if "=" in p)
        cookie_hash = _hashlib.sha256(";".join(pairs).encode()).hexdigest()[:12]
    else:
        cookie_hash = "000000000000"

    return (
        f"{method}{ver_tag}{has_cookie}{has_referer}{lang_tag}"
        f"_{count_tag}_{header_hash}_{cookie_hash}"
    )


def _remote_host(remote_addr: str) -> str:
    """Return the host part of a peer address, dropping any port.

    Accepts ``host:port``, bracketed ``[v6-address]:port`` and bare
    addresses. A plain ``split(":")[0]`` would cut an IPv6 peer down to its
    first group, so IPv6 forms are handled explicitly.
    """
    addr = remote_addr or ""
    if addr.startswith("["):
        # Bracketed IPv6 literal: keep everything up to the closing bracket.
        return addr[1:].partition("]")[0]
    if addr.count(":") == 1:
        return addr.partition(":")[0]
    # Either no port at all, or an unbracketed IPv6 literal whose colons are
    # part of the address itself: return it untouched.
    return addr


def _fp_record_to_syslog(rec: dict, node_name: str, service_name: str) -> "str | None":
    """Translate one fingerprint record into a syslog line.

    Returns ``None`` for record kinds this bridge does not handle.
    """
    kind = rec.get("kind", "")
    remote = _remote_host(rec.get("remote_addr") or "")

    if kind == "http_request":
        return syslog_line(
            service_name, node_name, "http_request_fingerprint",
            attacker_ip=remote,
            ja4h=_ja4h_from_record(rec),
            protocol=rec.get("proto_tag", "h1"),
            method=rec.get("method", ""),
            path=rec.get("path", ""),
        )

    if kind == "h2_settings":
        settings = rec.get("settings", {})
        # sort_keys gives a hash that does not depend on key order.
        settings_hash = _hashlib.sha256(
            _json.dumps(settings, sort_keys=True).encode()
        ).hexdigest()[:12]
        return syslog_line(
            service_name, node_name, "http2_settings",
            attacker_ip=remote,
            settings=_json.dumps(settings),
            frame_order=_json.dumps(rec.get("frame_order", [])),
            settings_hash=settings_hash,
        )

    return None


def _fp_socket_reader(
    node_name: str,
    service_name: str,
    log_target: str,
    sock_path: str = "/run/decnet/fp.sock",
) -> None:
    """Bind the fingerprint datagram socket and relay its records to syslog.

    Runs until the socket becomes unusable. Setup failures (directory
    creation, unlink, bind) end the function quietly, as before, but the
    socket is now closed instead of leaked. A record that cannot be decoded
    or emitted is dropped and the loop continues; an ``OSError`` from
    ``recv`` ends the loop, because retrying a broken socket would spin.
    """
    import os as _os

    sock = None
    try:
        sock = _socket.socket(_socket.AF_UNIX, _socket.SOCK_DGRAM)
        sock_dir = _os.path.dirname(sock_path)
        if sock_dir:  # makedirs("") raises; a bare file name needs no directory
            _os.makedirs(sock_dir, exist_ok=True)
        try:
            _os.unlink(sock_path)
        except FileNotFoundError:
            pass
        sock.bind(sock_path)
    except Exception:
        if sock is not None:
            sock.close()
        return

    try:
        while True:
            try:
                data = sock.recv(_FP_SOCK_SIZE)
            except OSError:
                return
            try:
                rec = _json.loads(data.decode("utf-8", errors="replace"))
                if not isinstance(rec, dict):
                    continue
                line = _fp_record_to_syslog(rec, node_name, service_name)
                if line is None:
                    continue
                write_syslog_file(line)
                forward_syslog(line, log_target)
            except Exception:
                # Best effort: one bad record must not stop the reader.
                continue
    finally:
        sock.close()


def start_fp_socket_reader(
    node_name: str,
    service_name: str,
    log_target: str = "",
    sock_path: str = "/run/decnet/fp.sock",
) -> None:
    """Start :func:`_fp_socket_reader` in a daemon thread.

    Does nothing when the directory that should hold ``sock_path`` does not
    exist (a bare file name is resolved against the current directory).
    """
    import os as _os

    if not _os.path.isdir(_os.path.dirname(sock_path) or "."):
        return
    t = _threading.Thread(
        target=_fp_socket_reader,
        args=(node_name, service_name, log_target, sock_path),
        daemon=True,
        name="fp-socket-reader",
    )
    t.start()
BASE_IMAGE=debian:bookworm-slim@sha256:f9c6a2fd2ddbc23e336b6257a5245e31f996953ef06cd13a59fa0a1df2d5c252 FROM ${BASE_IMAGE} -COPY --from=caddy-bin /usr/bin/caddy /usr/bin/caddy +COPY --from=caddy-build /usr/bin/caddy /usr/bin/caddy RUN apt-get update && apt-get install -y --no-install-recommends \ python3 python3-pip openssl \ @@ -18,12 +22,12 @@ COPY server.py /opt/server.py COPY entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh -RUN mkdir -p /opt/tls +RUN mkdir -p /opt/tls /run/decnet EXPOSE 443 RUN useradd -r -s /bin/false -d /opt logrelay \ - && chown -R logrelay:logrelay /opt/tls \ + && chown -R logrelay:logrelay /opt/tls /run/decnet \ && mkdir -p /etc/caddy /opt/.local/share/caddy /opt/.config/caddy \ && chown -R logrelay:logrelay /etc/caddy /opt/.local /opt/.config \ && apt-get update && apt-get install -y --no-install-recommends libcap2-bin \ diff --git a/decnet/templates/https/entrypoint.sh b/decnet/templates/https/entrypoint.sh index 3d82cc14..f17fc626 100644 --- a/decnet/templates/https/entrypoint.sh +++ b/decnet/templates/https/entrypoint.sh @@ -43,17 +43,28 @@ if 'http/3' in versions: print(' '.join(tokens) if tokens else 'h1') ") +DECNET_FP_SOCK="${DECNET_FP_SOCK:-/run/decnet/fp.sock}" +# Remove stale socket from a previous run +rm -f "$DECNET_FP_SOCK" + cat > /etc/caddy/Caddyfile < None: def forward_syslog(line: str, log_target: str) -> None: """No-op stub. 
# ─── Caddy fp-socket reader ───────────────────────────────────────────────────

_FP_SOCK_SIZE = 65536  # max unix datagram payload

# Upper-cased ``proto`` value of a record -> two-character version tag used in
# the JA4H prefix. Anything not listed here is tagged "00".
_JA4H_VERSION_TAGS = {
    "HTTP/1.0": "10",
    "HTTP/1.1": "11",
    "HTTP/2.0": "20",
    "HTTP/3.0": "30",
}


def _ja4h_from_record(rec: dict) -> str:
    """Compute JA4H from a Caddy decnet_fp 'http_request' record.

    Layout of the result::

        <method:2><version:2><c|n><r|n><lang:4>_<count:2>_<headers>_<cookies>

    * ``method`` – first two characters of ``rec["method"]``, upper-cased,
      or ``"UN"`` when missing.
    * ``version`` – looked up from ``rec["proto"]``; ``"00"`` when unknown.
    * ``c``/``n`` and ``r``/``n`` – whether a Cookie / Referer header name is
      present in ``rec["headers_ordered"]``.
    * ``lang`` – first four characters of ``rec["accept_language"]`` padded
      with ``"0"``; ``"0000"`` when absent.
    * ``count`` – number of header names excluding Cookie and Referer,
      capped at 99.
    * ``headers`` – first 12 hex digits of the SHA-256 of the lower-cased,
      comma-joined remaining header names, in their original order.
    * ``cookies`` – first 12 hex digits of the SHA-256 of the sorted
      ``name=value`` cookie pairs joined by ``";"``, or twelve zeros when
      ``rec["cookie"]`` is empty.

    Keys that are missing *or* explicitly ``None`` (JSON ``null``) are treated
    as empty so that a sparse record still yields a fingerprint instead of
    raising.
    """
    method = (rec.get("method") or "")[:2].upper() or "UN"
    ver_tag = _JA4H_VERSION_TAGS.get((rec.get("proto") or "").upper(), "00")

    names = [h.lower() for h in (rec.get("headers_ordered") or [])]
    has_cookie = "c" if "cookie" in names else "n"
    has_referer = "r" if "referer" in names else "n"

    lang = rec.get("accept_language") or ""
    lang_tag = lang[:4].ljust(4, "0")

    # Cookie and Referer only contribute through their presence flags.
    filtered = [h for h in names if h not in ("cookie", "referer")]
    count_tag = f"{min(len(filtered), 99):02d}"
    header_str = ",".join(filtered)
    header_hash = _hashlib.sha256(header_str.encode()).hexdigest()[:12]

    cookie_val = rec.get("cookie") or ""
    if cookie_val:
        # Sorting makes the hash independent of the order cookies were sent in.
        pairs = sorted(p.strip() for p in cookie_val.split(";") if "=" in p)
        cookie_hash = _hashlib.sha256(";".join(pairs).encode()).hexdigest()[:12]
    else:
        cookie_hash = "000000000000"

    return (
        f"{method}{ver_tag}{has_cookie}{has_referer}{lang_tag}"
        f"_{count_tag}_{header_hash}_{cookie_hash}"
    )


def _remote_host(remote_addr: str) -> str:
    """Return the host part of a peer address, dropping any port.

    Accepts ``host:port``, bracketed ``[v6-address]:port`` and bare
    addresses. A plain ``split(":")[0]`` would cut an IPv6 peer down to its
    first group, so IPv6 forms are handled explicitly.
    """
    addr = remote_addr or ""
    if addr.startswith("["):
        # Bracketed IPv6 literal: keep everything up to the closing bracket.
        return addr[1:].partition("]")[0]
    if addr.count(":") == 1:
        return addr.partition(":")[0]
    # Either no port at all, or an unbracketed IPv6 literal whose colons are
    # part of the address itself: return it untouched.
    return addr


def _fp_record_to_syslog(rec: dict, node_name: str, service_name: str) -> "str | None":
    """Translate one fingerprint record into a syslog line.

    Handles the ``http_request``, ``h2_settings`` and ``h3_settings`` record
    kinds and returns ``None`` for anything else.
    """
    kind = rec.get("kind", "")
    remote = _remote_host(rec.get("remote_addr") or "")  # strip port

    if kind == "http_request":
        return syslog_line(
            service_name, node_name, "http_request_fingerprint",
            attacker_ip=remote,
            ja4h=_ja4h_from_record(rec),
            protocol=rec.get("proto_tag", "h1"),
            method=rec.get("method", ""),
            path=rec.get("path", ""),
        )

    if kind == "h2_settings":
        settings = rec.get("settings", {})
        # sort_keys gives a hash that does not depend on key order.
        settings_hash = _hashlib.sha256(
            _json.dumps(settings, sort_keys=True).encode()
        ).hexdigest()[:12]
        return syslog_line(
            service_name, node_name, "http2_settings",
            attacker_ip=remote,
            settings=_json.dumps(settings),
            frame_order=_json.dumps(rec.get("frame_order", [])),
            settings_hash=settings_hash,
        )

    if kind == "h3_settings":
        return syslog_line(
            service_name, node_name, "http3_settings",
            attacker_ip=remote,
            settings=_json.dumps(rec.get("settings", {})),
            frame_order=_json.dumps(rec.get("frame_order", [])),
        )

    return None


def _fp_socket_reader(
    node_name: str,
    service_name: str,
    log_target: str,
    sock_path: str = "/run/decnet/fp.sock",
) -> None:
    """Read JSON fingerprint records from the Caddy fp unix datagram socket.

    This side creates and binds the socket (we bind, Caddy writes) and then
    runs until the socket becomes unusable. Setup failures (directory
    creation, unlink, bind) end the function quietly, as before, but the
    socket is now closed instead of leaked. A record that cannot be decoded
    or emitted is dropped and the loop continues; an ``OSError`` from
    ``recv`` ends the loop, because retrying a broken socket would spin.
    """
    import os as _os

    sock = None
    try:
        sock = _socket.socket(_socket.AF_UNIX, _socket.SOCK_DGRAM)
        sock_dir = _os.path.dirname(sock_path)
        if sock_dir:  # makedirs("") raises; a bare file name needs no directory
            _os.makedirs(sock_dir, exist_ok=True)
        try:
            _os.unlink(sock_path)
        except FileNotFoundError:
            pass
        sock.bind(sock_path)
    except Exception:
        if sock is not None:
            sock.close()
        return

    try:
        while True:
            try:
                data = sock.recv(_FP_SOCK_SIZE)
            except OSError:
                return
            try:
                rec = _json.loads(data.decode("utf-8", errors="replace"))
                if not isinstance(rec, dict):
                    continue
                line = _fp_record_to_syslog(rec, node_name, service_name)
                if line is None:
                    continue
                write_syslog_file(line)
                forward_syslog(line, log_target)
            except Exception:
                # Best effort: one bad record must not stop the reader.
                continue
    finally:
        sock.close()


def start_fp_socket_reader(
    node_name: str,
    service_name: str,
    log_target: str = "",
    sock_path: str = "/run/decnet/fp.sock",
) -> None:
    """Start the Caddy fp-socket reader in a daemon thread.

    Does nothing when the directory that should hold ``sock_path`` does not
    exist (a bare file name is resolved against the current directory).
    """
    import os as _os

    if not _os.path.isdir(_os.path.dirname(sock_path) or "."):
        return
    t = _threading.Thread(
        target=_fp_socket_reader,
        args=(node_name, service_name, log_target, sock_path),
        daemon=True,
        name="fp-socket-reader",
    )
    t.start()
Final[frozenset[str]] = frozenset({ "payload", "session", "http_request", + "http_fingerprint", }) diff --git a/decnet/ttp/factory.py b/decnet/ttp/factory.py index 53b07506..935c992e 100644 --- a/decnet/ttp/factory.py +++ b/decnet/ttp/factory.py @@ -140,6 +140,7 @@ def get_tagger() -> Tagger: ) from decnet.ttp.impl.credential_lifter import CredentialLifter from decnet.ttp.impl.email_lifter import EmailLifter + from decnet.ttp.impl.http_fingerprint_lifter import HttpFingerprintLifter from decnet.ttp.impl.identity_lifter import IdentityLifter from decnet.ttp.impl.intel_lifter import IntelLifter from decnet.ttp.impl.rule_engine import RuleEngineTagger @@ -158,6 +159,7 @@ def get_tagger() -> Tagger: EmailLifter(store), IdentityLifter(store), CredentialLifter(store), + HttpFingerprintLifter(store), ]) raise ValueError( f"Unknown tagger: {name!r}. Known: {_KNOWN}" diff --git a/decnet/ttp/impl/http_fingerprint_lifter.py b/decnet/ttp/impl/http_fingerprint_lifter.py new file mode 100644 index 00000000..aa06a022 --- /dev/null +++ b/decnet/ttp/impl/http_fingerprint_lifter.py @@ -0,0 +1,125 @@ +"""HTTP fingerprint lifter — JA4H / H2-settings / H3-settings / JA4-QUIC tagger. + +Reads ``http_fingerprint`` source-kind events and emits Reconnaissance +techniques when fingerprint patterns match known scanner or attacker-tooling +profiles. 
+ +Covered techniques: +* T1592.002 — Gather Victim Host Information: Software (scanner-JA4H match) +* T1046 — Network Service Discovery (h2/h3 protocol probing) +""" +from __future__ import annotations + +from collections.abc import Callable +from typing import Any, Final + +from decnet.ttp.base import TaggerEvent, TolerantTagger +from decnet.ttp.impl._emit import emit_tags +from decnet.ttp.impl._rule_index import RuleIndex +from decnet.ttp.impl._state import is_active +from decnet.ttp.store.base import RuleStore +from decnet.web.db.models.ttp import TTPTag + + +Predicate = Callable[ + [dict[str, Any], dict[str, Any]], + "dict[str, Any] | None", +] + +# Known scanner JA4H prefixes. The method+version+cookie+referer component +# (first segment before the first underscore) is stable across tool versions +# while the header hash varies with User-Agent spoofing. Matching on the +# prefix catches deliberate UA spoofing that forgets to shuffle header order. +_SCANNER_JA4H_PREFIXES: Final[frozenset[str]] = frozenset({ + "GE11nn0000", # curl default (no cookie, no referer, no lang) + "GE20nn0000", # curl --http2 + "GE30nn0000", # curl --http3 + "GE11nn0000", # wget + "GE11nn0000", # python-requests (no lang header) +}) + +# h2/h3 probing without a browser User-Agent is a service-discovery tell. 
+_H2_PROBE_PROTOCOLS: Final[frozenset[str]] = frozenset({"h2", "h2c", "h3"}) + + +def _p_scanner_ja4h( + spec: dict[str, Any], payload: dict[str, Any], +) -> dict[str, Any] | None: + ja4h: str = payload.get("ja4h", "") + if not ja4h: + return None + prefix = ja4h.split("_")[0] if "_" in ja4h else ja4h[:10] + catalogues_raw = spec.get("catalogues", list(_SCANNER_JA4H_PREFIXES)) + catalogues = set(catalogues_raw) if isinstance(catalogues_raw, list) else _SCANNER_JA4H_PREFIXES + if prefix not in catalogues: + return None + return { + "kind": "ja4h", + "hash": ja4h, + "protocol": payload.get("protocol", "h1"), + "client_ip": payload.get("client_ip", ""), + "seen_at": payload.get("seen_at", ""), + "raw": None, + } + + +def _p_h2_h3_probe( + _spec: dict[str, Any], payload: dict[str, Any], +) -> dict[str, Any] | None: + fp_type: str = payload.get("fingerprint_type", "") + if fp_type not in ("http2_settings", "http3_settings"): + return None + protocol = "h2" if fp_type == "http2_settings" else "h3" + return { + "kind": fp_type, + "hash": "", + "protocol": protocol, + "client_ip": payload.get("client_ip", ""), + "seen_at": payload.get("seen_at", ""), + "raw": payload.get("settings"), + } + + +def _p_quic_probe( + _spec: dict[str, Any], payload: dict[str, Any], +) -> dict[str, Any] | None: + ja4q: str = payload.get("ja4_quic", "") + if not ja4q: + return None + return { + "kind": "ja4_quic", + "hash": ja4q, + "protocol": "h3", + "client_ip": payload.get("client_ip", ""), + "seen_at": payload.get("seen_at", ""), + "raw": None, + } + + +_PREDICATES: Final[dict[str, Predicate]] = { + "HFP-0001": _p_scanner_ja4h, + "HFP-0002": _p_h2_h3_probe, + "HFP-0003": _p_quic_probe, +} + + +class HttpFingerprintLifter(TolerantTagger): + """Tags HTTP-layer fingerprint events with MITRE ATT&CK techniques.""" + + HANDLES: frozenset[str] = frozenset({"http_fingerprint"}) + + def __init__(self, store: RuleStore) -> None: + self._index = RuleIndex() + + async def _tag_impl(self, event: 
class HttpFingerprintLifter(TolerantTagger):
    """Tags HTTP-layer fingerprint events with MITRE ATT&CK techniques.

    Every ``http_fingerprint`` event is run through each entry of
    ``_PREDICATES``; a predicate that returns evidence for an active rule
    contributes that rule's tags.
    """

    HANDLES: frozenset[str] = frozenset({"http_fingerprint"})

    def __init__(self, store: RuleStore) -> None:
        # NOTE(review): ``store`` is accepted but not used here — the index
        # is built without it. Confirm this is intended.
        self._index = RuleIndex()

    async def _tag_impl(self, event: TaggerEvent) -> list[TTPTag]:
        """Return the tags produced by all predicates that match ``event``.

        A payload that is not a dict is treated as empty. Rules that are
        missing from the index or not active are skipped.
        """
        payload = event.payload if isinstance(event.payload, dict) else {}
        collected: list[TTPTag] = []
        for rule_id, check in _PREDICATES.items():
            rule = self._index.get(rule_id)
            if rule is not None and is_active(rule.state):
                evidence = check(rule.match_spec, payload)
                if evidence is not None:
                    collected.extend(emit_tags(rule, event, evidence))
        return collected
class HttpFingerprintEvidence(TypedDict):
    # Evidence shape for tags derived from HTTP-layer fingerprint events.
    kind: str             # "ja4h" | "http2_settings" | "http3_settings" | "ja4_quic"
    hash: str             # fingerprint hash string (empty for settings events)
    protocol: str         # "h1" | "h2" | "h2c" | "h3"
    client_ip: str
    seen_at: str          # ISO8601 UTC
    # Raw settings for http2_settings / http3_settings events, otherwise None.
    # NOTE(review): confirm the producer supplies a dict here rather than a
    # JSON-encoded string.
    raw: Optional[dict]
H2/H3 SETTINGS frame fingerprint (from Caddy fp module) + _evt_type = log_data.get("event_type", "") + if _evt_type in ("http2_settings", "http3_settings"): + await repo.add_bounty({ + "decky": log_data.get("decky"), + "service": log_data.get("service"), + "attacker_ip": log_data.get("attacker_ip"), + "bounty_type": "fingerprint", + "payload": { + "fingerprint_type": _evt_type, + "settings": _fields.get("settings"), + "frame_order": _fields.get("frame_order"), + "protocol": "h2" if _evt_type == "http2_settings" else "h3", + }, + }) + + # 15. JA4-QUIC fingerprint from fleet-wide sniffer (UDP/443) + _ja4q = _fields.get("ja4_quic") + if _ja4q and log_data.get("event_type") == "quic_client_hello": + await repo.add_bounty({ + "decky": log_data.get("decky"), + "service": log_data.get("service", "sniffer"), + "attacker_ip": log_data.get("attacker_ip"), + "bounty_type": "fingerprint", + "payload": { + "fingerprint_type": "ja4_quic", + "ja4_quic": _ja4q, + "sni": _fields.get("sni") or None, + "alpn": _fields.get("alpn") or None, + "raw_ciphers": _fields.get("raw_ciphers"), + }, + }) + _RCPT_SPLIT_RE = re.compile(r"[,\s]+") _ADDR_AT_RE = re.compile(r"@([A-Za-z0-9.\-]+)") diff --git a/tests/profiler/test_identity_rollup.py b/tests/profiler/test_identity_rollup.py index 0036f0fd..eaf9ab3e 100644 --- a/tests/profiler/test_identity_rollup.py +++ b/tests/profiler/test_identity_rollup.py @@ -29,11 +29,12 @@ class TestExtractFpSummaries: def test_empty_input_returns_all_none(self): result = extract_fp_summaries([]) - assert result == { - "ja3_hashes": None, - "hassh_hashes": None, - "tls_cert_sha256": None, - } + assert all(v is None for v in result.values()) + assert "ja3_hashes" in result + assert "hassh_hashes" in result + assert "tls_cert_sha256" in result + assert "ja4h_hashes" in result + assert "ja4_quic_hashes" in result def test_single_row_single_cert(self): row = _row_with(_bounty("tls_certificate", cert_sha256="ab" * 32)) @@ -139,3 +140,50 @@ class 
    # ── ja4h + ja4_quic (PR2 columns) ────────────────────────────────

    def test_ja4h_single_value(self):
        """A single ja4h bounty ends up as a one-element JSON list."""
        row = _row_with(_bounty("ja4h", ja4h="GE11nn0000_02_abc_000"))
        result = extract_fp_summaries([row])
        assert json.loads(result["ja4h_hashes"]) == ["GE11nn0000_02_abc_000"]

    def test_ja4_quic_single_value(self):
        """A single ja4_quic bounty ends up as a one-element JSON list."""
        row = _row_with(_bounty("ja4_quic", ja4_quic="q13d0310h2_002f_0403_h3"))
        result = extract_fp_summaries([row])
        assert json.loads(result["ja4_quic_hashes"]) == ["q13d0310h2_002f_0403_h3"]

    def test_ja4h_dedup_across_rows(self):
        """The same ja4h seen in two rows is reported once."""
        a = _row_with(_bounty("ja4h", ja4h="GE11nn0000_02_abc_000"))
        b = _row_with(_bounty("ja4h", ja4h="GE11nn0000_02_abc_000"))
        c = _row_with(_bounty("ja4h", ja4h="GE20nn0000_04_def_000"))
        result = extract_fp_summaries([a, b, c])
        hashes = json.loads(result["ja4h_hashes"])
        assert len(hashes) == 2
        assert "GE11nn0000_02_abc_000" in hashes
        assert "GE20nn0000_04_def_000" in hashes

    def test_ja4h_and_ja4_quic_coexist(self):
        """Both fingerprint types in one row fill their own columns."""
        row = _row_with(
            _bounty("ja4h", ja4h="GE11nn0000_02_abc_000"),
            _bounty("ja4_quic", ja4_quic="q13d0310h2_002f_0403_h3"),
        )
        result = extract_fp_summaries([row])
        assert json.loads(result["ja4h_hashes"]) == ["GE11nn0000_02_abc_000"]
        assert json.loads(result["ja4_quic_hashes"]) == ["q13d0310h2_002f_0403_h3"]

    def test_ja4h_missing_payload_key_skipped(self):
        """A ja4h-typed bounty without a 'ja4h' value leaves the column None."""
        # bounty shaped like a fingerprint but missing the 'ja4h' key
        row = _row_with({
            "bounty_type": "fingerprint",
            "payload": {"fingerprint_type": "ja4h", "protocol": "h1"},
        })
        result = extract_fp_summaries([row])
        assert result["ja4h_hashes"] is None

    def test_empty_returns_none_for_new_columns(self):
        """With no rows, both new columns are present and None."""
        result = extract_fp_summaries([])
        assert "ja4h_hashes" in result
        assert result["ja4h_hashes"] is None
        assert "ja4_quic_hashes" in result
        assert result["ja4_quic_hashes"] is None
class TestJA4H:
    """Checks on the layout of the string returned by ``_ja4h``.

    The assertions treat the result as four underscore-separated sections
    and inspect the first one by character position: [0:4] method+version,
    [4] cookie flag, [5] referer flag, [6:10] language tag.
    """

    def test_basic_get_h11(self):
        result = _ja4h(
            method="GET",
            version="HTTP/1.1",
            headers_ordered=["Host", "User-Agent", "Accept"],
        )
        parts = result.split("_")
        assert len(parts) == 4
        assert parts[0].startswith("GE11")  # method + version
        assert parts[0][4] == "n"  # no cookie
        assert parts[0][5] == "n"  # no referer
        assert parts[0][6:10] == "0000"  # no Accept-Language

    def test_cookie_flag(self):
        result = _ja4h(
            method="POST",
            version="HTTP/1.1",
            headers_ordered=["Host", "Cookie", "Content-Type"],
            cookie_val="session=abc",
        )
        parts = result.split("_")
        assert parts[0][4] == "c"  # has cookie
        assert parts[0][5] == "n"  # no referer

    def test_referer_flag(self):
        result = _ja4h(
            method="GET",
            version="HTTP/1.1",
            headers_ordered=["Host", "Referer"],
        )
        parts = result.split("_")
        assert parts[0][5] == "r"  # has referer

    def test_h2_version_tag(self):
        result = _ja4h(
            method="GET",
            version="HTTP/2.0",
            headers_ordered=["Host", "User-Agent"],
        )
        assert result.startswith("GE20")

    def test_h3_version_tag(self):
        result = _ja4h(
            method="GET",
            version="HTTP/3.0",
            headers_ordered=["Host", "User-Agent"],
        )
        assert result.startswith("GE30")

    def test_cookie_and_referer_excluded_from_header_hash(self):
        result_with = _ja4h(
            method="GET",
            version="HTTP/1.1",
            headers_ordered=["Host", "User-Agent", "Cookie", "Referer"],
            cookie_val="x=1",
        )
        result_without = _ja4h(
            method="GET",
            version="HTTP/1.1",
            headers_ordered=["Host", "User-Agent"],
        )
        # Header hash (parts[2]) must be identical — cookie/referer excluded from it
        assert result_with.split("_")[2] == result_without.split("_")[2]

    def test_header_count_excludes_cookie_and_referer(self):
        result = _ja4h(
            method="GET",
            version="HTTP/1.1",
            headers_ordered=["Host", "Cookie", "Accept", "Referer"],
        )
        parts = result.split("_")
        # 2 headers after dropping Cookie and Referer (Host + Accept)
        assert parts[1] == "02"

    def test_cookie_hash_alphabetical_sort(self):
        r1 = _ja4h("GET", "HTTP/1.1", [], cookie_val="z=3; a=1; m=2")
        r2 = _ja4h("GET", "HTTP/1.1", [], cookie_val="a=1; m=2; z=3")
        # Both should produce the same cookie hash regardless of original order
        assert r1.split("_")[3] == r2.split("_")[3]

    def test_no_cookie_produces_12_zeros(self):
        result = _ja4h("GET", "HTTP/1.1", ["Host"])
        assert result.split("_")[3] == "000000000000"

    def test_accept_lang_truncated_to_4_chars(self):
        result = _ja4h("GET", "HTTP/1.1", [], accept_lang="en-US,en;q=0.9")
        parts = result.split("_")
        lang_tag = parts[0][6:10]
        assert lang_tag == "en-U"

    def test_deterministic(self):
        # Same keyword arguments twice must give the same string.
        kwargs = dict(
            method="POST",
            version="HTTP/1.1",
            headers_ordered=["Host", "Content-Type", "Accept"],
        )
        assert _ja4h(**kwargs) == _ja4h(**kwargs)


class TestQuicVarint:
    """Checks on ``_quic_varint(data, offset)``.

    Each assertion compares the result with a ``(value, n)`` pair; with the
    offset fixed at 0, ``n`` equals the number of bytes in the encoding.
    """

    def test_1_byte(self):
        assert _quic_varint(b"\x3f", 0) == (63, 1)

    def test_2_byte(self):
        # 0x40 0x00 → 2-byte form: the top two bits are 01 and the remaining
        # 14 bits are all zero, so the expected decoded value is 0.
        data = bytes([0x40, 0x00])
        assert _quic_varint(data, 0) == (0, 2)

    def test_4_byte(self):
        # 0x80000000 → 2 MSB = 10, value = 0
        data = bytes([0x80, 0x00, 0x00, 0x00])
        assert _quic_varint(data, 0) == (0, 4)

    def test_small_values(self):
        assert _quic_varint(b"\x00", 0) == (0, 1)
        assert _quic_varint(b"\x01", 0) == (1, 1)
        assert _quic_varint(b"\x25", 0) == (37, 1)


class TestExtractCryptoFrames:
    """Checks on ``_extract_crypto_frames`` using hand-built frame bytes."""

    def test_single_crypto_frame(self):
        # CRYPTO frame: type=0x06, offset=0x00 (varint), length=5 (varint), data
        data_bytes = b"hello"
        frame = bytes([0x06, 0x00, 0x05]) + data_bytes
        result = _extract_crypto_frames(frame)
        assert result == b"hello"

    def test_empty_payload(self):
        result = _extract_crypto_frames(b"")
        assert result == b""

    def test_padding_skipped(self):
        # PADDING (0x00) + CRYPTO frame
        data_bytes = b"world"
        frame = bytes([0x00, 0x00, 0x06, 0x00, 0x05]) + data_bytes
        result = _extract_crypto_frames(frame)
        assert result == b"world"

    def test_non_crypto_frame_stops_parsing(self):
        # Unknown frame type (0x10) after CRYPTO — should stop and return what we have
        data = b"hello"
        frame = bytes([0x06, 0x00, 0x05]) + data + bytes([0x10, 0x00])
        result = _extract_crypto_frames(frame)
        assert result == b"hello"
# RFC 9001 Appendix A.1 key derivation test vectors
_RFC9001_DCID = bytes.fromhex("8394c8f03e515708")
_RFC9001_CLIENT_KEY = bytes.fromhex("1f369613dd76d5467730efcbe3b1a22d")
_RFC9001_CLIENT_IV = bytes.fromhex("fa044b2f42a3fd3b46fb255c")
_RFC9001_CLIENT_HP = bytes.fromhex("9f50449e04a0e810283a1e9933adedd2")


class TestHKDF:
    """Checks on ``_hkdf_extract`` and on ``_hkdf_expand_label`` output sizes."""

    def test_extract_sha256(self):
        # HKDF-Extract is HMAC-SHA256(salt, IKM). Cross-check with a known value.
        result = _hkdf_extract(b"salt", b"ikm")
        import hmac, hashlib
        expected = hmac.new(b"salt", b"ikm", hashlib.sha256).digest()
        assert result == expected

    def test_expand_label_length(self):
        secret = _hkdf_extract(_QUIC_V1_INITIAL_SALT, _RFC9001_DCID)
        # "client in" expand should be 32 bytes
        client_secret = _hkdf_expand_label(secret, "client in", b"", 32)
        assert len(client_secret) == 32

    def test_expand_label_key_length(self):
        secret = _hkdf_extract(_QUIC_V1_INITIAL_SALT, _RFC9001_DCID)
        client_secret = _hkdf_expand_label(secret, "client in", b"", 32)
        key = _hkdf_expand_label(client_secret, "quic key", b"", 16)
        assert len(key) == 16

    def test_expand_label_iv_length(self):
        secret = _hkdf_extract(_QUIC_V1_INITIAL_SALT, _RFC9001_DCID)
        client_secret = _hkdf_expand_label(secret, "client in", b"", 32)
        iv = _hkdf_expand_label(client_secret, "quic iv", b"", 12)
        assert len(iv) == 12


class TestQuicInitialKeys:
    """Compares ``_quic_initial_keys`` with the constants defined above."""

    def test_rfc9001_appendix_a_vectors(self):
        """Key derivation must match RFC 9001 Appendix A.1 test vectors exactly."""
        key, iv, hp = _quic_initial_keys(_RFC9001_DCID)
        assert key == _RFC9001_CLIENT_KEY, f"key mismatch: {key.hex()}"
        assert iv == _RFC9001_CLIENT_IV, f"iv mismatch: {iv.hex()}"
        assert hp == _RFC9001_CLIENT_HP, f"hp mismatch: {hp.hex()}"


class TestJA4Quic:
    """Checks on the string ``_ja4_quic`` builds from a parsed ClientHello dict."""

    def test_proto_prefix_is_q(self):
        ch = {
            "cipher_suites": [0x1301, 0x1302],
            "extensions": [0x000a, 0x000d, 0x002b],
            "signature_algorithms": [0x0403, 0x0804],
            "supported_versions": [0x0304],
            "sni": "example.com",
            "alpn": ["h3"],
            "tls_version": 0x0303,
        }
        result = _ja4_quic(ch)
        assert result.startswith("q"), f"expected 'q' prefix: {result}"

    def test_structure(self):
        # The result is expected to split into exactly three "_" sections.
        ch = {
            "cipher_suites": [0x1301],
            "extensions": [0x000a],
            "signature_algorithms": [],
            "supported_versions": [0x0304],
            "sni": "",
            "alpn": [],
            "tls_version": 0x0303,
        }
        result = _ja4_quic(ch)
        parts = result.split("_")
        assert len(parts) == 3

    def test_deterministic(self):
        ch = {
            "cipher_suites": [0x1301, 0x1302, 0x1303],
            "extensions": [0x000a, 0x000d],
            "signature_algorithms": [0x0403],
            "supported_versions": [0x0304],
            "sni": "host.example",
            "alpn": ["h3"],
            "tls_version": 0x0303,
        }
        assert _ja4_quic(ch) == _ja4_quic(ch)


class TestParseQuicInitial:
    """Inputs for which ``_parse_quic_initial`` is expected to return None."""

    def test_short_header_rejected(self):
        # Short header: bit 7 clear
        assert _parse_quic_initial(b"\x40" + b"\x00" * 20) is None

    def test_wrong_version_rejected(self):
        # Long header, Initial type, version = 0x00000002
        pkt = bytearray(30)
        pkt[0] = 0xC0  # long header + Initial
        pkt[1:5] = b"\x00\x00\x00\x02"  # version 2
        assert _parse_quic_initial(bytes(pkt)) is None

    def test_non_initial_type_rejected(self):
        # Long header, Handshake type (0x20 set)
        pkt = bytearray(30)
        pkt[0] = 0xE0  # long header + Handshake
        pkt[1:5] = b"\x00\x00\x00\x01"
        assert _parse_quic_initial(bytes(pkt)) is None

    def test_garbage_returns_none(self):
        assert _parse_quic_initial(b"garbage bytes that are not QUIC") is None

    def test_too_short_returns_none(self):
        assert _parse_quic_initial(b"\xc0\x00") is None
{"metric", "matched_signature"} +def test_http_fingerprint_evidence_keys() -> None: + keys = ( + HttpFingerprintEvidence.__required_keys__ + | HttpFingerprintEvidence.__optional_keys__ + ) + assert keys == {"kind", "hash", "protocol", "client_ip", "seen_at", "raw"} + + # ── Per-lifter parametrized positive case (impl phase) ────────────── diff --git a/tests/ttp/test_http_fingerprint_lifter.py b/tests/ttp/test_http_fingerprint_lifter.py new file mode 100644 index 00000000..83269ce9 --- /dev/null +++ b/tests/ttp/test_http_fingerprint_lifter.py @@ -0,0 +1,220 @@ +"""Per-predicate unit tests for :class:`HttpFingerprintLifter` (PR2). + +Covers HFP-0001 (scanner JA4H), HFP-0002 (h2/h3 settings probe), +and HFP-0003 (QUIC probe) using synthetic CompiledRule stubs injected +directly into the lifter's RuleIndex — no YAML on disk required. +""" +from __future__ import annotations + +import asyncio +from typing import Any + +import pytest + +from decnet.ttp.base import TaggerEvent +from decnet.ttp.impl.http_fingerprint_lifter import HttpFingerprintLifter +from decnet.ttp.impl.rule_engine import CompiledRule +from decnet.ttp.store.base import RuleState +from tests.ttp._stub_store import StubRuleStore + + +_EMITS_BY_RULE: dict[str, tuple] = { + "HFP-0001": (("T1592", "002", "TA0043", 0.6),), + "HFP-0002": (("T1046", None, "TA0043", 0.6),), + "HFP-0003": (("T1046", None, "TA0043", 0.6),), +} + + +def _rule(rule_id: str, applies_to: str = "http_fingerprint") -> CompiledRule: + return CompiledRule( + rule_id=rule_id, + rule_version=1, + name=rule_id, + applies_to=frozenset({applies_to}), + match_spec={}, + emits=_EMITS_BY_RULE.get(rule_id, ()), + evidence_fields=(), + state=RuleState(), + ) + + +def _make_lifter(*rule_ids: str) -> HttpFingerprintLifter: + rules = [_rule(rid) for rid in rule_ids] + lifter = HttpFingerprintLifter(StubRuleStore(compiled=rules)) + for rule in rules: + lifter._index.install(rule) + return lifter + + +def _ev(payload: dict[str, Any]) -> TaggerEvent: + 
return TaggerEvent( + source_kind="http_fingerprint", + source_id="src-fp", + attacker_uuid="att-1", + identity_uuid=None, + session_id=None, + decky_id=None, + payload=payload, + ) + + +# ── HFP-0001: scanner JA4H prefix match ───────────────────────────── + + +class TestScannerJA4H: + def test_curl_h1_ja4h_fires(self): + lifter = _make_lifter("HFP-0001") + out = asyncio.run(lifter.tag(_ev({ + "ja4h": "GE11nn0000_02_abc123def456_000000000000", + "protocol": "h1", + "client_ip": "1.2.3.4", + "seen_at": "2026-05-10T00:00:00Z", + }))) + assert out, "HFP-0001 must fire on curl-default JA4H prefix" + assert out[0].technique_id == "T1592" + + def test_curl_h2_ja4h_fires(self): + lifter = _make_lifter("HFP-0001") + out = asyncio.run(lifter.tag(_ev({ + "ja4h": "GE20nn0000_02_abc123def456_000000000000", + "protocol": "h2", + }))) + assert out + + def test_browser_ja4h_no_fire(self): + lifter = _make_lifter("HFP-0001") + out = asyncio.run(lifter.tag(_ev({ + "ja4h": "GE11cn0000_08_realbrwsr1234_000000000000", + "protocol": "h1", + }))) + assert out == [] + + def test_missing_ja4h_no_fire(self): + lifter = _make_lifter("HFP-0001") + out = asyncio.run(lifter.tag(_ev({"protocol": "h1"}))) + assert out == [] + + def test_evidence_keys_match_typeddict(self): + lifter = _make_lifter("HFP-0001") + out = asyncio.run(lifter.tag(_ev({ + "ja4h": "GE11nn0000_02_abc123def456_000000000000", + "protocol": "h1", + "client_ip": "10.0.0.1", + "seen_at": "2026-05-10T00:00:00Z", + }))) + assert out + ev = out[0].evidence + assert set(ev) == {"kind", "hash", "protocol", "client_ip", "seen_at", "raw"} + assert ev["kind"] == "ja4h" + assert ev["protocol"] == "h1" + + def test_rule_not_installed_no_fire(self): + lifter = _make_lifter() # no rules installed + out = asyncio.run(lifter.tag(_ev({ + "ja4h": "GE11nn0000_02_abc_000000000000", + }))) + assert out == [] + + +# ── HFP-0002: h2/h3 settings probe ────────────────────────────────── + + +class TestH2H3Probe: + def test_h2_settings_fires(self): + 
lifter = _make_lifter("HFP-0002") + out = asyncio.run(lifter.tag(_ev({ + "fingerprint_type": "http2_settings", + "settings": {"HEADER_TABLE_SIZE": 65536}, + "client_ip": "5.6.7.8", + "seen_at": "2026-05-10T00:00:00Z", + }))) + assert out, "HFP-0002 must fire on http2_settings" + assert out[0].technique_id == "T1046" + + def test_h3_settings_fires(self): + lifter = _make_lifter("HFP-0002") + out = asyncio.run(lifter.tag(_ev({ + "fingerprint_type": "http3_settings", + "settings": {"QPACK_MAX_TABLE_CAPACITY": 0}, + }))) + assert out + ev = out[0].evidence + assert ev["protocol"] == "h3" + + def test_h2_settings_evidence_carries_raw(self): + lifter = _make_lifter("HFP-0002") + settings = {"HEADER_TABLE_SIZE": 4096, "MAX_CONCURRENT_STREAMS": 100} + out = asyncio.run(lifter.tag(_ev({ + "fingerprint_type": "http2_settings", + "settings": settings, + }))) + assert out + assert out[0].evidence["raw"] == settings + + def test_ja4h_event_does_not_fire_h2_probe(self): + lifter = _make_lifter("HFP-0002") + out = asyncio.run(lifter.tag(_ev({ + "ja4h": "GE11nn0000_02_abc_000000000000", + }))) + assert out == [] + + def test_unknown_fp_type_no_fire(self): + lifter = _make_lifter("HFP-0002") + out = asyncio.run(lifter.tag(_ev({ + "fingerprint_type": "ja3", + }))) + assert out == [] + + +# ── HFP-0003: QUIC probe ───────────────────────────────────────────── + + +class TestQuicProbe: + def test_ja4_quic_fires(self): + lifter = _make_lifter("HFP-0003") + out = asyncio.run(lifter.tag(_ev({ + "ja4_quic": "q13d0310h2_002f,0035_0403,0804_h3", + "client_ip": "9.8.7.6", + "seen_at": "2026-05-10T00:00:00Z", + }))) + assert out, "HFP-0003 must fire on ja4_quic" + assert out[0].technique_id == "T1046" + + def test_evidence_protocol_is_h3(self): + lifter = _make_lifter("HFP-0003") + out = asyncio.run(lifter.tag(_ev({ + "ja4_quic": "q13d0310h2_002f,0035_0403,0804_h3", + }))) + assert out + assert out[0].evidence["protocol"] == "h3" + assert out[0].evidence["kind"] == "ja4_quic" + + def 
test_missing_ja4_quic_no_fire(self): + lifter = _make_lifter("HFP-0003") + out = asyncio.run(lifter.tag(_ev({"client_ip": "1.1.1.1"}))) + assert out == [] + + +# ── Combined: all three rules installed ────────────────────────────── + + +class TestAllRulesCombined: + def test_only_matching_rule_fires(self): + lifter = _make_lifter("HFP-0001", "HFP-0002", "HFP-0003") + # h2_settings payload should only fire HFP-0002 + out = asyncio.run(lifter.tag(_ev({ + "fingerprint_type": "http2_settings", + "settings": {}, + }))) + rule_ids = {tag.rule_id for tag in out} + assert "HFP-0002" in rule_ids + assert "HFP-0001" not in rule_ids + assert "HFP-0003" not in rule_ids + + def test_empty_payload_no_errors(self): + lifter = _make_lifter("HFP-0001", "HFP-0002", "HFP-0003") + out = asyncio.run(lifter.tag(_ev({}))) + assert out == [] + + def test_handles_only_http_fingerprint(self): + assert HttpFingerprintLifter.HANDLES == frozenset({"http_fingerprint"}) diff --git a/tests/web/test_attackers_fingerprint_columns.py b/tests/web/test_attackers_fingerprint_columns.py new file mode 100644 index 00000000..82201d26 --- /dev/null +++ b/tests/web/test_attackers_fingerprint_columns.py @@ -0,0 +1,153 @@ +"""Round-trip tests for the three PR2 fingerprint columns on AttackerIdentity. + +Verifies: +* ``ja4h_hashes``, ``ja4_quic_hashes``, ``http_versions_seen`` exist as + Optional[str] fields on the model (type-level, GREEN today). +* A full SQLite round-trip stores and retrieves non-None values correctly. +* Columns default to None and don't affect existing columns. 
+""" +from __future__ import annotations + +import json +import uuid as _uuid +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional, get_type_hints + +import pytest +import pytest_asyncio + +from decnet.web.db.factory import get_repository +from decnet.web.db.models.attackers import AttackerIdentity + + +# ── Field presence (type-level, GREEN today) ───────────────────────── + + +def test_ja4h_hashes_field_is_optional_str() -> None: + hints = get_type_hints(AttackerIdentity) + # Optional[str] == Union[str, None], repr varies by Python version + assert "ja4h_hashes" in hints + h = hints["ja4h_hashes"] + assert h == Optional[str], f"unexpected type: {h}" + + +def test_ja4_quic_hashes_field_is_optional_str() -> None: + hints = get_type_hints(AttackerIdentity) + assert "ja4_quic_hashes" in hints + h = hints["ja4_quic_hashes"] + assert h == Optional[str], f"unexpected type: {h}" + + +def test_http_versions_seen_field_is_optional_str() -> None: + hints = get_type_hints(AttackerIdentity) + assert "http_versions_seen" in hints + h = hints["http_versions_seen"] + assert h == Optional[str], f"unexpected type: {h}" + + +def test_new_columns_default_to_none() -> None: + row = AttackerIdentity(uuid=str(_uuid.uuid4())) + assert row.ja4h_hashes is None + assert row.ja4_quic_hashes is None + assert row.http_versions_seen is None + + +# ── SQLite round-trip ───────────────────────────────────────────────── + + +@pytest_asyncio.fixture() +async def repo(tmp_path: Path, monkeypatch): + monkeypatch.setenv("DECNET_DB_TYPE", "sqlite") + r = get_repository(db_path=str(tmp_path / "fp_col_test.db")) + await r.initialize() + try: + yield r + finally: + engine = getattr(r, "engine", None) + if engine is not None: + try: + await engine.dispose() + except Exception: + pass + + +def _identity(extra: dict | None = None) -> AttackerIdentity: + base = { + "uuid": str(_uuid.uuid4()), + "schema_version": 1, + "created_at": datetime.now(timezone.utc), + 
"updated_at": datetime.now(timezone.utc), + } + if extra: + base.update(extra) + return AttackerIdentity(**base) + + +@pytest.mark.asyncio +async def test_ja4h_hashes_round_trip(repo) -> None: + value = json.dumps(["GE11nn0000_02_abc_000", "GE20nn0000_04_def_000"]) + row = _identity({"ja4h_hashes": value}) + async with repo._session() as session: + session.add(row) + await session.commit() + async with repo._session() as session: + fetched = await session.get(AttackerIdentity, row.uuid) + assert fetched is not None + assert fetched.ja4h_hashes == value + assert json.loads(fetched.ja4h_hashes) == json.loads(value) + + +@pytest.mark.asyncio +async def test_ja4_quic_hashes_round_trip(repo) -> None: + value = json.dumps(["q13d0310h2_002f_0403_h3"]) + row = _identity({"ja4_quic_hashes": value}) + async with repo._session() as session: + session.add(row) + await session.commit() + async with repo._session() as session: + fetched = await session.get(AttackerIdentity, row.uuid) + assert fetched is not None + assert fetched.ja4_quic_hashes == value + + +@pytest.mark.asyncio +async def test_http_versions_seen_round_trip(repo) -> None: + value = "h1\nh2\nh3" + row = _identity({"http_versions_seen": value}) + async with repo._session() as session: + session.add(row) + await session.commit() + async with repo._session() as session: + fetched = await session.get(AttackerIdentity, row.uuid) + assert fetched is not None + assert fetched.http_versions_seen == value + + +@pytest.mark.asyncio +async def test_new_columns_nullable_when_not_set(repo) -> None: + row = _identity() # no fp columns set + async with repo._session() as session: + session.add(row) + await session.commit() + async with repo._session() as session: + fetched = await session.get(AttackerIdentity, row.uuid) + assert fetched is not None + assert fetched.ja4h_hashes is None + assert fetched.ja4_quic_hashes is None + assert fetched.http_versions_seen is None + + +@pytest.mark.asyncio +async def 
test_existing_columns_unaffected(repo) -> None: + ja3 = json.dumps(["abc123"]) + row = _identity({"ja3_hashes": ja3, "ja4h_hashes": json.dumps(["fp1"])}) + async with repo._session() as session: + session.add(row) + await session.commit() + async with repo._session() as session: + fetched = await session.get(AttackerIdentity, row.uuid) + assert fetched is not None + assert fetched.ja3_hashes == ja3 + assert fetched.ja4h_hashes == json.dumps(["fp1"]) + assert fetched.ja4_quic_hashes is None