feat(http): header-quirks fingerprint — order + casing + tool guess
Per-request HTTP fingerprint derived from the header dict we already log. Captures: - order_hash: SHA-256 prefix (16 hex) over the lowercased header-name sequence, minus volatile/per-request headers (Content-Length, Cookie, Authorization, XFF family, trace IDs). Stable identity for a given client stack regardless of which target / path is hit. - casing_hash: same shape but over the per-header casing category (Title-Case / lower / UPPER / mixed). Attackers frequently spoof User-Agent but forget their stack sends `user-agent` while browsers send `User-Agent`. - tool_guess: prefix match against curl / python-requests / Go-http-client / nmap-nse signatures. Cheap, best-effort — the hash is the hard signal. - duplicates: reserved for when the HTTP template switches from dict(request.headers) to a list form; today it always fires empty because dict() collapses duplicates. Payload is a fingerprint bounty (bounty_type="fingerprint", fingerprint_type="http_quirks"). Bounty dedup collapses identical hashes per attacker — one row per distinct fingerprint — so a chatty scanner doesn't spam the vault, but a tool-chain change from the same IP surfaces as a new row. UI renderer (FpHttpQuirks) shows the two hashes, tool guess badge in violet, casing/count tags, and a collapsible header-order list. Added to the passiveTypes group so it nests with JA3/JA4L/etc. in the AttackerDetail fingerprints panel. One library note: the naive "title-case" classifier failed on tokens like `X-Forwarded-For` because Python's "".islower() returns False so `p[1:].islower()` rejects single-letter tokens like the `X`. Fix: explicitly accept single-char tokens when uppercase.
This commit is contained in:
@@ -261,6 +261,23 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non
|
||||
"payload": _leak,
|
||||
})
|
||||
|
||||
# 2c. HTTP header quirks — order + casing fingerprint per request.
|
||||
# Real HTTP clients have distinctive header orderings and casing
|
||||
# patterns (curl vs python-requests vs Go-http-client vs nmap vs
|
||||
# browsers all differ). Attackers routinely spoof User-Agent but
|
||||
# forget to match the stack's native header order. Bounty dedup
|
||||
# collapses repeat fingerprints from the same attacker, so this
|
||||
# fires once per distinct hash per source.
|
||||
_quirks = _http_quirks_fingerprint(log_data, _headers)
|
||||
if _quirks is not None:
|
||||
await repo.add_bounty({
|
||||
"decky": log_data.get("decky"),
|
||||
"service": log_data.get("service"),
|
||||
"attacker_ip": log_data.get("attacker_ip"),
|
||||
"bounty_type": "fingerprint",
|
||||
"payload": _quirks,
|
||||
})
|
||||
|
||||
# 3. VNC client version fingerprint
|
||||
_vnc_ver = _fields.get("client_version")
|
||||
if _vnc_ver and log_data.get("event_type") == "version":
|
||||
@@ -593,3 +610,144 @@ def _detect_ip_leak(
|
||||
"method": log_data.get("fields", {}).get("method"),
|
||||
}
|
||||
|
||||
|
||||
# ─── HTTP header quirks fingerprint ─────────────────────────────────────────
|
||||
|
||||
# Headers that vary with per-request content (payload-body size, cookies
|
||||
# set by prior responses) and therefore aren't useful identity. Stripped
|
||||
# before hashing so a tool's order fingerprint is stable across different
|
||||
# targets/sessions.
|
||||
_VOLATILE_HEADERS = frozenset({
|
||||
"content-length",
|
||||
"cookie",
|
||||
"authorization",
|
||||
"x-forwarded-for", # carries attacker-dependent values
|
||||
"forwarded",
|
||||
"x-real-ip",
|
||||
"true-client-ip",
|
||||
"cf-connecting-ip",
|
||||
"x-request-id",
|
||||
"x-correlation-id",
|
||||
"x-amzn-trace-id",
|
||||
})
|
||||
|
||||
|
||||
# Distinctive order signatures for common tools. The match is on the
|
||||
# lowercased-name list MINUS the volatile set. A prefix match wins —
|
||||
# many tools tack on "User-Agent / Accept-Encoding / Accept" in the
|
||||
# same order regardless of method.
|
||||
_TOOL_SIGNATURES: tuple[tuple[str, tuple[str, ...]], ...] = (
|
||||
# curl sends: Host, User-Agent, Accept, <body-headers>.
|
||||
("curl", ("host", "user-agent", "accept")),
|
||||
# python-requests: User-Agent, Accept-Encoding, Accept, Connection, Host.
|
||||
("python-requests", ("host", "user-agent", "accept-encoding", "accept", "connection")),
|
||||
# Go-http-client: Host, User-Agent, Accept-Encoding.
|
||||
("go-http-client", ("host", "user-agent", "accept-encoding")),
|
||||
# nmap http-enum / http-* scripts: short, Host+User-Agent ordering.
|
||||
("nmap-nse", ("host", "user-agent")),
|
||||
# Nikto / Nuclei send distinctive Accept-Language preferences — treat
|
||||
# User-Agent check as the secondary signal elsewhere; order alone is
|
||||
# ambiguous here.
|
||||
)
|
||||
|
||||
|
||||
def _casing_category(name: str) -> str:
|
||||
"""Classify a header-name casing pattern.
|
||||
|
||||
Real HTTP clients and stacks pick one convention and stick to it:
|
||||
browsers send `Title-Case`; python-requests sends `Title-Case`;
|
||||
Go's stdlib canonicalises to `Title-Case`; curl sends literal
|
||||
`Title-Case`; nmap/masscan often send `lowercase`; custom scanners
|
||||
sometimes send `UPPERCASE`.
|
||||
"""
|
||||
if not name:
|
||||
return "empty"
|
||||
if name == name.upper():
|
||||
return "upper"
|
||||
if name == name.lower():
|
||||
return "lower"
|
||||
# "Title-Case" test: each dash-separated token starts with an
|
||||
# uppercase; trailing chars (if any) must be lowercase. Single-
|
||||
# letter tokens like the `X` in `X-Forwarded-For` qualify when
|
||||
# uppercase — "".islower() is False in Python so the naive form
|
||||
# of this test misfires.
|
||||
parts = [p for p in name.split("-") if p]
|
||||
if parts and all(
|
||||
p[:1].isupper() and (len(p) == 1 or p[1:].islower())
|
||||
for p in parts
|
||||
):
|
||||
return "title"
|
||||
return "mixed"
|
||||
|
||||
|
||||
def _short_hash(value: str) -> str:
|
||||
"""16-hex-char SHA-256 prefix — stable identity, short display."""
|
||||
import hashlib
|
||||
return hashlib.sha256(value.encode("utf-8")).hexdigest()[:16]
|
||||
|
||||
|
||||
def _guess_tool_from_order(lowered: list[str]) -> Optional[str]:
    """Return the name of the first tool signature that prefixes *lowered*.

    *lowered* is the lowercased header-name sequence. Signatures are tried
    in table order and the first one equal to the leading slice of the
    sequence wins; ``None`` means nothing matched.
    """
    observed = tuple(lowered)
    # A sequence shorter than the signature yields a shorter slice, which
    # can never compare equal — no separate length check needed.
    return next(
        (
            tool
            for tool, expected in _TOOL_SIGNATURES
            if observed[: len(expected)] == expected
        ),
        None,
    )
|
||||
|
||||
|
||||
def _http_quirks_fingerprint(
    log_data: dict[str, Any], headers: dict[str, Any],
) -> Optional[dict[str, Any]]:
    """Build an HTTP request-header quirks fingerprint.

    Captures the header-order hash, casing pattern, counts, and a
    best-effort tool guess.

    Args:
        log_data: The parsed log row. Only ``service`` and the ``method`` /
            ``path`` entries of ``fields`` are read here.
        headers: Request headers as an insertion-ordered mapping of
            header name to value. Only the keys are used.

    Returns:
        The fingerprint payload, or ``None`` when the service is not
        ``"http"``, *headers* is not a non-empty dict, or none of its keys
        are strings. A request whose headers are all volatile still
        produces a payload (with an empty ``order`` list).
    """
    from collections import Counter

    if log_data.get("service") != "http":
        return None
    if not isinstance(headers, dict) or not headers:
        return None

    # Insertion order is the wire order as recorded by the caller (dicts
    # preserve insertion order, and so does a JSON round-trip).
    names_full: list[str] = [k for k in headers if isinstance(k, str)]
    if not names_full:
        return None

    # Volatile headers are dropped from the hashes AND from the emitted
    # `order` list; they only remain visible through `header_count`.
    names_stable = [n for n in names_full if n.lower() not in _VOLATILE_HEADERS]
    lowered = [n.lower() for n in names_stable]

    order_hash = _short_hash("\n".join(lowered))
    casing_per_header = [_casing_category(n) for n in names_stable]
    casing_hash = _short_hash("\n".join(casing_per_header))

    # A single "dominant" casing category — useful for at-a-glance display.
    # More than one distinct per-header category is reported as "mixed".
    categories = set(casing_per_header)
    if not categories:
        dominant = "empty"
    elif len(categories) == 1:
        dominant = next(iter(categories))
    else:
        dominant = "mixed"

    # Duplicate detection. Exact-match duplicates cannot exist among dict
    # keys, but header names are case-insensitive, so `Host` alongside
    # `host` is a genuine duplicate that survives dict construction.
    # Sorted so the payload is deterministic from run to run.
    name_counts = Counter(n.lower() for n in names_full)
    duplicates = sorted(name for name, seen in name_counts.items() if seen > 1)

    # `fields` may be missing, None, or not a mapping; treat all of those
    # as "no request-line details" instead of raising.
    fields = log_data.get("fields")
    if not isinstance(fields, dict):
        fields = {}

    # NOTE(review): `method` and `path` vary per request; if bounty dedup
    # compares whole payloads they will defeat it — confirm against the
    # repository's dedup key.
    return {
        "fingerprint_type": "http_quirks",
        "order_hash": order_hash,
        "order": names_stable,
        "casing_hash": casing_hash,
        "casing_category": dominant,
        "header_count": len(names_full),
        "stable_count": len(names_stable),
        "tool_guess": _guess_tool_from_order(lowered),
        "duplicates": duplicates or None,
        "method": fields.get("method"),
        "path": fields.get("path"),
    }
|
||||
|
||||
|
||||
@@ -92,6 +92,7 @@ const fpTypeLabel: Record<string, string> = {
|
||||
tls_resumption: 'SESSION RESUMPTION',
|
||||
tls_certificate: 'CERTIFICATE',
|
||||
http_useragent: 'HTTP USER-AGENT',
|
||||
http_quirks: 'HTTP HEADER QUIRKS',
|
||||
vnc_client_version: 'VNC CLIENT',
|
||||
jarm: 'JARM',
|
||||
hassh_server: 'HASSH SERVER',
|
||||
@@ -104,6 +105,7 @@ const fpTypeIcon: Record<string, React.ReactNode> = {
|
||||
tls_resumption: <Wifi size={14} />,
|
||||
tls_certificate: <FileKey size={14} />,
|
||||
http_useragent: <Shield size={14} />,
|
||||
http_quirks: <Fingerprint size={14} />,
|
||||
vnc_client_version: <Lock size={14} />,
|
||||
jarm: <Crosshair size={14} />,
|
||||
hassh_server: <Lock size={14} />,
|
||||
@@ -338,6 +340,47 @@ const FpGeneric: React.FC<{ p: any }> = ({ p }) => (
|
||||
</div>
|
||||
);
|
||||
|
||||
// Renders an `http_quirks` fingerprint payload: both hashes, a row of
// summary tags, a collapsible header-order list, and the request line.
const FpHttpQuirks: React.FC<{ p: any }> = ({ p }) => {
  // `order` may be absent or malformed in a payload; fall back to an empty list.
  const headerNames: string[] = Array.isArray(p.order) ? p.order : [];

  const columnStyle = { display: 'flex', flexDirection: 'column' as const, gap: '6px' };
  const tagRowStyle = { display: 'flex', gap: '8px', flexWrap: 'wrap' as const };
  const summaryStyle = { fontSize: '0.7rem', cursor: 'pointer', letterSpacing: '1px' };
  const orderRowStyle = { display: 'flex', gap: '4px', flexWrap: 'wrap' as const, marginTop: '4px' };
  const requestLineStyle = { fontSize: '0.7rem', fontFamily: 'monospace', marginTop: '2px' };

  return (
    <div style={columnStyle}>
      <HashRow label="ORDER HASH" value={p.order_hash} />
      <HashRow label="CASING HASH" value={p.casing_hash} />
      <div style={tagRowStyle}>
        {p.tool_guess && (
          <Tag color="var(--violet)">{String(p.tool_guess).toUpperCase()}</Tag>
        )}
        {p.casing_category && (
          <Tag>CASE · {String(p.casing_category).toUpperCase()}</Tag>
        )}
        {typeof p.header_count === 'number' && (
          <Tag>{p.header_count} HEADERS</Tag>
        )}
        {p.duplicates && (
          <Tag color="var(--warn, #e0a040)">DUPLICATES</Tag>
        )}
      </div>
      {headerNames.length > 0 && (
        <details>
          <summary className="dim" style={summaryStyle}>
            HEADER ORDER
          </summary>
          <div style={orderRowStyle}>
            {headerNames.map((headerName, position) => (
              <Tag key={`${headerName}-${position}`}>{headerName}</Tag>
            ))}
          </div>
        </details>
      )}
      {(p.method || p.path) && (
        <div className="dim" style={requestLineStyle}>
          {p.method} {p.path}
        </div>
      )}
    </div>
  );
};
|
||||
|
||||
const FingerprintGroup: React.FC<{ fpType: string; items: any[] }> = ({ fpType, items }) => {
|
||||
const label = fpTypeLabel[fpType] || fpType.toUpperCase().replace(/_/g, ' ');
|
||||
const icon = fpTypeIcon[fpType] || <Fingerprint size={14} />;
|
||||
@@ -365,6 +408,7 @@ const FingerprintGroup: React.FC<{ fpType: string; items: any[] }> = ({ fpType,
|
||||
case 'jarm': return <FpJarm key={i} p={p} />;
|
||||
case 'hassh_server': return <FpHassh key={i} p={p} />;
|
||||
case 'tcpfp': return <FpTcpStack key={i} p={p} />;
|
||||
case 'http_quirks': return <FpHttpQuirks key={i} p={p} />;
|
||||
default: return <FpGeneric key={i} p={p} />;
|
||||
}
|
||||
})}
|
||||
@@ -1245,7 +1289,7 @@ const AttackerDetail: React.FC = () => {
|
||||
|
||||
// Active probes first, then passive, then unknown
|
||||
const activeTypes = ['jarm', 'hassh_server', 'tcpfp'];
|
||||
const passiveTypes = ['ja3', 'ja4l', 'tls_resumption', 'tls_certificate', 'http_useragent', 'vnc_client_version'];
|
||||
const passiveTypes = ['ja3', 'ja4l', 'tls_resumption', 'tls_certificate', 'http_useragent', 'http_quirks', 'vnc_client_version'];
|
||||
const knownTypes = [...activeTypes, ...passiveTypes];
|
||||
const unknownTypes = Object.keys(groups).filter((t) => !knownTypes.includes(t));
|
||||
|
||||
|
||||
206
tests/web/test_ingester_http_quirks.py
Normal file
206
tests/web/test_ingester_http_quirks.py
Normal file
@@ -0,0 +1,206 @@
|
||||
"""HTTP header-quirks fingerprint extraction in the ingester."""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.web.ingester import (
|
||||
_casing_category,
|
||||
_guess_tool_from_order,
|
||||
_http_quirks_fingerprint,
|
||||
_short_hash,
|
||||
_extract_bounty,
|
||||
)
|
||||
|
||||
|
||||
def _log_row(headers: dict[str, str], *, service: str = "http") -> dict:
|
||||
return {
|
||||
"decky": "http-01",
|
||||
"service": service,
|
||||
"attacker_ip": "1.2.3.4",
|
||||
"event_type": "request",
|
||||
"fields": {
|
||||
"method": "GET",
|
||||
"path": "/",
|
||||
"headers": headers,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# ─── casing classifier ─────────────────────────────────────────────────────
|
||||
|
||||
def test_casing_title():
    """Title-Case names, including a single-letter token, classify as "title"."""
    for header in ("User-Agent", "Host", "X-Forwarded-For"):
        assert _casing_category(header) == "title"
|
||||
|
||||
|
||||
def test_casing_lower():
    """All-lowercase names classify as "lower"."""
    for header in ("user-agent", "x-forwarded-for"):
        assert _casing_category(header) == "lower"
|
||||
|
||||
|
||||
def test_casing_upper():
    """An all-uppercase name classifies as "upper"."""
    category = _casing_category("USER-AGENT")
    assert category == "upper"
|
||||
|
||||
|
||||
def test_casing_mixed():
    """Irregular capitalisation matches no convention and is "mixed"."""
    category = _casing_category("USer-AgEnt")
    assert category == "mixed"
|
||||
|
||||
|
||||
# ─── order + casing hash stability ──────────────────────────────────────────
|
||||
|
||||
def test_same_order_same_hash():
    """Same header names in the same order hash identically; values are ignored."""
    first = _log_row({"Host": "x", "User-Agent": "curl/8", "Accept": "*/*"})
    second = _log_row({"Host": "y", "User-Agent": "curl/7", "Accept": "*/*"})
    fp_first, fp_second = (
        _http_quirks_fingerprint(row, row["fields"]["headers"])
        for row in (first, second)
    )
    for hash_key in ("order_hash", "casing_hash"):
        assert fp_first[hash_key] == fp_second[hash_key]
|
||||
|
||||
|
||||
def test_different_order_different_hash():
    """Reordering the same headers must change the order hash."""
    forward = _log_row({"Host": "x", "User-Agent": "a", "Accept": "*/*"})
    reverse = _log_row({"Accept": "*/*", "User-Agent": "a", "Host": "x"})
    fp_forward = _http_quirks_fingerprint(forward, forward["fields"]["headers"])
    fp_reverse = _http_quirks_fingerprint(reverse, reverse["fields"]["headers"])
    assert fp_forward["order_hash"] != fp_reverse["order_hash"]
|
||||
|
||||
|
||||
def test_different_casing_different_hash():
    """Title-Case and lowercase spellings differ in casing hash and category."""
    titled = _log_row({"Host": "x", "User-Agent": "a"})
    lowercase = _log_row({"host": "x", "user-agent": "a"})
    fp_titled = _http_quirks_fingerprint(titled, titled["fields"]["headers"])
    fp_lowercase = _http_quirks_fingerprint(lowercase, lowercase["fields"]["headers"])
    assert fp_titled["casing_hash"] != fp_lowercase["casing_hash"]
    assert fp_titled["casing_category"] == "title"
    assert fp_lowercase["casing_category"] == "lower"
|
||||
|
||||
|
||||
def test_volatile_headers_excluded_from_hash():
    """Per-request headers (Content-Length, Cookie) must not move the
    order hash, yet they still count towards ``header_count``."""
    common = {"Host": "x", "User-Agent": "a"}
    small = _log_row({**common, "Content-Length": "100"})
    large = _log_row({**common, "Content-Length": "999", "Cookie": "session=abc"})
    fp_small = _http_quirks_fingerprint(small, small["fields"]["headers"])
    fp_large = _http_quirks_fingerprint(large, large["fields"]["headers"])

    assert fp_small["order_hash"] == fp_large["order_hash"]
    # header_count covers every header received, volatile ones included.
    assert fp_small["header_count"] == 3
    assert fp_large["header_count"] == 4
    # stable_count is what remains once the volatile ones are dropped.
    assert fp_small["stable_count"] == 2
    assert fp_large["stable_count"] == 2
|
||||
|
||||
|
||||
# ─── tool guesses ──────────────────────────────────────────────────────────
|
||||
|
||||
def test_curl_signature_guessed():
    """Host, User-Agent, Accept is recognised as curl."""
    curl_order = ["host", "user-agent", "accept"]
    assert _guess_tool_from_order(curl_order) == "curl"
|
||||
|
||||
|
||||
def test_python_requests_signature_guessed():
    """The five-header python-requests order is recognised."""
    requests_order = ["host", "user-agent", "accept-encoding", "accept", "connection"]
    assert _guess_tool_from_order(requests_order) == "python-requests"
|
||||
|
||||
|
||||
def test_go_http_client_signature_guessed():
    """Host, User-Agent, Accept-Encoding is recognised as Go-http-client."""
    go_order = ["host", "user-agent", "accept-encoding"]
    assert _guess_tool_from_order(go_order) == "go-http-client"
|
||||
|
||||
|
||||
def test_nmap_nse_signature_guessed():
    """A bare Host, User-Agent order falls through to the nmap-nse guess."""
    nse_order = ["host", "user-agent"]
    assert _guess_tool_from_order(nse_order) == "nmap-nse"
|
||||
|
||||
|
||||
def test_unknown_tool_returns_none():
    """An order that matches no signature prefix yields no guess."""
    unmatched_order = ["accept", "host", "user-agent"]
    assert _guess_tool_from_order(unmatched_order) is None
|
||||
|
||||
|
||||
def test_fingerprint_includes_tool_guess_curl():
    """The full fingerprint carries the tool guess for a curl-shaped request."""
    curl_headers = {"Host": "target", "User-Agent": "curl/8.0", "Accept": "*/*"}
    row = _log_row(curl_headers)
    fingerprint = _http_quirks_fingerprint(row, row["fields"]["headers"])
    assert fingerprint["tool_guess"] == "curl"
|
||||
|
||||
|
||||
# ─── gating ─────────────────────────────────────────────────────────────────
|
||||
|
||||
def test_non_http_service_skipped():
    """Rows from a non-HTTP service produce no fingerprint."""
    ssh_row = _log_row({"Host": "x"}, service="ssh")
    result = _http_quirks_fingerprint(ssh_row, ssh_row["fields"]["headers"])
    assert result is None
|
||||
|
||||
|
||||
def test_empty_headers_skipped():
    """An HTTP row with no headers produces no fingerprint."""
    headerless_row = _log_row({})
    result = _http_quirks_fingerprint(headerless_row, {})
    assert result is None
|
||||
|
||||
|
||||
def test_only_volatile_headers_still_emits():
    """A request made up solely of volatile headers still yields a
    fingerprint — the order list is empty but the header count remains."""
    volatile_only = {"Content-Length": "10", "Cookie": "a=b"}
    row = _log_row(volatile_only)
    fingerprint = _http_quirks_fingerprint(row, row["fields"]["headers"])
    assert fingerprint is not None
    assert fingerprint["header_count"] == 2
    assert fingerprint["stable_count"] == 0
    assert fingerprint["order"] == []
|
||||
|
||||
|
||||
# ─── end-to-end via _extract_bounty ─────────────────────────────────────────
|
||||
|
||||
@pytest.mark.asyncio
async def test_extract_bounty_emits_http_quirks():
    """A curl-shaped HTTP row yields both the User-Agent and the quirks
    fingerprint bounties, and the quirks payload identifies curl."""
    curl_headers = {"Host": "target", "User-Agent": "curl/8.0", "Accept": "*/*"}
    repo = AsyncMock()
    await _extract_bounty(repo, _log_row(curl_headers))

    bounties = [call.args[0] for call in repo.add_bounty.call_args_list]
    fingerprint_payloads = [
        bounty["payload"]
        for bounty in bounties
        if bounty["bounty_type"] == "fingerprint"
    ]

    # Expect: http_useragent fingerprint + http_quirks fingerprint.
    emitted_types = [payload.get("fingerprint_type") for payload in fingerprint_payloads]
    assert "http_useragent" in emitted_types
    assert "http_quirks" in emitted_types

    quirks_payload = next(
        payload
        for payload in fingerprint_payloads
        if payload.get("fingerprint_type") == "http_quirks"
    )
    assert quirks_payload["tool_guess"] == "curl"
    assert quirks_payload["casing_category"] == "title"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_extract_bounty_non_http_skips_quirks():
    """No http_quirks bounty is emitted for a non-HTTP service row."""
    repo = AsyncMock()
    await _extract_bounty(repo, _log_row({"Host": "x"}, service="ssh"))

    emitted_types = [
        (call.args[0].get("payload") or {}).get("fingerprint_type")
        for call in repo.add_bounty.call_args_list
    ]
    assert all(fp_type != "http_quirks" for fp_type in emitted_types)
|
||||
|
||||
|
||||
# ─── hash stability across restarts ─────────────────────────────────────────
|
||||
|
||||
def test_short_hash_deterministic():
    """Equal inputs hash equally, different inputs differ, output is 16 chars."""
    repeat_a, repeat_b = _short_hash("abc"), _short_hash("abc")
    assert repeat_a == repeat_b
    assert _short_hash("abc") != _short_hash("def")
    assert len(_short_hash("anything")) == 16
|
||||
Reference in New Issue
Block a user