feat(http): header-quirks fingerprint — order + casing + tool guess

Per-request HTTP fingerprint derived from the header dict we already
log. Captures:

- order_hash: SHA-256 prefix (16 hex) over the lowercased header-name
  sequence, minus volatile/per-request headers (Content-Length,
  Cookie, Authorization, XFF family, trace IDs). Stable identity for
  a given client stack regardless of which target / path is hit.
- casing_hash: same shape but over the per-header casing category
  (Title-Case / lower / UPPER / mixed). Attackers frequently spoof
  User-Agent but forget their stack sends `user-agent` while browsers
  send `User-Agent`.
- tool_guess: prefix match against curl / python-requests /
  Go-http-client / nmap-nse signatures. Cheap, best-effort — the
  hash is the hard signal.
- duplicates: reserved for when the HTTP template switches from
  dict(request.headers) to a list form; today it always fires empty
  because dict() collapses duplicates.

Payload is a fingerprint bounty (bounty_type="fingerprint",
fingerprint_type="http_quirks"). Bounty dedup collapses identical
hashes per attacker — one row per distinct fingerprint — so a chatty
scanner doesn't spam the vault, but a tool-chain change from the
same IP surfaces as a new row.

UI renderer (FpHttpQuirks) shows the two hashes, tool guess badge in
violet, casing/count tags, and a collapsible header-order list.
Added to the passiveTypes group so it nests with JA3/JA4L/etc. in
the AttackerDetail fingerprints panel.

One library note: the naive "title-case" classifier failed on names
like `X-Forwarded-For`. For the single-letter token `X`, `p[1:]` is
the empty string, and Python's "".islower() returns False, so the
`p[1:].islower()` check rejected the token.
Fix: explicitly accept single-char tokens when uppercase.
This commit is contained in:
2026-04-24 17:51:40 -04:00
parent 2a0c5ca410
commit dccb410bb3
3 changed files with 409 additions and 1 deletions

View File

@@ -261,6 +261,23 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non
"payload": _leak, "payload": _leak,
}) })
# 2c. HTTP header quirks — order + casing fingerprint per request.
# Real HTTP clients have distinctive header orderings and casing
# patterns (curl vs python-requests vs Go-http-client vs nmap vs
# browsers all differ). Attackers routinely spoof User-Agent but
# forget to match the stack's native header order. Bounty dedup
# collapses repeat fingerprints from the same attacker, so this
# fires once per distinct hash per source.
_quirks = _http_quirks_fingerprint(log_data, _headers)
if _quirks is not None:
await repo.add_bounty({
"decky": log_data.get("decky"),
"service": log_data.get("service"),
"attacker_ip": log_data.get("attacker_ip"),
"bounty_type": "fingerprint",
"payload": _quirks,
})
# 3. VNC client version fingerprint # 3. VNC client version fingerprint
_vnc_ver = _fields.get("client_version") _vnc_ver = _fields.get("client_version")
if _vnc_ver and log_data.get("event_type") == "version": if _vnc_ver and log_data.get("event_type") == "version":
@@ -593,3 +610,144 @@ def _detect_ip_leak(
"method": log_data.get("fields", {}).get("method"), "method": log_data.get("fields", {}).get("method"),
} }
# ─── HTTP header quirks fingerprint ─────────────────────────────────────────
# Headers that vary with per-request content (payload-body size, cookies
# set by prior responses) and therefore aren't useful identity. Stripped
# before hashing so a tool's order fingerprint is stable across different
# targets/sessions.
_VOLATILE_HEADERS = frozenset({
"content-length",
"cookie",
"authorization",
"x-forwarded-for", # carries attacker-dependent values
"forwarded",
"x-real-ip",
"true-client-ip",
"cf-connecting-ip",
"x-request-id",
"x-correlation-id",
"x-amzn-trace-id",
})
# Distinctive order signatures for common tools. The match is on the
# lowercased-name list MINUS the volatile set, and a signature matches
# when it is a prefix of that list — many tools tack on
# "User-Agent / Accept-Encoding / Accept" in the same order regardless
# of method.
#
# Entry order matters: _guess_tool_from_order returns the FIRST entry
# that matches, so a longer signature must be listed before any shorter
# signature that is a prefix of it (python-requests before
# go-http-client before nmap-nse).
_TOOL_SIGNATURES: tuple[tuple[str, tuple[str, ...]], ...] = (
    # curl sends: Host, User-Agent, Accept, <body-headers>.
    ("curl", ("host", "user-agent", "accept")),
    # python-requests: Host, User-Agent, Accept-Encoding, Accept, Connection.
    ("python-requests", ("host", "user-agent", "accept-encoding", "accept", "connection")),
    # Go-http-client: Host, User-Agent, Accept-Encoding.
    ("go-http-client", ("host", "user-agent", "accept-encoding")),
    # nmap http-enum / http-* scripts: short, Host+User-Agent ordering.
    # Shortest signature, so it also catches any otherwise-unmatched
    # order that merely starts with Host, User-Agent.
    ("nmap-nse", ("host", "user-agent")),
    # Nikto / Nuclei send distinctive Accept-Language preferences — treat
    # User-Agent check as the secondary signal elsewhere; order alone is
    # ambiguous here.
)
def _casing_category(name: str) -> str:
"""Classify a header-name casing pattern.
Real HTTP clients and stacks pick one convention and stick to it:
browsers send `Title-Case`; python-requests sends `Title-Case`;
Go's stdlib canonicalises to `Title-Case`; curl sends literal
`Title-Case`; nmap/masscan often send `lowercase`; custom scanners
sometimes send `UPPERCASE`.
"""
if not name:
return "empty"
if name == name.upper():
return "upper"
if name == name.lower():
return "lower"
# "Title-Case" test: each dash-separated token starts with an
# uppercase; trailing chars (if any) must be lowercase. Single-
# letter tokens like the `X` in `X-Forwarded-For` qualify when
# uppercase — "".islower() is False in Python so the naive form
# of this test misfires.
parts = [p for p in name.split("-") if p]
if parts and all(
p[:1].isupper() and (len(p) == 1 or p[1:].islower())
for p in parts
):
return "title"
return "mixed"
def _short_hash(value: str) -> str:
"""16-hex-char SHA-256 prefix — stable identity, short display."""
import hashlib
return hashlib.sha256(value.encode("utf-8")).hexdigest()[:16]
def _guess_tool_from_order(lowered: list[str]) -> Optional[str]:
    """Guess the client tool from a lowercased header-name sequence.

    Walks ``_TOOL_SIGNATURES`` in declaration order and returns the name
    of the first entry whose signature equals the leading elements of
    *lowered*. Returns ``None`` when no signature matches.
    """
    for tool, signature in _TOOL_SIGNATURES:
        width = len(signature)
        if len(lowered) < width:
            # Too few headers for this signature to be a prefix.
            continue
        head = tuple(lowered[:width])
        if head == signature:
            return tool
    return None
def _http_quirks_fingerprint(
    log_data: dict[str, Any], headers: dict[str, Any],
) -> Optional[dict[str, Any]]:
    """Build an HTTP request-header quirks fingerprint.

    Captures the header-order hash, casing pattern, counts, and a
    best-effort tool guess. Bounty dedup will collapse repeat
    fingerprints from the same attacker.

    Args:
        log_data: The parsed log row; ``service`` gates the fingerprint
            and ``fields.method`` / ``fields.path`` are copied through.
        headers: Request headers keyed by name, in received order.

    Returns:
        ``None`` when ``service`` is not ``"http"``, when *headers* is
        not a non-empty dict, or when it has no string keys. Otherwise a
        payload dict with ``fingerprint_type`` set to ``"http_quirks"``.
        A request whose headers are all volatile still yields a payload,
        with an empty ``order`` list and ``stable_count`` of 0.
    """
    if log_data.get("service") != "http":
        return None
    if not isinstance(headers, dict) or not headers:
        return None

    # Preserve insertion order (Python 3.7+ dict guarantee, and JSON
    # round-trip also preserves it).
    names_full: list[str] = [k for k in headers if isinstance(k, str)]
    if not names_full:
        return None

    # Volatile headers are dropped for the identity hashes AND for the
    # emitted "order" list; they only contribute to "header_count".
    names_stable = [n for n in names_full if n.lower() not in _VOLATILE_HEADERS]
    lowered = [n.lower() for n in names_stable]
    casing_per_header = [_casing_category(n) for n in names_stable]

    # A single "dominant" casing category — useful for at-a-glance display.
    categories = set(casing_per_header)
    if not categories:
        dominant = "empty"
    elif len(categories) == 1:
        dominant = casing_per_header[0]
    else:
        dominant = "mixed"

    # Duplicate detection. Header names are case-insensitive, so names
    # are compared lowercased: exact repeats cannot survive as dict keys,
    # but case variants ("Host" + "host") can. Each repeated name is
    # reported once, under its first-seen spelling, in order of first
    # repetition — a single deterministic O(n) pass. This also keeps
    # working if the caller someday passes names from a list form.
    first_spelling: dict[str, str] = {}
    repeated: dict[str, None] = {}
    for n in names_full:
        key = n.lower()
        if key in first_spelling:
            repeated[key] = None
        else:
            first_spelling[key] = n
    duplicates = [first_spelling[key] for key in repeated]

    # "fields" may be missing or not a dict; fall back to no method/path
    # instead of raising AttributeError on a malformed row.
    fields = log_data.get("fields")
    if not isinstance(fields, dict):
        fields = {}

    return {
        "fingerprint_type": "http_quirks",
        "order_hash": _short_hash("\n".join(lowered)),
        "order": names_stable,
        "casing_hash": _short_hash("\n".join(casing_per_header)),
        "casing_category": dominant,
        "header_count": len(names_full),
        "stable_count": len(names_stable),
        "tool_guess": _guess_tool_from_order(lowered),
        "duplicates": duplicates or None,
        "method": fields.get("method"),
        "path": fields.get("path"),
    }

View File

@@ -92,6 +92,7 @@ const fpTypeLabel: Record<string, string> = {
tls_resumption: 'SESSION RESUMPTION', tls_resumption: 'SESSION RESUMPTION',
tls_certificate: 'CERTIFICATE', tls_certificate: 'CERTIFICATE',
http_useragent: 'HTTP USER-AGENT', http_useragent: 'HTTP USER-AGENT',
http_quirks: 'HTTP HEADER QUIRKS',
vnc_client_version: 'VNC CLIENT', vnc_client_version: 'VNC CLIENT',
jarm: 'JARM', jarm: 'JARM',
hassh_server: 'HASSH SERVER', hassh_server: 'HASSH SERVER',
@@ -104,6 +105,7 @@ const fpTypeIcon: Record<string, React.ReactNode> = {
tls_resumption: <Wifi size={14} />, tls_resumption: <Wifi size={14} />,
tls_certificate: <FileKey size={14} />, tls_certificate: <FileKey size={14} />,
http_useragent: <Shield size={14} />, http_useragent: <Shield size={14} />,
http_quirks: <Fingerprint size={14} />,
vnc_client_version: <Lock size={14} />, vnc_client_version: <Lock size={14} />,
jarm: <Crosshair size={14} />, jarm: <Crosshair size={14} />,
hassh_server: <Lock size={14} />, hassh_server: <Lock size={14} />,
@@ -338,6 +340,47 @@ const FpGeneric: React.FC<{ p: any }> = ({ p }) => (
</div> </div>
); );
// Renders one `http_quirks` fingerprint payload: both hashes, a row of
// summary tags, a collapsible list of the header order, and the request
// line when a method or path is present.
const FpHttpQuirks: React.FC<{ p: any }> = ({ p }) => {
  const headerOrder: string[] = Array.isArray(p.order) ? p.order : [];
  const hasHeaderCount = typeof p.header_count === 'number';
  const requestLine = p.method || p.path;

  const columnStyle: React.CSSProperties = { display: 'flex', flexDirection: 'column', gap: '6px' };
  const tagRowStyle: React.CSSProperties = { display: 'flex', gap: '8px', flexWrap: 'wrap' };
  const summaryStyle: React.CSSProperties = { fontSize: '0.7rem', cursor: 'pointer', letterSpacing: '1px' };
  const orderListStyle: React.CSSProperties = { display: 'flex', gap: '4px', flexWrap: 'wrap', marginTop: '4px' };
  const requestLineStyle: React.CSSProperties = { fontSize: '0.7rem', fontFamily: 'monospace', marginTop: '2px' };

  return (
    <div style={columnStyle}>
      <HashRow label="ORDER HASH" value={p.order_hash} />
      <HashRow label="CASING HASH" value={p.casing_hash} />
      {/* Summary tags: each one appears only when its field is set. */}
      <div style={tagRowStyle}>
        {p.tool_guess && (
          <Tag color="var(--violet)">{String(p.tool_guess).toUpperCase()}</Tag>
        )}
        {p.casing_category && (
          <Tag>CASE · {String(p.casing_category).toUpperCase()}</Tag>
        )}
        {hasHeaderCount && (
          <Tag>{p.header_count} HEADERS</Tag>
        )}
        {p.duplicates && (
          <Tag color="var(--warn, #e0a040)">DUPLICATES</Tag>
        )}
      </div>
      {headerOrder.length > 0 && (
        <details>
          <summary className="dim" style={summaryStyle}>
            HEADER ORDER
          </summary>
          <div style={orderListStyle}>
            {headerOrder.map((headerName, idx) => (
              <Tag key={`${headerName}-${idx}`}>{headerName}</Tag>
            ))}
          </div>
        </details>
      )}
      {requestLine && (
        <div className="dim" style={requestLineStyle}>
          {p.method} {p.path}
        </div>
      )}
    </div>
  );
};
const FingerprintGroup: React.FC<{ fpType: string; items: any[] }> = ({ fpType, items }) => { const FingerprintGroup: React.FC<{ fpType: string; items: any[] }> = ({ fpType, items }) => {
const label = fpTypeLabel[fpType] || fpType.toUpperCase().replace(/_/g, ' '); const label = fpTypeLabel[fpType] || fpType.toUpperCase().replace(/_/g, ' ');
const icon = fpTypeIcon[fpType] || <Fingerprint size={14} />; const icon = fpTypeIcon[fpType] || <Fingerprint size={14} />;
@@ -365,6 +408,7 @@ const FingerprintGroup: React.FC<{ fpType: string; items: any[] }> = ({ fpType,
case 'jarm': return <FpJarm key={i} p={p} />; case 'jarm': return <FpJarm key={i} p={p} />;
case 'hassh_server': return <FpHassh key={i} p={p} />; case 'hassh_server': return <FpHassh key={i} p={p} />;
case 'tcpfp': return <FpTcpStack key={i} p={p} />; case 'tcpfp': return <FpTcpStack key={i} p={p} />;
case 'http_quirks': return <FpHttpQuirks key={i} p={p} />;
default: return <FpGeneric key={i} p={p} />; default: return <FpGeneric key={i} p={p} />;
} }
})} })}
@@ -1245,7 +1289,7 @@ const AttackerDetail: React.FC = () => {
// Active probes first, then passive, then unknown // Active probes first, then passive, then unknown
const activeTypes = ['jarm', 'hassh_server', 'tcpfp']; const activeTypes = ['jarm', 'hassh_server', 'tcpfp'];
const passiveTypes = ['ja3', 'ja4l', 'tls_resumption', 'tls_certificate', 'http_useragent', 'vnc_client_version']; const passiveTypes = ['ja3', 'ja4l', 'tls_resumption', 'tls_certificate', 'http_useragent', 'http_quirks', 'vnc_client_version'];
const knownTypes = [...activeTypes, ...passiveTypes]; const knownTypes = [...activeTypes, ...passiveTypes];
const unknownTypes = Object.keys(groups).filter((t) => !knownTypes.includes(t)); const unknownTypes = Object.keys(groups).filter((t) => !knownTypes.includes(t));

View File

@@ -0,0 +1,206 @@
"""HTTP header-quirks fingerprint extraction in the ingester."""
from __future__ import annotations
from unittest.mock import AsyncMock
import pytest
from decnet.web.ingester import (
_casing_category,
_guess_tool_from_order,
_http_quirks_fingerprint,
_short_hash,
_extract_bounty,
)
def _log_row(headers: dict[str, str], *, service: str = "http") -> dict:
return {
"decky": "http-01",
"service": service,
"attacker_ip": "1.2.3.4",
"event_type": "request",
"fields": {
"method": "GET",
"path": "/",
"headers": headers,
},
}
# ─── casing classifier ─────────────────────────────────────────────────────
def test_casing_title():
    """Capitalised dash-separated tokens, including a lone `X`, are "title"."""
    for header in ("User-Agent", "Host", "X-Forwarded-For"):
        assert _casing_category(header) == "title"
def test_casing_lower():
    """All-lowercase names classify as "lower"."""
    for header in ("user-agent", "x-forwarded-for"):
        assert _casing_category(header) == "lower"
def test_casing_upper():
    """An all-uppercase name classifies as "upper"."""
    category = _casing_category("USER-AGENT")
    assert category == "upper"
def test_casing_mixed():
    """Irregular capitalisation falls through to "mixed"."""
    category = _casing_category("USer-AgEnt")
    assert category == "mixed"
# ─── order + casing hash stability ──────────────────────────────────────────
def test_same_order_same_hash():
    """Same names, order and casing hash identically even when values differ."""
    headers_a = {"Host": "x", "User-Agent": "curl/8", "Accept": "*/*"}
    headers_b = {"Host": "y", "User-Agent": "curl/7", "Accept": "*/*"}
    fp_a = _http_quirks_fingerprint(_log_row(headers_a), headers_a)
    fp_b = _http_quirks_fingerprint(_log_row(headers_b), headers_b)
    for key in ("order_hash", "casing_hash"):
        assert fp_a[key] == fp_b[key]
def test_different_order_different_hash():
    """Reordering the same header names changes the order hash."""
    forward = {"Host": "x", "User-Agent": "a", "Accept": "*/*"}
    backward = {"Accept": "*/*", "User-Agent": "a", "Host": "x"}
    fp_forward = _http_quirks_fingerprint(_log_row(forward), forward)
    fp_backward = _http_quirks_fingerprint(_log_row(backward), backward)
    assert fp_forward["order_hash"] != fp_backward["order_hash"]
def test_different_casing_different_hash():
    """Title-case and lowercase spellings differ in casing hash and category."""
    title_headers = {"Host": "x", "User-Agent": "a"}
    lower_headers = {"host": "x", "user-agent": "a"}
    fp_title = _http_quirks_fingerprint(_log_row(title_headers), title_headers)
    fp_lower = _http_quirks_fingerprint(_log_row(lower_headers), lower_headers)
    assert fp_title["casing_hash"] != fp_lower["casing_hash"]
    assert fp_title["casing_category"] == "title"
    assert fp_lower["casing_category"] == "lower"
def test_volatile_headers_excluded_from_hash():
    """Per-request headers (Content-Length, Cookie, ...) must not
    influence the identity hash, but still count toward header_count."""
    lean = {"Host": "x", "User-Agent": "a", "Content-Length": "100"}
    noisy = {
        "Host": "x",
        "User-Agent": "a",
        "Content-Length": "999",
        "Cookie": "session=abc",
    }
    fp_lean = _http_quirks_fingerprint(_log_row(lean), lean)
    fp_noisy = _http_quirks_fingerprint(_log_row(noisy), noisy)

    assert fp_lean["order_hash"] == fp_noisy["order_hash"]
    # header_count includes the volatile headers that were present ...
    assert fp_lean["header_count"] == 3
    assert fp_noisy["header_count"] == 4
    # ... while stable_count leaves them out.
    assert fp_lean["stable_count"] == 2
    assert fp_noisy["stable_count"] == 2
# ─── tool guesses ──────────────────────────────────────────────────────────
def test_curl_signature_guessed():
    """Host, User-Agent, Accept maps to curl."""
    order = ["host", "user-agent", "accept"]
    assert _guess_tool_from_order(order) == "curl"
def test_python_requests_signature_guessed():
    """The five-header order maps to python-requests."""
    order = ["host", "user-agent", "accept-encoding", "accept", "connection"]
    assert _guess_tool_from_order(order) == "python-requests"
def test_go_http_client_signature_guessed():
    """Host, User-Agent, Accept-Encoding maps to go-http-client."""
    order = ["host", "user-agent", "accept-encoding"]
    assert _guess_tool_from_order(order) == "go-http-client"
def test_nmap_nse_signature_guessed():
    """A bare Host, User-Agent order maps to nmap-nse."""
    short_order = ["host", "user-agent"]
    assert _guess_tool_from_order(short_order) == "nmap-nse"
def test_unknown_tool_returns_none():
    """An order that starts with no known signature yields no guess."""
    guess = _guess_tool_from_order(["accept", "host", "user-agent"])
    assert guess is None
def test_fingerprint_includes_tool_guess_curl():
    """The full fingerprint carries the tool guess for a curl-shaped request."""
    headers = {"Host": "target", "User-Agent": "curl/8.0", "Accept": "*/*"}
    fingerprint = _http_quirks_fingerprint(_log_row(headers), headers)
    assert fingerprint["tool_guess"] == "curl"
# ─── gating ─────────────────────────────────────────────────────────────────
def test_non_http_service_skipped():
    """Rows from a non-HTTP service produce no fingerprint."""
    headers = {"Host": "x"}
    ssh_row = _log_row(headers, service="ssh")
    assert _http_quirks_fingerprint(ssh_row, headers) is None
def test_empty_headers_skipped():
    """An empty header dict produces no fingerprint."""
    http_row = _log_row({})
    result = _http_quirks_fingerprint(http_row, {})
    assert result is None
def test_only_volatile_headers_still_emits():
    """A request made up solely of volatile headers still fingerprints:
    the order is empty, but the header count remains a signal."""
    headers = {"Content-Length": "10", "Cookie": "a=b"}
    fingerprint = _http_quirks_fingerprint(_log_row(headers), headers)
    assert fingerprint is not None
    assert fingerprint["header_count"] == 2
    assert fingerprint["stable_count"] == 0
    assert fingerprint["order"] == []
# ─── end-to-end via _extract_bounty ─────────────────────────────────────────
@pytest.mark.asyncio
async def test_extract_bounty_emits_http_quirks():
    """An HTTP row yields both a user-agent and a quirks fingerprint bounty."""
    headers = {"Host": "target", "User-Agent": "curl/8.0", "Accept": "*/*"}
    repo = AsyncMock()
    await _extract_bounty(repo, _log_row(headers))

    bounties = [call.args[0] for call in repo.add_bounty.call_args_list]
    fingerprint_payloads = [
        bounty["payload"]
        for bounty in bounties
        if bounty["bounty_type"] == "fingerprint"
    ]

    # Expect: http_useragent fingerprint + http_quirks fingerprint.
    seen_types = [payload.get("fingerprint_type") for payload in fingerprint_payloads]
    assert "http_useragent" in seen_types
    assert "http_quirks" in seen_types

    quirks_payload = next(
        payload
        for payload in fingerprint_payloads
        if payload.get("fingerprint_type") == "http_quirks"
    )
    assert quirks_payload["tool_guess"] == "curl"
    assert quirks_payload["casing_category"] == "title"
@pytest.mark.asyncio
async def test_extract_bounty_non_http_skips_quirks():
    """A non-HTTP row never produces an http_quirks bounty."""
    repo = AsyncMock()
    await _extract_bounty(repo, _log_row({"Host": "x"}, service="ssh"))

    emitted_types = [
        (call.args[0].get("payload") or {}).get("fingerprint_type")
        for call in repo.add_bounty.call_args_list
    ]
    assert all(fp_type != "http_quirks" for fp_type in emitted_types)
# ─── hash stability across restarts ─────────────────────────────────────────
def test_short_hash_deterministic():
    """_short_hash is deterministic, input-sensitive, 16 chars long, and
    stable across processes."""
    assert _short_hash("abc") == _short_hash("abc")
    assert _short_hash("abc") != _short_hash("def")
    assert len(_short_hash("anything")) == 16
    # Same-process comparisons cannot prove stability across restarts.
    # Pin the published SHA-256("abc") digest prefix so any change of
    # algorithm, encoding or truncation is caught.
    assert _short_hash("abc") == "ba7816bf8f01cfea"