fix(bounties): strip per-request fields from fingerprint payloads
add_bounty dedups on (attacker_ip, bounty_type, full payload JSON).
Three fingerprint-family bounties (http_useragent, ip_leak,
http_quirks) were including method, path, or header_count in their
payloads — fields that vary per request — so a scanner hitting 100
paths produced 100 rows instead of 1, which is what was swelling
AttackerDetail.
Payloads now carry identity-only fields:
- http_useragent: {fingerprint_type, value}. UA + path combinations
no longer produce separate rows; one row per distinct User-Agent string.
- ip_leak: {source_ip, real_ip_claim, source_header, headers_seen}.
One row per distinct (proxy source, leaked IP, leaking header)
triple; repeat hits with the same header on different paths dedup.
- http_quirks: {fingerprint_type, order_hash, order, casing_hash,
casing_category, stable_count, tool_guess}. header_count is dropped
because it counted volatile headers, so Cookie-presence variance broke
dedup; the duplicates, method and path keys are removed as well.
Per-request context (path, method, etc.) was never load-bearing for
analysts — the logs table already answers "when + where" at
per-event resolution. The bounty table is for stable identity.
UI:
- FpHttpQuirks renderer drops the method/path footer line and the
header_count/duplicates tags; shows stable_count instead.
- LEAKED-IPs tooltip on AttackerDetail swaps "X on GET /path" for
"Leaked via X; source 203.0.113.42" — the leaking header is still shown,
now next to the stable source IP instead of the per-request method/path.
Tests add a "payload stable across paths and methods" assertion on
http_quirks — locks the contract so a future regression that sneaks
a per-request field back in fails loudly.
Existing duplicate bounty rows don't retroactively collapse.
Dev: `decnet db-reset --i-know-what-im-doing drop-tables` and
restart. Prod: one SQL pass to dedup by (attacker_ip, bounty_type,
payload) — trivial but not automated.
This commit is contained in:
@@ -232,6 +232,11 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non
|
|||||||
_headers = {}
|
_headers = {}
|
||||||
_ua = _headers.get("User-Agent") or _headers.get("user-agent")
|
_ua = _headers.get("User-Agent") or _headers.get("user-agent")
|
||||||
if _ua:
|
if _ua:
|
||||||
|
# Payload must be identity-only (no per-request method/path) —
|
||||||
|
# add_bounty dedups on (attacker_ip, bounty_type, full payload
|
||||||
|
# JSON), so including path here would create one row per URL
|
||||||
|
# the scanner hits. Per-request context belongs in the logs
|
||||||
|
# table, not the bounty table.
|
||||||
await repo.add_bounty({
|
await repo.add_bounty({
|
||||||
"decky": log_data.get("decky"),
|
"decky": log_data.get("decky"),
|
||||||
"service": log_data.get("service"),
|
"service": log_data.get("service"),
|
||||||
@@ -240,8 +245,6 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non
|
|||||||
"payload": {
|
"payload": {
|
||||||
"fingerprint_type": "http_useragent",
|
"fingerprint_type": "http_useragent",
|
||||||
"value": _ua,
|
"value": _ua,
|
||||||
"method": _fields.get("method"),
|
|
||||||
"path": _fields.get("path"),
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -600,14 +603,15 @@ def _detect_ip_leak(
|
|||||||
if raw is not None:
|
if raw is not None:
|
||||||
seen[h] = raw
|
seen[h] = raw
|
||||||
|
|
||||||
|
# Identity-only payload — add_bounty dedups on the full payload
|
||||||
|
# string, so per-request method/path would create one row per URL
|
||||||
|
# the attacker hits with the same leaked IP. The bounty represents
|
||||||
|
# the LEAK itself, not each individual request.
|
||||||
return {
|
return {
|
||||||
"source_ip": source_ip,
|
"source_ip": source_ip,
|
||||||
"real_ip_claim": claimed,
|
"real_ip_claim": claimed,
|
||||||
"source_header": header_name,
|
"source_header": header_name,
|
||||||
"headers_seen": seen,
|
"headers_seen": seen,
|
||||||
"decky": log_data.get("decky"),
|
|
||||||
"path": log_data.get("fields", {}).get("path"),
|
|
||||||
"method": log_data.get("fields", {}).get("method"),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -732,22 +736,19 @@ def _http_quirks_fingerprint(
|
|||||||
else:
|
else:
|
||||||
dominant = "mixed"
|
dominant = "mixed"
|
||||||
|
|
||||||
# Duplicate detection: in the dict we got, duplicates would have
|
# Identity-only payload — every field must be stable for two
|
||||||
# collapsed to one key. But we can still flag if the template
|
# requests from the same client stack. add_bounty dedups on the
|
||||||
# someday passes a list — future-proofing, no-op today.
|
# full payload JSON, so a per-request-varying key (path, method,
|
||||||
duplicates = [n for n in {x for x in names_full if names_full.count(x) > 1}]
|
# header_count when Cookie presence varies) would spawn one row
|
||||||
|
# per request. The hashes ARE the identity; per-request context
|
||||||
|
# lives in the logs table.
|
||||||
return {
|
return {
|
||||||
"fingerprint_type": "http_quirks",
|
"fingerprint_type": "http_quirks",
|
||||||
"order_hash": order_hash,
|
"order_hash": order_hash,
|
||||||
"order": names_stable,
|
"order": names_stable,
|
||||||
"casing_hash": casing_hash,
|
"casing_hash": casing_hash,
|
||||||
"casing_category": dominant,
|
"casing_category": dominant,
|
||||||
"header_count": len(names_full),
|
|
||||||
"stable_count": len(names_stable),
|
"stable_count": len(names_stable),
|
||||||
"tool_guess": _guess_tool_from_order(lowered),
|
"tool_guess": _guess_tool_from_order(lowered),
|
||||||
"duplicates": duplicates or None,
|
|
||||||
"method": log_data.get("fields", {}).get("method"),
|
|
||||||
"path": log_data.get("fields", {}).get("path"),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -353,11 +353,8 @@ const FpHttpQuirks: React.FC<{ p: any }> = ({ p }) => {
|
|||||||
{p.casing_category && (
|
{p.casing_category && (
|
||||||
<Tag>CASE · {String(p.casing_category).toUpperCase()}</Tag>
|
<Tag>CASE · {String(p.casing_category).toUpperCase()}</Tag>
|
||||||
)}
|
)}
|
||||||
{typeof p.header_count === 'number' && (
|
{typeof p.stable_count === 'number' && (
|
||||||
<Tag>{p.header_count} HEADERS</Tag>
|
<Tag>{p.stable_count} STABLE HEADERS</Tag>
|
||||||
)}
|
|
||||||
{p.duplicates && (
|
|
||||||
<Tag color="var(--warn, #e0a040)">DUPLICATES</Tag>
|
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
{order.length > 0 && (
|
{order.length > 0 && (
|
||||||
@@ -372,11 +369,6 @@ const FpHttpQuirks: React.FC<{ p: any }> = ({ p }) => {
|
|||||||
</div>
|
</div>
|
||||||
</details>
|
</details>
|
||||||
)}
|
)}
|
||||||
{(p.method || p.path) && (
|
|
||||||
<div className="dim" style={{ fontSize: '0.7rem', fontFamily: 'monospace', marginTop: '2px' }}>
|
|
||||||
{p.method} {p.path}
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
@@ -1101,9 +1093,7 @@ const AttackerDetail: React.FC = () => {
|
|||||||
(l) => l.payload?.real_ip_claim === ip,
|
(l) => l.payload?.real_ip_claim === ip,
|
||||||
);
|
);
|
||||||
const tooltip = latest
|
const tooltip = latest
|
||||||
? `${latest.payload.source_header ?? '?'} on ${
|
? `Leaked via ${latest.payload.source_header ?? '?'}; source ${latest.payload.source_ip ?? '?'}`
|
||||||
latest.payload.method ?? '?'
|
|
||||||
} ${latest.payload.path ?? '/'}`
|
|
||||||
: '';
|
: '';
|
||||||
return (
|
return (
|
||||||
<span
|
<span
|
||||||
|
|||||||
@@ -268,8 +268,9 @@ class TestGetAttackerDetail:
|
|||||||
"source_ip": "203.0.113.42",
|
"source_ip": "203.0.113.42",
|
||||||
"real_ip_claim": "198.51.100.7",
|
"real_ip_claim": "198.51.100.7",
|
||||||
"source_header": "X-Forwarded-For",
|
"source_header": "X-Forwarded-For",
|
||||||
"path": "/wp-admin/",
|
"headers_seen": {
|
||||||
"method": "GET",
|
"X-Forwarded-For": "198.51.100.7",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -80,7 +80,9 @@ def test_different_casing_different_hash():
|
|||||||
|
|
||||||
def test_volatile_headers_excluded_from_hash():
|
def test_volatile_headers_excluded_from_hash():
|
||||||
"""Content-Length, Cookie, XFF etc. are per-request; the identity
|
"""Content-Length, Cookie, XFF etc. are per-request; the identity
|
||||||
hash shouldn't depend on them."""
|
hash must not depend on them, otherwise two requests from the same
|
||||||
|
stack — one with Cookie, one without — would dedup-miss at the
|
||||||
|
bounty layer and spam the AttackerDetail page."""
|
||||||
row_a = _log_row({
|
row_a = _log_row({
|
||||||
"Host": "x", "User-Agent": "a", "Content-Length": "100",
|
"Host": "x", "User-Agent": "a", "Content-Length": "100",
|
||||||
})
|
})
|
||||||
@@ -90,13 +92,11 @@ def test_volatile_headers_excluded_from_hash():
|
|||||||
})
|
})
|
||||||
fa = _http_quirks_fingerprint(row_a, row_a["fields"]["headers"])
|
fa = _http_quirks_fingerprint(row_a, row_a["fields"]["headers"])
|
||||||
fb = _http_quirks_fingerprint(row_b, row_b["fields"]["headers"])
|
fb = _http_quirks_fingerprint(row_b, row_b["fields"]["headers"])
|
||||||
assert fa["order_hash"] == fb["order_hash"]
|
# Whole payload must be identical — add_bounty dedups on the full
|
||||||
# Count reflects ALL headers (the volatile ones WERE there).
|
# serialized payload, so ANY per-request-varying field would spawn
|
||||||
assert fa["header_count"] == 3
|
# new rows. This assertion is the contract.
|
||||||
assert fb["header_count"] == 4
|
assert fa == fb
|
||||||
# Stable count excludes the volatile ones.
|
|
||||||
assert fa["stable_count"] == 2
|
assert fa["stable_count"] == 2
|
||||||
assert fb["stable_count"] == 2
|
|
||||||
|
|
||||||
|
|
||||||
# ─── tool guesses ──────────────────────────────────────────────────────────
|
# ─── tool guesses ──────────────────────────────────────────────────────────
|
||||||
@@ -148,11 +148,10 @@ def test_empty_headers_skipped():
|
|||||||
|
|
||||||
def test_only_volatile_headers_still_emits():
|
def test_only_volatile_headers_still_emits():
|
||||||
"""If every header is in the volatile set we still want a fingerprint,
|
"""If every header is in the volatile set we still want a fingerprint,
|
||||||
just with empty order — header count alone is still a signal."""
|
just with empty order — "zero stable headers" is itself a signal."""
|
||||||
row = _log_row({"Content-Length": "10", "Cookie": "a=b"})
|
row = _log_row({"Content-Length": "10", "Cookie": "a=b"})
|
||||||
f = _http_quirks_fingerprint(row, row["fields"]["headers"])
|
f = _http_quirks_fingerprint(row, row["fields"]["headers"])
|
||||||
assert f is not None
|
assert f is not None
|
||||||
assert f["header_count"] == 2
|
|
||||||
assert f["stable_count"] == 0
|
assert f["stable_count"] == 0
|
||||||
assert f["order"] == []
|
assert f["order"] == []
|
||||||
|
|
||||||
@@ -198,6 +197,27 @@ async def test_extract_bounty_non_http_skips_quirks():
|
|||||||
assert payload.get("fingerprint_type") != "http_quirks"
|
assert payload.get("fingerprint_type") != "http_quirks"
|
||||||
|
|
||||||
|
|
||||||
|
def test_payload_stable_across_paths_and_methods():
|
||||||
|
"""Two requests from the same stack hitting different paths/methods
|
||||||
|
must produce byte-identical payloads so (ip, type, payload) dedup
|
||||||
|
collapses them into one bounty row. If this test breaks, check
|
||||||
|
whether a per-request field snuck back into _http_quirks_fingerprint."""
|
||||||
|
headers = {"Host": "target", "User-Agent": "curl/8.0", "Accept": "*/*"}
|
||||||
|
row_get = {
|
||||||
|
"decky": "http-01", "service": "http", "attacker_ip": "1.2.3.4",
|
||||||
|
"event_type": "request",
|
||||||
|
"fields": {"method": "GET", "path": "/admin", "headers": headers},
|
||||||
|
}
|
||||||
|
row_post = {
|
||||||
|
"decky": "http-01", "service": "http", "attacker_ip": "1.2.3.4",
|
||||||
|
"event_type": "request",
|
||||||
|
"fields": {"method": "POST", "path": "/wp-login.php", "headers": headers},
|
||||||
|
}
|
||||||
|
fa = _http_quirks_fingerprint(row_get, headers)
|
||||||
|
fb = _http_quirks_fingerprint(row_post, headers)
|
||||||
|
assert fa == fb, "payload must not depend on request method/path"
|
||||||
|
|
||||||
|
|
||||||
# ─── hash stability across restarts ─────────────────────────────────────────
|
# ─── hash stability across restarts ─────────────────────────────────────────
|
||||||
|
|
||||||
def test_short_hash_deterministic():
|
def test_short_hash_deterministic():
|
||||||
|
|||||||
@@ -39,7 +39,10 @@ def test_xff_leftmost_differs_from_source_emits_leak():
|
|||||||
assert result["source_ip"] == "203.0.113.42"
|
assert result["source_ip"] == "203.0.113.42"
|
||||||
assert result["real_ip_claim"] == "198.51.100.7"
|
assert result["real_ip_claim"] == "198.51.100.7"
|
||||||
assert result["source_header"] == "X-Forwarded-For"
|
assert result["source_header"] == "X-Forwarded-For"
|
||||||
assert result["path"] == "/wp-admin/"
|
# Identity-only payload — method/path intentionally omitted so the
|
||||||
|
# bounty dedup collapses repeat hits from the same attacker.
|
||||||
|
assert "method" not in result
|
||||||
|
assert "path" not in result
|
||||||
|
|
||||||
|
|
||||||
def test_xff_matches_source_no_leak():
|
def test_xff_matches_source_no_leak():
|
||||||
|
|||||||
Reference in New Issue
Block a user