From 2c876b4d86033e15401cdb55d00b8ae3f5cbc5d9 Mon Sep 17 00:00:00 2001 From: anti Date: Fri, 24 Apr 2026 17:58:54 -0400 Subject: [PATCH] fix(bounties): strip per-request fields from fingerprint payloads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit add_bounty dedups on (attacker_ip, bounty_type, full payload JSON). Three fingerprint-family bounties (http_useragent, ip_leak, http_quirks) were including method, path, or header_count in their payloads — fields that vary per request — so a scanner hitting 100 paths produced 100 rows instead of 1, which is what was swelling AttackerDetail. Payloads now carry identity-only fields: - http_useragent: {fingerprint_type, value}. UA + path combinations no longer produce separate rows; there is one row per distinct User-Agent string. - ip_leak: {source_ip, real_ip_claim, source_header, headers_seen}. One row per distinct (proxy source, leaked IP, leaking header) triple; repeat hits with the same header on different paths dedup. - http_quirks: {fingerprint_type, order_hash, order, casing_hash, casing_category, stable_count, tool_guess}. header_count is gone (it counted volatile headers, so Cookie-presence variance broke dedup). Per-request context (path, method, etc.) was never load-bearing for analysts — the logs table already answers "when + where" at per-event resolution. The bounty table is for stable identity. UI: - FpHttpQuirks renderer drops the method/path footer line and the header_count/duplicates tags; shows stable_count instead. - LEAKED-IPs tooltip on AttackerDetail swaps "X on GET /path" for "Leaked via X; source 203.0.113.42" — still names the leaking header, but uses only stable fields. Tests add a "payload stable across paths and methods" assertion on http_quirks — locks the contract so a future regression that sneaks a per-request field back in fails loudly. Existing duplicate bounty rows don't retroactively collapse. Dev: `decnet db-reset --i-know-what-im-doing drop-tables` and restart. 
Prod: one SQL pass to dedup by (attacker_ip, bounty_type, payload) — trivial but not automated. --- decnet/web/ingester.py | 29 +++++++-------- decnet_web/src/components/AttackerDetail.tsx | 16 ++------- tests/web/test_api_attackers.py | 5 +-- tests/web/test_ingester_http_quirks.py | 38 +++++++++++++++----- tests/web/test_ingester_xff.py | 5 ++- 5 files changed, 54 insertions(+), 39 deletions(-) diff --git a/decnet/web/ingester.py b/decnet/web/ingester.py index f40f98b7..7003ef87 100644 --- a/decnet/web/ingester.py +++ b/decnet/web/ingester.py @@ -232,6 +232,11 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non _headers = {} _ua = _headers.get("User-Agent") or _headers.get("user-agent") if _ua: + # Payload must be identity-only (no per-request method/path) — + # add_bounty dedups on (attacker_ip, bounty_type, full payload + # JSON), so including path here would create one row per URL + # the scanner hits. Per-request context belongs in the logs + # table, not the bounty table. await repo.add_bounty({ "decky": log_data.get("decky"), "service": log_data.get("service"), @@ -240,8 +245,6 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non "payload": { "fingerprint_type": "http_useragent", "value": _ua, - "method": _fields.get("method"), - "path": _fields.get("path"), } }) @@ -600,14 +603,15 @@ def _detect_ip_leak( if raw is not None: seen[h] = raw + # Identity-only payload — add_bounty dedups on the full payload + # string, so per-request method/path would create one row per URL + # the attacker hits with the same leaked IP. The bounty represents + # the LEAK itself, not each individual request. 
return { "source_ip": source_ip, "real_ip_claim": claimed, "source_header": header_name, "headers_seen": seen, - "decky": log_data.get("decky"), - "path": log_data.get("fields", {}).get("path"), - "method": log_data.get("fields", {}).get("method"), } @@ -732,22 +736,19 @@ def _http_quirks_fingerprint( else: dominant = "mixed" - # Duplicate detection: in the dict we got, duplicates would have - # collapsed to one key. But we can still flag if the template - # someday passes a list — future-proofing, no-op today. - duplicates = [n for n in {x for x in names_full if names_full.count(x) > 1}] - + # Identity-only payload — every field must be stable for two + # requests from the same client stack. add_bounty dedups on the + # full payload JSON, so a per-request-varying key (path, method, + # header_count when Cookie presence varies) would spawn one row + # per request. The hashes ARE the identity; per-request context + # lives in the logs table. return { "fingerprint_type": "http_quirks", "order_hash": order_hash, "order": names_stable, "casing_hash": casing_hash, "casing_category": dominant, - "header_count": len(names_full), "stable_count": len(names_stable), "tool_guess": _guess_tool_from_order(lowered), - "duplicates": duplicates or None, - "method": log_data.get("fields", {}).get("method"), - "path": log_data.get("fields", {}).get("path"), } diff --git a/decnet_web/src/components/AttackerDetail.tsx b/decnet_web/src/components/AttackerDetail.tsx index beb419e7..7d0c47d4 100644 --- a/decnet_web/src/components/AttackerDetail.tsx +++ b/decnet_web/src/components/AttackerDetail.tsx @@ -353,11 +353,8 @@ const FpHttpQuirks: React.FC<{ p: any }> = ({ p }) => { {p.casing_category && ( CASE · {String(p.casing_category).toUpperCase()} )} - {typeof p.header_count === 'number' && ( - {p.header_count} HEADERS - )} - {p.duplicates && ( - DUPLICATES + {typeof p.stable_count === 'number' && ( + {p.stable_count} STABLE HEADERS )} {order.length > 0 && ( @@ -372,11 +369,6 @@ const 
FpHttpQuirks: React.FC<{ p: any }> = ({ p }) => { )} - {(p.method || p.path) && ( -
- {p.method} {p.path} -
- )} ); }; @@ -1101,9 +1093,7 @@ const AttackerDetail: React.FC = () => { (l) => l.payload?.real_ip_claim === ip, ); const tooltip = latest - ? `${latest.payload.source_header ?? '?'} on ${ - latest.payload.method ?? '?' - } ${latest.payload.path ?? '/'}` + ? `Leaked via ${latest.payload.source_header ?? '?'}; source ${latest.payload.source_ip ?? '?'}` : ''; return (