add_bounty dedups on (attacker_ip, bounty_type, full payload JSON).
Three fingerprint-family bounties (http_useragent, ip_leak,
http_quirks) were including method/path or header_count in their
payloads — fields that vary per request — so a scanner hitting 100
paths produced 100 rows instead of 1, which is what was swelling
AttackerDetail.
Payloads now carry identity-only fields:
- http_useragent: {fingerprint_type, value}. UA + path combinations
no longer produce separate rows; one row per distinct User-Agent string.
- ip_leak: {source_ip, real_ip_claim, source_header, headers_seen}.
One row per distinct (proxy source, leaked IP, leaking header)
triple; repeat hits with the same header on different paths dedup.
- http_quirks: {fingerprint_type, order_hash, order, casing_hash,
casing_category, stable_count, tool_guess}. No more header_count
(included volatile headers; Cookie-presence variance broke dedup).
Per-request context (path, method, etc.) was never load-bearing for
analysts — the logs table already answers "when + where" at
per-event resolution. The bounty table is for stable identity.
UI:
- FpHttpQuirks renderer drops the method/path footer line and the
header_count/duplicates tags; shows stable_count instead.
- LEAKED-IPs tooltip on AttackerDetail swaps "X on GET /path" for
"Leaked via X; source 203.0.113.42" — same information, stable.
Tests add a "payload stable across paths and methods" assertion on
http_quirks — locks the contract so a future regression that sneaks
a per-request field back in fails loudly.
Existing duplicate bounty rows don't retroactively collapse.
Dev: `decnet db-reset --i-know-what-im-doing drop-tables` and
restart. Prod: one SQL pass to dedup by (attacker_ip, bounty_type,
payload) — trivial but not automated.
187 lines
6.0 KiB
Python
187 lines
6.0 KiB
Python
"""XFF / proxy-family mismatch detection in the ingester's bounty extractor."""
|
|
from __future__ import annotations
|
|
|
|
from unittest.mock import AsyncMock
|
|
|
|
import pytest
|
|
|
|
from decnet.web.ingester import _detect_ip_leak, _extract_bounty
|
|
|
|
|
|
def _log_row(
    headers: dict[str, str] | None = None,
    *,
    source_ip: str = "203.0.113.42",
    service: str = "http",
    event_type: str = "request",
) -> dict:
    """Build a minimal log row for the tests in this module.

    The request part is fixed to ``GET /wp-admin/`` on decky ``http-01``;
    callers can vary the headers, the attacker IP, the service name and
    the event type.  A missing or empty ``headers`` argument results in a
    fresh empty dict.
    """
    request_fields = {
        "method": "GET",
        "path": "/wp-admin/",
        "headers": headers if headers else {},
    }
    row = dict(
        decky="http-01",
        service=service,
        attacker_ip=source_ip,
        event_type=event_type,
        fields=request_fields,
    )
    return row
|
|
|
|
|
|
# ─── pure detector ──────────────────────────────────────────────────────────
|
|
|
|
def test_xff_leftmost_differs_from_source_emits_leak():
    """The left-most X-Forwarded-For entry is reported as the claimed IP."""
    event = _log_row({"X-Forwarded-For": "198.51.100.7, 10.0.0.1"})
    headers = event["fields"]["headers"]

    leak = _detect_ip_leak(event, headers)

    assert leak is not None
    expected = {
        "source_ip": "203.0.113.42",
        "real_ip_claim": "198.51.100.7",
        "source_header": "X-Forwarded-For",
    }
    for key, value in expected.items():
        assert leak[key] == value
    # The payload is identity-only: method and path are deliberately
    # left out so bounty dedup collapses repeat hits from one attacker.
    for volatile in ("method", "path"):
        assert volatile not in leak
|
|
|
|
|
|
def test_xff_matches_source_no_leak():
    """An X-Forwarded-For value equal to the source address is not a leak."""
    event = _log_row({"X-Forwarded-For": "203.0.113.42"})
    headers = event["fields"]["headers"]
    leak = _detect_ip_leak(event, headers)
    assert leak is None
|
|
|
|
|
|
def test_forwarded_header_rfc7239_parsed():
    """The ``for=`` element of a ``Forwarded`` header gives the claimed IP."""
    event = _log_row({"Forwarded": "for=1.2.3.4;by=5.6.7.8"})
    leak = _detect_ip_leak(event, event["fields"]["headers"])

    assert leak is not None
    claim_and_header = (leak["real_ip_claim"], leak["source_header"])
    assert claim_and_header == ("1.2.3.4", "Forwarded")
|
|
|
|
|
|
def test_forwarded_with_ipv6_and_port():
    """A quoted, bracketed IPv6 ``for=`` value with a port yields the bare address."""
    forwarded_value = 'for="[2001:db8::1]:4711"'
    event = _log_row({"Forwarded": forwarded_value})

    leak = _detect_ip_leak(event, event["fields"]["headers"])

    assert leak is not None
    assert leak["real_ip_claim"] == "2001:db8::1"
|
|
|
|
|
|
def test_x_real_ip_fallback():
    """X-Real-IP on its own produces a leak attributed to that header."""
    event = _log_row({"X-Real-IP": "198.51.100.7"})
    headers = event["fields"]["headers"]

    leak = _detect_ip_leak(event, headers)

    assert leak is not None
    header_and_claim = (leak["source_header"], leak["real_ip_claim"])
    assert header_and_claim == ("X-Real-IP", "198.51.100.7")
|
|
|
|
|
|
def test_cf_connecting_ip_variant():
    """CF-Connecting-IP is recognised as a header that can leak an IP."""
    event = _log_row({"CF-Connecting-IP": "198.51.100.9"})

    leak = _detect_ip_leak(event, event["fields"]["headers"])

    assert leak is not None
    header_and_claim = (leak["source_header"], leak["real_ip_claim"])
    assert header_and_claim == ("CF-Connecting-IP", "198.51.100.9")
|
|
|
|
|
|
def test_priority_forwarded_over_xff():
    """``Forwarded`` takes precedence when several proxy headers are present."""
    competing = {
        "Forwarded": "for=1.1.1.1",
        "X-Forwarded-For": "2.2.2.2",
        "X-Real-IP": "3.3.3.3",
    }
    event = _log_row(competing)

    leak = _detect_ip_leak(event, event["fields"]["headers"])

    assert leak is not None
    assert leak["source_header"] == "Forwarded"
    assert leak["real_ip_claim"] == "1.1.1.1"
    # The lower-priority proxy headers must still be listed in the metadata.
    seen = leak["headers_seen"]
    for header_name in ("X-Forwarded-For", "X-Real-IP"):
        assert header_name in seen
|
|
|
|
|
|
def test_case_insensitive_header_match():
    """A lower-cased ``x-forwarded-for`` header name is still detected."""
    lowercase_headers = {"x-forwarded-for": "198.51.100.7"}
    event = _log_row(lowercase_headers)

    leak = _detect_ip_leak(event, event["fields"]["headers"])

    assert leak is not None
    assert leak["real_ip_claim"] == "198.51.100.7"
|
|
|
|
|
|
def test_trusted_proxy_source_skipped(monkeypatch):
    """No leak is reported when the source IP is in DECNET_TRUSTED_PROXIES."""
    # Trust exactly the address that _log_row uses as its default source.
    monkeypatch.setenv("DECNET_TRUSTED_PROXIES", "203.0.113.42")
    event = _log_row({"X-Forwarded-For": "198.51.100.7"})
    headers = event["fields"]["headers"]

    leak = _detect_ip_leak(event, headers)

    assert leak is None
|
|
|
|
|
|
def test_trusted_proxy_cidr(monkeypatch):
    """A CIDR entry in DECNET_TRUSTED_PROXIES covering the source suppresses the leak."""
    # 203.0.113.0/24 contains the default source address 203.0.113.42.
    monkeypatch.setenv("DECNET_TRUSTED_PROXIES", "203.0.113.0/24")
    event = _log_row({"X-Forwarded-For": "198.51.100.7"})
    headers = event["fields"]["headers"]

    leak = _detect_ip_leak(event, headers)

    assert leak is None
|
|
|
|
|
|
def test_malformed_xff_falls_through_to_next_parseable():
    """Unparseable X-Forwarded-For entries are skipped in favour of a valid one."""
    noisy_chain = "garbage, 198.51.100.7, not-ip"
    event = _log_row({"X-Forwarded-For": noisy_chain})

    leak = _detect_ip_leak(event, event["fields"]["headers"])

    assert leak is not None
    assert leak["real_ip_claim"] == "198.51.100.7"
|
|
|
|
|
|
def test_all_values_unparseable_bails():
    """An X-Forwarded-For header with no parseable address yields no leak."""
    event = _log_row({"X-Forwarded-For": "not-ip, still-not-ip"})
    headers = event["fields"]["headers"]
    leak = _detect_ip_leak(event, headers)
    assert leak is None
|
|
|
|
|
|
def test_no_headers_skipped():
    """With an empty header mapping the detector returns None."""
    event = _log_row({})
    leak = _detect_ip_leak(event, {})
    assert leak is None
|
|
|
|
|
|
def test_non_http_service_skipped():
    """A row whose service is ``ssh`` yields no leak despite a proxy header."""
    proxy_headers = {"X-Forwarded-For": "198.51.100.7"}
    event = _log_row(proxy_headers, service="ssh")

    leak = _detect_ip_leak(event, event["fields"]["headers"])

    assert leak is None
|
|
|
|
|
|
def test_missing_attacker_ip_bails():
    """An empty ``attacker_ip`` makes the detector return None."""
    proxy_headers = {"X-Forwarded-For": "198.51.100.7"}
    event = _log_row(proxy_headers, source_ip="")

    leak = _detect_ip_leak(event, event["fields"]["headers"])

    assert leak is None
|
|
|
|
|
|
# ─── end-to-end via _extract_bounty ─────────────────────────────────────────
|
|
|
|
@pytest.mark.asyncio
async def test_extract_bounty_emits_ip_leak_row():
    """``_extract_bounty`` records an ip_leak bounty alongside a fingerprint one."""
    event = _log_row({
        "X-Forwarded-For": "198.51.100.7",
        "User-Agent": "curl/7.81.0",
    })
    repo = AsyncMock()

    await _extract_bounty(repo, event)

    # Two bounty types are expected among the add_bounty calls: the
    # User-Agent fingerprint and the ip_leak.
    bounties = [recorded.args[0] for recorded in repo.add_bounty.call_args_list]
    seen_types = [bounty["bounty_type"] for bounty in bounties]
    assert "fingerprint" in seen_types
    assert "ip_leak" in seen_types

    first_leak = next(
        bounty for bounty in bounties if bounty["bounty_type"] == "ip_leak"
    )
    payload = first_leak["payload"]
    assert payload["real_ip_claim"] == "198.51.100.7"
    assert payload["source_ip"] == "203.0.113.42"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_extract_bounty_no_leak_no_call():
    """No ip_leak bounty is recorded when X-Forwarded-For equals the source IP."""
    # The header value is the same address _log_row uses as attacker_ip.
    event = _log_row({"X-Forwarded-For": "203.0.113.42"})
    repo = AsyncMock()

    await _extract_bounty(repo, event)

    recorded = repo.add_bounty.call_args_list
    assert all(call.args[0]["bounty_type"] != "ip_leak" for call in recorded)
|