Files
DECNET/tests/web/test_ingester_xff.py
anti 6d1d69443a fix(xff): split leak from spoof — loopback/private claims aren't leaks
An attacker hitting /admin with `X-Forwarded-For: 127.0.0.1` was
previously flagged as an IP leak. It isn't — that's the classic
IP-allowlist / WAF-bypass payload ("treat me as localhost and skip
your auth checks"). Misclassifying it as "LEAKED IPs" in the UI
confuses analysts and burns trust in the signal.

Split by claim category. After pulling the left-most claimed IP
from the proxy header, classify:

- public (routable) → bounty_type=ip_leak (real attribution leak;
  the attacker's upstream proxy forwarded their real IP).
- loopback / private / link-local / multicast / reserved /
  unspecified → bounty_type=fingerprint, fingerprint_type=
  spoofed_source (WAF-bypass / allowlist-probing attempt; the
  attacker is telling us they know what XFF does).
- unparseable → dropped.

Same extraction pipeline; diverges only at the last step. A new
shared _classify_proxy_header_claim returns (kind, payload);
_detect_ip_leak keeps its public-only contract for backward-
compat; _detect_spoofed_source is the new sibling.

UI renderer FpSpoofedSource shows the claimed IP in warn color with
the claim_category tag (LOOPBACK / PRIVATE / ...) and a WAF-BYPASS
ATTEMPT badge — distinct visual from the "LEAKED IPs" row which
stays reserved for genuine public-IP leaks.

Test addresses updated: RFC 5737 doc ranges (198.51.100.0/24,
203.0.113.0/24) are flagged `is_reserved` in Python's ipaddress
module, so they now correctly belong to the spoof bucket — tests
that meant to exercise real public IPs now use 8.8.8.8 / 1.1.1.1 /
Cloudflare DNS. Added eleven new tests locking the classifier +
the two detectors' mutual exclusion.
2026-04-24 18:06:29 -04:00

303 lines
10 KiB
Python

"""XFF / proxy-family mismatch detection in the ingester's bounty extractor."""
from __future__ import annotations
from unittest.mock import AsyncMock
import pytest
from decnet.web.ingester import (
_categorize_claimed_ip,
_detect_ip_leak,
_detect_spoofed_source,
_extract_bounty,
)
def _log_row(
headers: dict[str, str] | None = None,
*,
source_ip: str = "8.8.8.8",
service: str = "http",
event_type: str = "request",
) -> dict:
return {
"decky": "http-01",
"service": service,
"attacker_ip": source_ip,
"event_type": event_type,
"fields": {
"method": "GET",
"path": "/wp-admin/",
"headers": headers or {},
},
}
# ─── pure detector ──────────────────────────────────────────────────────────
def test_xff_leftmost_differs_from_source_emits_leak():
row = _log_row({
"X-Forwarded-For": "1.1.1.1, 10.0.0.1",
})
result = _detect_ip_leak(row, row["fields"]["headers"])
assert result is not None
assert result["source_ip"] == "8.8.8.8"
assert result["real_ip_claim"] == "1.1.1.1"
assert result["source_header"] == "X-Forwarded-For"
# Identity-only payload — method/path intentionally omitted so the
# bounty dedup collapses repeat hits from the same attacker.
assert "method" not in result
assert "path" not in result
def test_xff_matches_source_no_leak():
row = _log_row({"X-Forwarded-For": "8.8.8.8"})
assert _detect_ip_leak(row, row["fields"]["headers"]) is None
def test_xff_loopback_is_not_a_leak():
"""curl -H 'X-Forwarded-For: 127.0.0.1' is the classic WAF-bypass
payload. Must not be classified as an attribution leak — loopback
is not a routable IP anyone could actually have as their real
address."""
row = _log_row({"X-Forwarded-For": "127.0.0.1"})
assert _detect_ip_leak(row, row["fields"]["headers"]) is None
def test_xff_rfc1918_is_not_a_leak():
"""RFC1918 private addresses are forgery attempts, not leaks."""
for ip in ("10.0.0.1", "172.16.0.1", "192.168.1.1"):
row = _log_row({"X-Forwarded-For": ip})
assert _detect_ip_leak(row, row["fields"]["headers"]) is None, ip
def test_xff_link_local_is_not_a_leak():
row = _log_row({"X-Forwarded-For": "169.254.1.1"})
assert _detect_ip_leak(row, row["fields"]["headers"]) is None
def test_forwarded_header_rfc7239_parsed():
row = _log_row({"Forwarded": "for=1.2.3.4;by=5.6.7.8"})
result = _detect_ip_leak(row, row["fields"]["headers"])
assert result is not None
assert result["real_ip_claim"] == "1.2.3.4"
assert result["source_header"] == "Forwarded"
def test_forwarded_with_ipv6_and_port():
row = _log_row({"Forwarded": 'for="[2606:4700:4700::1111]:4711"'})
result = _detect_ip_leak(row, row["fields"]["headers"])
assert result is not None
assert result["real_ip_claim"] == "2606:4700:4700::1111"
def test_x_real_ip_fallback():
row = _log_row({"X-Real-IP": "1.1.1.1"})
result = _detect_ip_leak(row, row["fields"]["headers"])
assert result is not None
assert result["source_header"] == "X-Real-IP"
assert result["real_ip_claim"] == "1.1.1.1"
def test_cf_connecting_ip_variant():
row = _log_row({"CF-Connecting-IP": "1.0.0.1"})
result = _detect_ip_leak(row, row["fields"]["headers"])
assert result is not None
assert result["source_header"] == "CF-Connecting-IP"
assert result["real_ip_claim"] == "1.0.0.1"
def test_priority_forwarded_over_xff():
row = _log_row({
"Forwarded": "for=1.1.1.1",
"X-Forwarded-For": "2.2.2.2",
"X-Real-IP": "3.3.3.3",
})
result = _detect_ip_leak(row, row["fields"]["headers"])
assert result is not None
assert result["source_header"] == "Forwarded"
assert result["real_ip_claim"] == "1.1.1.1"
# All proxy headers preserved in metadata.
assert "X-Forwarded-For" in result["headers_seen"]
assert "X-Real-IP" in result["headers_seen"]
def test_case_insensitive_header_match():
row = _log_row({"x-forwarded-for": "1.1.1.1"})
result = _detect_ip_leak(row, row["fields"]["headers"])
assert result is not None
assert result["real_ip_claim"] == "1.1.1.1"
def test_trusted_proxy_source_skipped(monkeypatch):
monkeypatch.setenv("DECNET_TRUSTED_PROXIES", "8.8.8.8")
row = _log_row({"X-Forwarded-For": "1.1.1.1"})
assert _detect_ip_leak(row, row["fields"]["headers"]) is None
def test_trusted_proxy_cidr(monkeypatch):
monkeypatch.setenv("DECNET_TRUSTED_PROXIES", "8.8.8.0/24")
row = _log_row({"X-Forwarded-For": "1.1.1.1"})
assert _detect_ip_leak(row, row["fields"]["headers"]) is None
def test_malformed_xff_falls_through_to_next_parseable():
row = _log_row({"X-Forwarded-For": "garbage, 1.1.1.1, not-ip"})
result = _detect_ip_leak(row, row["fields"]["headers"])
assert result is not None
assert result["real_ip_claim"] == "1.1.1.1"
def test_all_values_unparseable_bails():
row = _log_row({"X-Forwarded-For": "not-ip, still-not-ip"})
assert _detect_ip_leak(row, row["fields"]["headers"]) is None
def test_no_headers_skipped():
row = _log_row({})
assert _detect_ip_leak(row, {}) is None
def test_non_http_service_skipped():
row = _log_row(
{"X-Forwarded-For": "1.1.1.1"},
service="ssh",
)
assert _detect_ip_leak(row, row["fields"]["headers"]) is None
def test_missing_attacker_ip_bails():
row = _log_row({"X-Forwarded-For": "1.1.1.1"}, source_ip="")
assert _detect_ip_leak(row, row["fields"]["headers"]) is None
# ─── end-to-end via _extract_bounty ─────────────────────────────────────────
@pytest.mark.asyncio
async def test_extract_bounty_emits_ip_leak_row():
row = _log_row({
"X-Forwarded-For": "1.1.1.1",
"User-Agent": "curl/7.81.0",
})
repo = AsyncMock()
await _extract_bounty(repo, row)
# Expect two bounty calls — User-Agent fingerprint + ip_leak.
types = [
call.args[0]["bounty_type"]
for call in repo.add_bounty.call_args_list
]
assert "fingerprint" in types
assert "ip_leak" in types
leak_call = next(
c for c in repo.add_bounty.call_args_list
if c.args[0]["bounty_type"] == "ip_leak"
)
payload = leak_call.args[0]["payload"]
assert payload["real_ip_claim"] == "1.1.1.1"
assert payload["source_ip"] == "8.8.8.8"
@pytest.mark.asyncio
async def test_extract_bounty_no_leak_no_call():
row = _log_row({"X-Forwarded-For": "8.8.8.8"}) # matches source
repo = AsyncMock()
await _extract_bounty(repo, row)
types = [
call.args[0]["bounty_type"]
for call in repo.add_bounty.call_args_list
]
assert "ip_leak" not in types
# ─── spoofed-source (non-routable claim) classification ─────────────────────
def test_categorize_public():
assert _categorize_claimed_ip("8.8.8.8") == "public"
assert _categorize_claimed_ip("2606:4700:4700::1111") == "public"
def test_categorize_loopback():
assert _categorize_claimed_ip("127.0.0.1") == "loopback"
assert _categorize_claimed_ip("::1") == "loopback"
def test_categorize_private():
for ip in ("10.0.0.1", "172.16.0.1", "192.168.1.1"):
assert _categorize_claimed_ip(ip) == "private", ip
def test_categorize_link_local():
assert _categorize_claimed_ip("169.254.1.1") == "link_local"
assert _categorize_claimed_ip("fe80::1") == "link_local"
def test_categorize_multicast_and_reserved():
assert _categorize_claimed_ip("224.0.0.1") == "multicast"
# 240.0.0.1 is reserved (class E).
assert _categorize_claimed_ip("240.0.0.1") == "reserved"
def test_categorize_unparseable():
assert _categorize_claimed_ip("not-an-ip") == "unparseable"
assert _categorize_claimed_ip("") == "unparseable"
def test_spoofed_source_fires_on_loopback_waf_bypass():
"""The original motivating case: curl -H 'X-Forwarded-For: 127.0.0.1'
must produce a spoofed_source fingerprint, NOT an ip_leak."""
row = _log_row({"X-Forwarded-For": "127.0.0.1"})
result = _detect_spoofed_source(row, row["fields"]["headers"])
assert result is not None
assert result["fingerprint_type"] == "spoofed_source"
assert result["claim_category"] == "loopback"
assert result["claimed_ip"] == "127.0.0.1"
assert result["source_ip"] == "8.8.8.8"
def test_spoofed_source_fires_on_rfc1918():
row = _log_row({"X-Forwarded-For": "10.0.0.5"})
result = _detect_spoofed_source(row, row["fields"]["headers"])
assert result is not None
assert result["claim_category"] == "private"
def test_spoofed_source_skipped_on_public_claim():
"""A public-IP claim is a leak, not a spoof — the two detectors
are mutually exclusive."""
row = _log_row({"X-Forwarded-For": "1.1.1.1"})
assert _detect_spoofed_source(row, row["fields"]["headers"]) is None
def test_spoofed_source_skipped_when_matches_source():
row = _log_row({"X-Forwarded-For": "8.8.8.8"})
assert _detect_spoofed_source(row, row["fields"]["headers"]) is None
def test_spoofed_source_respects_trusted_proxy(monkeypatch):
monkeypatch.setenv("DECNET_TRUSTED_PROXIES", "8.8.8.8")
row = _log_row({"X-Forwarded-For": "127.0.0.1"})
assert _detect_spoofed_source(row, row["fields"]["headers"]) is None
@pytest.mark.asyncio
async def test_extract_bounty_emits_spoofed_source_fingerprint():
row = _log_row({"X-Forwarded-For": "127.0.0.1"})
repo = AsyncMock()
await _extract_bounty(repo, row)
calls = [c.args[0] for c in repo.add_bounty.call_args_list]
# ip_leak must NOT fire for the loopback case.
assert all(c["bounty_type"] != "ip_leak" for c in calls)
# A fingerprint with fingerprint_type=spoofed_source should fire.
spoof = next(
(c for c in calls
if c["bounty_type"] == "fingerprint"
and c["payload"].get("fingerprint_type") == "spoofed_source"),
None,
)
assert spoof is not None
assert spoof["payload"]["claim_category"] == "loopback"