merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
302
tests/web/test_ingester_xff.py
Normal file
302
tests/web/test_ingester_xff.py
Normal file
@@ -0,0 +1,302 @@
|
||||
"""XFF / proxy-family mismatch detection in the ingester's bounty extractor."""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.web.ingester import (
|
||||
_categorize_claimed_ip,
|
||||
_detect_ip_leak,
|
||||
_detect_spoofed_source,
|
||||
_extract_bounty,
|
||||
)
|
||||
|
||||
|
||||
def _log_row(
|
||||
headers: dict[str, str] | None = None,
|
||||
*,
|
||||
source_ip: str = "8.8.8.8",
|
||||
service: str = "http",
|
||||
event_type: str = "request",
|
||||
) -> dict:
|
||||
return {
|
||||
"decky": "http-01",
|
||||
"service": service,
|
||||
"attacker_ip": source_ip,
|
||||
"event_type": event_type,
|
||||
"fields": {
|
||||
"method": "GET",
|
||||
"path": "/wp-admin/",
|
||||
"headers": headers or {},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# ─── pure detector ──────────────────────────────────────────────────────────
|
||||
|
||||
def test_xff_leftmost_differs_from_source_emits_leak():
|
||||
row = _log_row({
|
||||
"X-Forwarded-For": "1.1.1.1, 10.0.0.1",
|
||||
})
|
||||
result = _detect_ip_leak(row, row["fields"]["headers"])
|
||||
assert result is not None
|
||||
assert result["source_ip"] == "8.8.8.8"
|
||||
assert result["real_ip_claim"] == "1.1.1.1"
|
||||
assert result["source_header"] == "X-Forwarded-For"
|
||||
# Identity-only payload — method/path intentionally omitted so the
|
||||
# bounty dedup collapses repeat hits from the same attacker.
|
||||
assert "method" not in result
|
||||
assert "path" not in result
|
||||
|
||||
|
||||
def test_xff_matches_source_no_leak():
|
||||
row = _log_row({"X-Forwarded-For": "8.8.8.8"})
|
||||
assert _detect_ip_leak(row, row["fields"]["headers"]) is None
|
||||
|
||||
|
||||
def test_xff_loopback_is_not_a_leak():
|
||||
"""curl -H 'X-Forwarded-For: 127.0.0.1' is the classic WAF-bypass
|
||||
payload. Must not be classified as an attribution leak — loopback
|
||||
is not a routable IP anyone could actually have as their real
|
||||
address."""
|
||||
row = _log_row({"X-Forwarded-For": "127.0.0.1"})
|
||||
assert _detect_ip_leak(row, row["fields"]["headers"]) is None
|
||||
|
||||
|
||||
def test_xff_rfc1918_is_not_a_leak():
|
||||
"""RFC1918 private addresses are forgery attempts, not leaks."""
|
||||
for ip in ("10.0.0.1", "172.16.0.1", "192.168.1.1"):
|
||||
row = _log_row({"X-Forwarded-For": ip})
|
||||
assert _detect_ip_leak(row, row["fields"]["headers"]) is None, ip
|
||||
|
||||
|
||||
def test_xff_link_local_is_not_a_leak():
|
||||
row = _log_row({"X-Forwarded-For": "169.254.1.1"})
|
||||
assert _detect_ip_leak(row, row["fields"]["headers"]) is None
|
||||
|
||||
|
||||
def test_forwarded_header_rfc7239_parsed():
|
||||
row = _log_row({"Forwarded": "for=1.2.3.4;by=5.6.7.8"})
|
||||
result = _detect_ip_leak(row, row["fields"]["headers"])
|
||||
assert result is not None
|
||||
assert result["real_ip_claim"] == "1.2.3.4"
|
||||
assert result["source_header"] == "Forwarded"
|
||||
|
||||
|
||||
def test_forwarded_with_ipv6_and_port():
|
||||
row = _log_row({"Forwarded": 'for="[2606:4700:4700::1111]:4711"'})
|
||||
result = _detect_ip_leak(row, row["fields"]["headers"])
|
||||
assert result is not None
|
||||
assert result["real_ip_claim"] == "2606:4700:4700::1111"
|
||||
|
||||
|
||||
def test_x_real_ip_fallback():
|
||||
row = _log_row({"X-Real-IP": "1.1.1.1"})
|
||||
result = _detect_ip_leak(row, row["fields"]["headers"])
|
||||
assert result is not None
|
||||
assert result["source_header"] == "X-Real-IP"
|
||||
assert result["real_ip_claim"] == "1.1.1.1"
|
||||
|
||||
|
||||
def test_cf_connecting_ip_variant():
|
||||
row = _log_row({"CF-Connecting-IP": "1.0.0.1"})
|
||||
result = _detect_ip_leak(row, row["fields"]["headers"])
|
||||
assert result is not None
|
||||
assert result["source_header"] == "CF-Connecting-IP"
|
||||
assert result["real_ip_claim"] == "1.0.0.1"
|
||||
|
||||
|
||||
def test_priority_forwarded_over_xff():
|
||||
row = _log_row({
|
||||
"Forwarded": "for=1.1.1.1",
|
||||
"X-Forwarded-For": "2.2.2.2",
|
||||
"X-Real-IP": "3.3.3.3",
|
||||
})
|
||||
result = _detect_ip_leak(row, row["fields"]["headers"])
|
||||
assert result is not None
|
||||
assert result["source_header"] == "Forwarded"
|
||||
assert result["real_ip_claim"] == "1.1.1.1"
|
||||
# All proxy headers preserved in metadata.
|
||||
assert "X-Forwarded-For" in result["headers_seen"]
|
||||
assert "X-Real-IP" in result["headers_seen"]
|
||||
|
||||
|
||||
def test_case_insensitive_header_match():
|
||||
row = _log_row({"x-forwarded-for": "1.1.1.1"})
|
||||
result = _detect_ip_leak(row, row["fields"]["headers"])
|
||||
assert result is not None
|
||||
assert result["real_ip_claim"] == "1.1.1.1"
|
||||
|
||||
|
||||
def test_trusted_proxy_source_skipped(monkeypatch):
|
||||
monkeypatch.setenv("DECNET_TRUSTED_PROXIES", "8.8.8.8")
|
||||
row = _log_row({"X-Forwarded-For": "1.1.1.1"})
|
||||
assert _detect_ip_leak(row, row["fields"]["headers"]) is None
|
||||
|
||||
|
||||
def test_trusted_proxy_cidr(monkeypatch):
|
||||
monkeypatch.setenv("DECNET_TRUSTED_PROXIES", "8.8.8.0/24")
|
||||
row = _log_row({"X-Forwarded-For": "1.1.1.1"})
|
||||
assert _detect_ip_leak(row, row["fields"]["headers"]) is None
|
||||
|
||||
|
||||
def test_malformed_xff_falls_through_to_next_parseable():
|
||||
row = _log_row({"X-Forwarded-For": "garbage, 1.1.1.1, not-ip"})
|
||||
result = _detect_ip_leak(row, row["fields"]["headers"])
|
||||
assert result is not None
|
||||
assert result["real_ip_claim"] == "1.1.1.1"
|
||||
|
||||
|
||||
def test_all_values_unparseable_bails():
|
||||
row = _log_row({"X-Forwarded-For": "not-ip, still-not-ip"})
|
||||
assert _detect_ip_leak(row, row["fields"]["headers"]) is None
|
||||
|
||||
|
||||
def test_no_headers_skipped():
|
||||
row = _log_row({})
|
||||
assert _detect_ip_leak(row, {}) is None
|
||||
|
||||
|
||||
def test_non_http_service_skipped():
|
||||
row = _log_row(
|
||||
{"X-Forwarded-For": "1.1.1.1"},
|
||||
service="ssh",
|
||||
)
|
||||
assert _detect_ip_leak(row, row["fields"]["headers"]) is None
|
||||
|
||||
|
||||
def test_missing_attacker_ip_bails():
|
||||
row = _log_row({"X-Forwarded-For": "1.1.1.1"}, source_ip="")
|
||||
assert _detect_ip_leak(row, row["fields"]["headers"]) is None
|
||||
|
||||
|
||||
# ─── end-to-end via _extract_bounty ─────────────────────────────────────────
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_bounty_emits_ip_leak_row():
|
||||
row = _log_row({
|
||||
"X-Forwarded-For": "1.1.1.1",
|
||||
"User-Agent": "curl/7.81.0",
|
||||
})
|
||||
repo = AsyncMock()
|
||||
await _extract_bounty(repo, row)
|
||||
|
||||
# Expect two bounty calls — User-Agent fingerprint + ip_leak.
|
||||
types = [
|
||||
call.args[0]["bounty_type"]
|
||||
for call in repo.add_bounty.call_args_list
|
||||
]
|
||||
assert "fingerprint" in types
|
||||
assert "ip_leak" in types
|
||||
|
||||
leak_call = next(
|
||||
c for c in repo.add_bounty.call_args_list
|
||||
if c.args[0]["bounty_type"] == "ip_leak"
|
||||
)
|
||||
payload = leak_call.args[0]["payload"]
|
||||
assert payload["real_ip_claim"] == "1.1.1.1"
|
||||
assert payload["source_ip"] == "8.8.8.8"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_bounty_no_leak_no_call():
|
||||
row = _log_row({"X-Forwarded-For": "8.8.8.8"}) # matches source
|
||||
repo = AsyncMock()
|
||||
await _extract_bounty(repo, row)
|
||||
|
||||
types = [
|
||||
call.args[0]["bounty_type"]
|
||||
for call in repo.add_bounty.call_args_list
|
||||
]
|
||||
assert "ip_leak" not in types
|
||||
|
||||
|
||||
# ─── spoofed-source (non-routable claim) classification ─────────────────────
|
||||
|
||||
def test_categorize_public():
|
||||
assert _categorize_claimed_ip("8.8.8.8") == "public"
|
||||
assert _categorize_claimed_ip("2606:4700:4700::1111") == "public"
|
||||
|
||||
|
||||
def test_categorize_loopback():
|
||||
assert _categorize_claimed_ip("127.0.0.1") == "loopback"
|
||||
assert _categorize_claimed_ip("::1") == "loopback"
|
||||
|
||||
|
||||
def test_categorize_private():
|
||||
for ip in ("10.0.0.1", "172.16.0.1", "192.168.1.1"):
|
||||
assert _categorize_claimed_ip(ip) == "private", ip
|
||||
|
||||
|
||||
def test_categorize_link_local():
|
||||
assert _categorize_claimed_ip("169.254.1.1") == "link_local"
|
||||
assert _categorize_claimed_ip("fe80::1") == "link_local"
|
||||
|
||||
|
||||
def test_categorize_multicast_and_reserved():
|
||||
assert _categorize_claimed_ip("224.0.0.1") == "multicast"
|
||||
# 240.0.0.1 is reserved (class E).
|
||||
assert _categorize_claimed_ip("240.0.0.1") == "reserved"
|
||||
|
||||
|
||||
def test_categorize_unparseable():
|
||||
assert _categorize_claimed_ip("not-an-ip") == "unparseable"
|
||||
assert _categorize_claimed_ip("") == "unparseable"
|
||||
|
||||
|
||||
def test_spoofed_source_fires_on_loopback_waf_bypass():
|
||||
"""The original motivating case: curl -H 'X-Forwarded-For: 127.0.0.1'
|
||||
must produce a spoofed_source fingerprint, NOT an ip_leak."""
|
||||
row = _log_row({"X-Forwarded-For": "127.0.0.1"})
|
||||
result = _detect_spoofed_source(row, row["fields"]["headers"])
|
||||
assert result is not None
|
||||
assert result["fingerprint_type"] == "spoofed_source"
|
||||
assert result["claim_category"] == "loopback"
|
||||
assert result["claimed_ip"] == "127.0.0.1"
|
||||
assert result["source_ip"] == "8.8.8.8"
|
||||
|
||||
|
||||
def test_spoofed_source_fires_on_rfc1918():
|
||||
row = _log_row({"X-Forwarded-For": "10.0.0.5"})
|
||||
result = _detect_spoofed_source(row, row["fields"]["headers"])
|
||||
assert result is not None
|
||||
assert result["claim_category"] == "private"
|
||||
|
||||
|
||||
def test_spoofed_source_skipped_on_public_claim():
|
||||
"""A public-IP claim is a leak, not a spoof — the two detectors
|
||||
are mutually exclusive."""
|
||||
row = _log_row({"X-Forwarded-For": "1.1.1.1"})
|
||||
assert _detect_spoofed_source(row, row["fields"]["headers"]) is None
|
||||
|
||||
|
||||
def test_spoofed_source_skipped_when_matches_source():
|
||||
row = _log_row({"X-Forwarded-For": "8.8.8.8"})
|
||||
assert _detect_spoofed_source(row, row["fields"]["headers"]) is None
|
||||
|
||||
|
||||
def test_spoofed_source_respects_trusted_proxy(monkeypatch):
|
||||
monkeypatch.setenv("DECNET_TRUSTED_PROXIES", "8.8.8.8")
|
||||
row = _log_row({"X-Forwarded-For": "127.0.0.1"})
|
||||
assert _detect_spoofed_source(row, row["fields"]["headers"]) is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_bounty_emits_spoofed_source_fingerprint():
|
||||
row = _log_row({"X-Forwarded-For": "127.0.0.1"})
|
||||
repo = AsyncMock()
|
||||
await _extract_bounty(repo, row)
|
||||
|
||||
calls = [c.args[0] for c in repo.add_bounty.call_args_list]
|
||||
# ip_leak must NOT fire for the loopback case.
|
||||
assert all(c["bounty_type"] != "ip_leak" for c in calls)
|
||||
# A fingerprint with fingerprint_type=spoofed_source should fire.
|
||||
spoof = next(
|
||||
(c for c in calls
|
||||
if c["bounty_type"] == "fingerprint"
|
||||
and c["payload"].get("fingerprint_type") == "spoofed_source"),
|
||||
None,
|
||||
)
|
||||
assert spoof is not None
|
||||
assert spoof["payload"]["claim_category"] == "loopback"
|
||||
Reference in New Issue
Block a user