feat(attackers): XFF mismatch detection — attacker IP leak bounties

Attackers routinely front their scanners with VPNs/proxies, so the
TCP source we log is the proxy egress, not the real host. But a
surprising number of attacker setups are misconfigured: the proxy
forwards the real IP in an X-Forwarded-For (or Forwarded / X-Real-IP
/ CDN-variant) header. From our side that's a free attribution leak.

New _detect_ip_leak extractor in decnet/web/ingester.py fires at
ingest time per HTTP request. Logic:

1. Require service=http, source_ip present, headers present.
2. If source_ip ∈ DECNET_TRUSTED_PROXIES (comma-separated IPs or
   CIDRs) → legitimate reverse-proxy forwarding, skip.
3. Walk proxy-family headers in priority order: Forwarded (RFC 7239)
   → X-Forwarded-For → X-Real-IP → True-Client-IP → CF-Connecting-IP.
4. Extract the left-most parseable IP from the winning header.
5. If that IP differs from the TCP source → emit a bounty with
   bounty_type="ip_leak" carrying {source_ip, real_ip_claim,
   source_header, headers_seen, decky, path, method}.

Storage is the existing Bounty table — no schema change; de-dup is
handled by Bounty's (attacker_ip, bounty_type, payload_hash) key, so
repeat requests with the same leaked IP don't spam.

AttackerDetail renders a warn-accent "LEAKED IPs:" row under ORIGIN
listing distinct real_ip_claim values; hover tooltip shows the source
header + path of the most recent leak. Only shown when at least one
ip_leak bounty exists.

RFC 7239 Forwarded parser handles the full vocabulary — bare IPv4,
IPv4:port, quoted, IPv6 in brackets, IPv6 with port — returning only
IPs that actually parse.

Closes DEVELOPMENT.md "Network Topology Leakage → X-Forwarded-For
mismatches". Phase 3 of the three-phase Attacker Intelligence series
(phase 1, scanned-vs-interacted, and phase 2, PTR records, have
already shipped).

DECNET_TRUSTED_PROXIES env shape matches THREAT_MODEL DA-08's
"revisit when verified-proxy config lands" note — same token set
future rate-limit work will consume.
This commit is contained in:
2026-04-24 17:39:03 -04:00
parent 5a34371009
commit 2a0c5ca410
7 changed files with 518 additions and 1 deletions

View File

@@ -257,6 +257,15 @@ class BaseRepository(ABC):
query."""
raise NotImplementedError
async def get_attacker_ip_leaks(self, attacker_uuid: str) -> list[dict[str, Any]]:
    """List the attacker's ``bounty_type='ip_leak'`` rows, most recent first.

    The base implementation only raises; concrete repositories override it.
    Each returned row carries a payload with the TCP source IP, the header
    that leaked, and the claimed real IP. The payload shape is defined by
    the XFF-mismatch extractor in ``decnet.web.ingester``.

    Raises:
        NotImplementedError: always, unless overridden by a subclass.
    """
    raise NotImplementedError()
@abstractmethod
async def get_session_log(self, sid: str) -> Optional[dict[str, Any]]:
"""Look up the `session_recorded` Log row for a given session UUID."""

View File

@@ -907,6 +907,39 @@ class SQLModelRepository(BaseRepository):
)
return [(svc, evt) for svc, evt in rows.all()]
async def get_attacker_ip_leaks(self, attacker_uuid: str) -> list[dict[str, Any]]:
    """Fetch the attacker's ``ip_leak`` bounty rows, most recent first.

    The attacker's IP is resolved from its UUID, then ``Bounty`` rows with
    that ``attacker_ip`` and ``bounty_type == "ip_leak"`` are returned as
    JSON-mode dicts ordered by descending timestamp. An unknown UUID (or an
    attacker without an IP) yields an empty list.

    A payload that comes back as a string is JSON-decoded in place; a
    payload that fails to decode is replaced with ``{}``. Decoded payloads
    follow the ingester's XFF-mismatch shape (``real_ip_claim``,
    ``source_header``, ``headers_seen``, ``path``, ``method``).
    """
    async with self._session() as session:
        attacker_ip = (
            await session.execute(
                select(Attacker.ip).where(Attacker.uuid == attacker_uuid)
            )
        ).scalar_one_or_none()
        if not attacker_ip:
            return []

        leak_query = (
            select(Bounty)
            .where(Bounty.attacker_ip == attacker_ip)
            .where(Bounty.bounty_type == "ip_leak")
            .order_by(desc(Bounty.timestamp))
        )
        result = await session.execute(leak_query)

        leaks: list[dict[str, Any]] = []
        for bounty in result.scalars().all():
            record = bounty.model_dump(mode="json")
            encoded = record.get("payload")
            # Hand callers a decoded payload rather than a JSON string.
            if isinstance(encoded, str):
                try:
                    record["payload"] = json.loads(encoded)
                except (ValueError, TypeError):
                    record["payload"] = {}
            leaks.append(record)
        return leaks
async def get_attacker_artifacts(self, uuid: str) -> list[dict[str, Any]]:
"""Return `file_captured` logs for the attacker identified by UUID.

View File

@@ -1,9 +1,11 @@
import asyncio
import contextlib
import ipaddress
import os
import json
import re
import time
from typing import Any
from typing import Any, Optional
from pathlib import Path
from decnet.bus import topics as _topics
@@ -243,6 +245,22 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non
}
})
# 2b. IP leak — the attacker's real IP accidentally forwarded in a
# proxy-family header (X-Forwarded-For / Forwarded / X-Real-IP /
# CDN variants). Left-most value differing from the TCP source is
# a high-confidence attribution signal. DECNET_TRUSTED_PROXIES
# opts specific source IPs out (legitimate reverse proxy in front
# of DECNET).
_leak = _detect_ip_leak(log_data, _headers)
if _leak is not None:
await repo.add_bounty({
"decky": log_data.get("decky"),
"service": log_data.get("service"),
"attacker_ip": log_data.get("attacker_ip"),
"bounty_type": "ip_leak",
"payload": _leak,
})
# 3. VNC client version fingerprint
_vnc_ver = _fields.get("client_version")
if _vnc_ver and log_data.get("event_type") == "version":
@@ -393,3 +411,185 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non
"options_order": _fields.get("options_order"),
},
})
# ─── IP-leak detection (XFF / Forwarded / X-Real-IP / CDN variants) ──────────
# Proxy-family headers inspected by the IP-leak detector, in priority order:
# _extract_claimed_ip walks this tuple top to bottom and the first header
# that yields a parseable IP wins. Forwarded (RFC 7239) is the standardized
# form; X-Forwarded-For is the de-facto one; X-Real-IP and the CDN variants
# are common nginx / CloudFlare conventions. Names are matched
# case-insensitively by _lookup_header, so the casing here is cosmetic.
_PROXY_HEADERS = (
"Forwarded",
"X-Forwarded-For",
"X-Real-IP",
"True-Client-IP",
"CF-Connecting-IP",
)
# RFC 7239 `Forwarded: for=1.2.3.4` / `for="[2001:db8::1]:4711"`.
# Group 1 is the raw for= value: an optional double quote on each side is
# consumed by the pattern itself, and the capture ends at the next
# pair/element delimiter (; or ,) or end-of-string. IPv6 brackets and any
# :port suffix are still present in the capture; _parse_forwarded removes
# them afterwards. Matching is case-insensitive (`For=` works too).
# NOTE(review): nothing anchors the start of `for`, so a parameter whose
# name merely ends in "for" would also match — confirm that is acceptable.
_FORWARDED_KV_RE = re.compile(
r'for\s*=\s*"?([^",;]+?)"?(?=[;,]|$)',
re.IGNORECASE,
)
def _get_trusted_proxies() -> list[ipaddress._BaseNetwork]:
    """Parse ``DECNET_TRUSTED_PROXIES`` into a list of network objects.

    The variable is a comma-separated list of bare IPs (``1.2.3.4``,
    ``2001:db8::1``) and/or CIDRs (``10.0.0.0/8``). It is re-read on every
    call, so a change to the environment takes effect immediately.

    Bare addresses become single-host networks with the full prefix length
    of their family (/32 for IPv4, /128 for IPv6). CIDRs with host bits set
    are accepted and masked down to their network.

    Returns:
        The parsed networks in the order they were listed. Empty when the
        variable is unset or empty (no opt-outs).

    Malformed entries are logged at WARNING and skipped — a typo in the
    env shouldn't brick the ingester.
    """
    raw = os.environ.get("DECNET_TRUSTED_PROXIES", "")
    networks: list[ipaddress._BaseNetwork] = []
    for token in raw.split(","):
        token = token.strip()
        if not token:
            continue
        try:
            # ip_network() handles both forms. Appending "/32" to a bare
            # entry by hand is wrong for IPv6: "2001:db8::1/32" would trust
            # the whole 2001:db8::/32 block instead of a single host.
            networks.append(ipaddress.ip_network(token, strict=False))
        except (ValueError, TypeError) as exc:
            logger.warning("DECNET_TRUSTED_PROXIES: ignoring %r: %s", token, exc)
    return networks
def _is_trusted_source(source_ip: str) -> bool:
    """Report whether ``source_ip`` lies inside any trusted-proxy network.

    The networks come from ``_get_trusted_proxies()``. Input that does not
    parse as an IP address is never trusted.
    """
    try:
        candidate = ipaddress.ip_address(source_ip)
    except (ValueError, TypeError):
        return False

    def _covers(network) -> bool:
        # A network that cannot be compared against the candidate simply
        # does not match.
        try:
            return candidate in network
        except (ValueError, TypeError):
            return False

    return any(_covers(network) for network in _get_trusted_proxies())
def _lookup_header(headers: dict[str, Any], name: str) -> Optional[str]:
    """Fetch a header value by name, ignoring the case of the name.

    Returns the whitespace-stripped value of the first entry whose key
    matches ``name`` case-insensitively and whose value is a non-blank
    string. Non-string keys, non-string values and blank values are passed
    over. Returns ``None`` when nothing qualifies.
    """
    wanted = name.lower()
    for key, value in headers.items():
        if not isinstance(key, str) or key.lower() != wanted:
            continue
        if not isinstance(value, str):
            continue
        stripped = value.strip()
        if stripped:
            return stripped
    return None
def _parse_forwarded(value: str) -> Optional[str]:
    """Return the left-most parseable ``for=`` IP from an RFC 7239 Forwarded header.

    Handles the quoted IPv6-bracket-port form (`for="[2001:db8::1]:4711"`)
    plus the bare IPv4 (`for=1.2.3.4`) and IPv4:port (`for=1.2.3.4:80`)
    variants.

    Every ``for=`` parameter in the header is tried in order. RFC 7239
    allows non-IP node identifiers (``unknown``, obfuscated ``_token``
    names), so an unparseable first element must not hide a real address
    further along the chain. This matches how the X-Forwarded-For chain is
    handled.

    Returns:
        The IP as written in the header (brackets and port removed), or
        ``None`` when no ``for=`` parameter holds a parseable address.
    """
    for match in _FORWARDED_KV_RE.finditer(value):
        token = match.group(1).strip()
        if token.startswith("["):
            # Bracketed IPv6, optionally followed by :port — keep only what
            # sits inside the brackets.
            end = token.find("]")
            if end > 0:
                token = token[1:end]
        elif token.count(":") == 1:
            # IPv4:port. IPv6 bare literals have ≥2 colons so we leave those.
            token = token.split(":")[0]
        try:
            ipaddress.ip_address(token)
        except (ValueError, TypeError):
            # Not an address (e.g. "unknown", "_hidden"): try the next for=.
            continue
        return token
    return None
def _parse_xff_chain(value: str) -> Optional[str]:
    """Return the left-most parseable IP from an X-Forwarded-For chain.

    The header is a comma-separated list of hops. Each entry may be a bare
    address, a quoted address, a bracketed IPv6 literal, or any of those
    with a ``:port`` suffix (``1.2.3.4:8080``, ``[2001:db8::1]:4711``).
    Quotes, brackets and the port are removed before validation.
    Entries that still do not parse as an IP are skipped.

    Returns:
        The first valid address as a string, or ``None`` if no entry parses.
    """
    for entry in value.split(","):
        token = entry.strip().strip('"')
        if not token:
            continue
        if token.startswith("[") and "]" in token:
            # "[v6]" or "[v6]:port" — keep only what is inside the brackets.
            token = token[1:token.index("]")]
        elif token.count(":") == 1:
            # "v4:port" — a bare IPv6 literal always has at least two colons.
            token = token.split(":", 1)[0]
        else:
            token = token.lstrip("[").rstrip("]")
        try:
            ipaddress.ip_address(token)
        except (ValueError, TypeError):
            continue
        return token
    return None
def _extract_claimed_ip(headers: dict[str, Any]) -> tuple[Optional[str], Optional[str]]:
    """Find the first proxy-family header that yields a parseable IP.

    Headers are tried in ``_PROXY_HEADERS`` order. ``Forwarded`` and
    ``X-Forwarded-For`` go through their dedicated parsers; every other
    header is treated as carrying a single address.

    Returns:
        ``(claimed_ip, header_name)`` for the winning header, or
        ``(None, None)`` when no header is present with a valid IP.
    """

    def _single_ip(raw_value: str) -> Optional[str]:
        # Single-address headers: drop surrounding whitespace, double
        # quotes and IPv6 brackets, then validate.
        candidate = raw_value.strip().strip('"').lstrip("[").rstrip("]")
        try:
            ipaddress.ip_address(candidate)
        except (ValueError, TypeError):
            return None
        return candidate

    chain_parsers = {
        "Forwarded": _parse_forwarded,
        "X-Forwarded-For": _parse_xff_chain,
    }
    for header_name in _PROXY_HEADERS:
        header_value = _lookup_header(headers, header_name)
        if header_value is None:
            continue
        claimed_ip = chain_parsers.get(header_name, _single_ip)(header_value)
        if claimed_ip is not None:
            return claimed_ip, header_name
    return None, None
def _normalize_ip(text: str) -> Optional[ipaddress._BaseAddress]:
    """Parse ``text`` into an address object, unwrapping IPv4-mapped IPv6; ``None`` if unparseable."""
    try:
        addr = ipaddress.ip_address(text)
    except (ValueError, TypeError):
        return None
    mapped = getattr(addr, "ipv4_mapped", None)
    return addr if mapped is None else mapped


def _detect_ip_leak(
    log_data: dict[str, Any], headers: dict[str, Any],
) -> Optional[dict[str, Any]]:
    """Return a bounty payload iff an attribution-leak mismatch is present.

    See :data:`_PROXY_HEADERS` for the set of headers checked. A leak is
    claimed when:

    - ``log_data["service"]`` is ``"http"`` and ``headers`` is a non-empty dict,
    - the TCP source IP (``log_data["attacker_ip"]``) is NOT in
      ``DECNET_TRUSTED_PROXIES``,
    - a proxy-family header is present with a parseable IP, and
    - that IP differs from the TCP source.

    The two addresses are compared as addresses, not as text, so a
    differently spelled IPv6 literal or an IPv4-mapped form
    (``::ffff:a.b.c.d``) of the source IP is not reported as a leak.

    Returns:
        A dict with ``source_ip``, ``real_ip_claim``, ``source_header``,
        ``headers_seen``, ``decky``, ``path`` and ``method``; otherwise
        ``None``.
    """
    if log_data.get("service") != "http":
        return None
    if not isinstance(headers, dict) or not headers:
        return None
    source_ip = log_data.get("attacker_ip")
    if not isinstance(source_ip, str) or not source_ip:
        return None
    if _is_trusted_source(source_ip):
        return None

    claimed, header_name = _extract_claimed_ip(headers)
    if claimed is None or claimed == source_ip:
        return None
    # The strings differ, but they may still denote the same address.
    claimed_addr = _normalize_ip(claimed)
    if claimed_addr is not None and claimed_addr == _normalize_ip(source_ip):
        return None

    # Keep only the proxy-family values in the echoed-back metadata so
    # the bounty payload stays compact.
    seen: dict[str, str] = {}
    for name in _PROXY_HEADERS:
        raw = _lookup_header(headers, name)
        if raw is not None:
            seen[name] = raw

    # "fields" may be absent or not a dict; path/method are then None.
    fields = log_data.get("fields")
    if not isinstance(fields, dict):
        fields = {}
    return {
        "source_ip": source_ip,
        "real_ip_claim": claimed,
        "source_header": header_name,
        "headers_seen": seen,
        "decky": log_data.get("decky"),
        "path": fields.get("path"),
        "method": fields.get("method"),
    }

View File

@@ -34,4 +34,8 @@ async def get_attacker_detail(
# immediately without a profiler re-tick.
pairs = await repo.get_attacker_service_activity(uuid)
attacker["service_activity"] = bucket_services(pairs)
# Attribution leaks — XFF / Forwarded / X-Real-IP mismatches captured
# by the HTTP bounty extractor. Empty list when no HTTP interaction
# or no mismatch.
attacker["ip_leaks"] = await repo.get_attacker_ip_leaks(uuid)
return attacker

View File

@@ -68,6 +68,20 @@ interface AttackerData {
interacted: string[];
scanned: string[];
};
ip_leaks?: Array<{
timestamp: string;
decky?: string;
service?: string;
bounty_type: string;
payload: {
source_ip?: string;
real_ip_claim?: string;
source_header?: string;
headers_seen?: Record<string, string>;
path?: string;
method?: string;
};
}>;
}
// ─── Fingerprint rendering ───────────────────────────────────────────────────
@@ -1027,6 +1041,42 @@ const AttackerDetail: React.FC = () => {
<span className="dim"></span>
)}
</div>
{attacker.ip_leaks && attacker.ip_leaks.length > 0 && (
<div>
<span className="dim" style={{ color: 'var(--warn, #e0a040)' }}>
LEAKED IPs:{' '}
</span>
{Array.from(
new Set(
(attacker.ip_leaks || [])
.map((l) => l.payload?.real_ip_claim)
.filter((v): v is string => !!v),
),
).map((ip, i, arr) => {
const latest = (attacker.ip_leaks || []).find(
(l) => l.payload?.real_ip_claim === ip,
);
const tooltip = latest
? `${latest.payload.source_header ?? '?'} on ${
latest.payload.method ?? '?'
} ${latest.payload.path ?? '/'}`
: '';
return (
<span
key={ip}
style={{
color: 'var(--warn, #e0a040)',
fontFamily: 'monospace',
}}
title={tooltip}
>
{ip}
{i < arr.length - 1 ? ', ' : ''}
</span>
);
})}
</div>
)}
</div>
</Section>

View File

@@ -184,6 +184,7 @@ class TestGetAttackerDetail:
mock_repo.get_attacker_by_uuid = AsyncMock(return_value=sample)
mock_repo.get_attacker_behavior = AsyncMock(return_value=None)
mock_repo.get_attacker_service_activity = AsyncMock(return_value=[])
mock_repo.get_attacker_ip_leaks = AsyncMock(return_value=[])
result = await get_attacker_detail(uuid="att-uuid-1", user={"uuid": "test-user", "role": "viewer"})
@@ -213,6 +214,7 @@ class TestGetAttackerDetail:
mock_repo.get_attacker_by_uuid = AsyncMock(return_value=sample)
mock_repo.get_attacker_behavior = AsyncMock(return_value=None)
mock_repo.get_attacker_service_activity = AsyncMock(return_value=[])
mock_repo.get_attacker_ip_leaks = AsyncMock(return_value=[])
result = await get_attacker_detail(uuid="att-uuid-1", user={"uuid": "test-user", "role": "viewer"})
@@ -238,6 +240,7 @@ class TestGetAttackerDetail:
mock_repo.get_attacker_by_uuid = AsyncMock(return_value=sample)
mock_repo.get_attacker_behavior = AsyncMock(return_value=None)
mock_repo.get_attacker_service_activity = AsyncMock(return_value=pairs)
mock_repo.get_attacker_ip_leaks = AsyncMock(return_value=[])
result = await get_attacker_detail(
uuid="att-uuid-1",
@@ -249,6 +252,41 @@ class TestGetAttackerDetail:
"scanned": ["http"],
}
@pytest.mark.asyncio
async def test_ip_leaks_included_in_response(self):
    """The detail endpoint returns the repo's ip_leak rows under ``ip_leaks``."""
    from decnet.web.router.attackers.api_get_attacker_detail import get_attacker_detail

    attacker = _sample_attacker()
    leak_payload = {
        "source_ip": "203.0.113.42",
        "real_ip_claim": "198.51.100.7",
        "source_header": "X-Forwarded-For",
        "path": "/wp-admin/",
        "method": "GET",
    }
    expected_leaks = [
        {
            "timestamp": "2026-04-24T12:00:00+00:00",
            "decky": "http-01",
            "service": "http",
            "bounty_type": "ip_leak",
            "payload": leak_payload,
        },
    ]

    with patch("decnet.web.router.attackers.api_get_attacker_detail.repo") as mock_repo:
        mock_repo.get_attacker_by_uuid = AsyncMock(return_value=attacker)
        mock_repo.get_attacker_behavior = AsyncMock(return_value=None)
        mock_repo.get_attacker_service_activity = AsyncMock(return_value=[])
        mock_repo.get_attacker_ip_leaks = AsyncMock(return_value=expected_leaks)

        response = await get_attacker_detail(
            uuid="att-uuid-1",
            user={"uuid": "test-user", "role": "viewer"},
        )

    assert response["ip_leaks"] == expected_leaks
    assert response["ip_leaks"][0]["payload"]["real_ip_claim"] == "198.51.100.7"
# ─── GET /attackers/{uuid}/commands ──────────────────────────────────────────

View File

@@ -0,0 +1,183 @@
"""XFF / proxy-family mismatch detection in the ingester's bounty extractor."""
from __future__ import annotations
from unittest.mock import AsyncMock
import pytest
from decnet.web.ingester import _detect_ip_leak, _extract_bounty
def _log_row(
    headers: dict[str, str] | None = None,
    *,
    source_ip: str = "203.0.113.42",
    service: str = "http",
    event_type: str = "request",
) -> dict:
    """Build a log row shaped like an HTTP request event for the detector tests.

    ``headers`` is stored under ``fields["headers"]`` (an empty dict when
    omitted or empty). The decky, method and path are fixed; the source IP,
    service and event type can be overridden by keyword.
    """
    fields = {
        "method": "GET",
        "path": "/wp-admin/",
        "headers": headers or {},
    }
    return {
        "decky": "http-01",
        "service": service,
        "attacker_ip": source_ip,
        "event_type": event_type,
        "fields": fields,
    }
# ─── pure detector ──────────────────────────────────────────────────────────
def test_xff_leftmost_differs_from_source_emits_leak():
    """A left-most XFF entry that differs from the TCP source is reported."""
    entry = _log_row({
        "X-Forwarded-For": "198.51.100.7, 10.0.0.1",
    })
    leak = _detect_ip_leak(entry, entry["fields"]["headers"])
    assert leak is not None
    expected = {
        "source_ip": "203.0.113.42",
        "real_ip_claim": "198.51.100.7",
        "source_header": "X-Forwarded-For",
        "path": "/wp-admin/",
    }
    assert {key: leak[key] for key in expected} == expected
def test_xff_matches_source_no_leak():
    """An XFF value equal to the TCP source is not a leak."""
    entry = _log_row({"X-Forwarded-For": "203.0.113.42"})
    leak = _detect_ip_leak(entry, entry["fields"]["headers"])
    assert leak is None
def test_forwarded_header_rfc7239_parsed():
    """The ``for=`` value of an RFC 7239 Forwarded header is picked up."""
    entry = _log_row({"Forwarded": "for=1.2.3.4;by=5.6.7.8"})
    leak = _detect_ip_leak(entry, entry["fields"]["headers"])
    assert leak is not None
    assert (leak["real_ip_claim"], leak["source_header"]) == ("1.2.3.4", "Forwarded")
def test_forwarded_with_ipv6_and_port():
    """Quotes, brackets and port are removed from a Forwarded IPv6 value."""
    entry = _log_row({"Forwarded": 'for="[2001:db8::1]:4711"'})
    leak = _detect_ip_leak(entry, entry["fields"]["headers"])
    assert leak is not None
    assert leak["real_ip_claim"] == "2001:db8::1"
def test_x_real_ip_fallback():
    """X-Real-IP is used when it is the only proxy header present."""
    entry = _log_row({"X-Real-IP": "198.51.100.7"})
    leak = _detect_ip_leak(entry, entry["fields"]["headers"])
    assert leak is not None
    assert (leak["source_header"], leak["real_ip_claim"]) == ("X-Real-IP", "198.51.100.7")
def test_cf_connecting_ip_variant():
    """CF-Connecting-IP is recognised as a proxy-family header."""
    entry = _log_row({"CF-Connecting-IP": "198.51.100.9"})
    leak = _detect_ip_leak(entry, entry["fields"]["headers"])
    assert leak is not None
    assert (leak["source_header"], leak["real_ip_claim"]) == ("CF-Connecting-IP", "198.51.100.9")
def test_priority_forwarded_over_xff():
    """Forwarded wins over the other headers, which are still echoed back."""
    entry = _log_row({
        "Forwarded": "for=1.1.1.1",
        "X-Forwarded-For": "2.2.2.2",
        "X-Real-IP": "3.3.3.3",
    })
    leak = _detect_ip_leak(entry, entry["fields"]["headers"])
    assert leak is not None
    assert (leak["source_header"], leak["real_ip_claim"]) == ("Forwarded", "1.1.1.1")
    # The lower-priority proxy headers remain visible in the metadata.
    for losing_header in ("X-Forwarded-For", "X-Real-IP"):
        assert losing_header in leak["headers_seen"]
def test_case_insensitive_header_match():
    """A lower-cased header name is matched just like the canonical one."""
    entry = _log_row({"x-forwarded-for": "198.51.100.7"})
    leak = _detect_ip_leak(entry, entry["fields"]["headers"])
    assert leak is not None
    assert leak["real_ip_claim"] == "198.51.100.7"
def test_trusted_proxy_source_skipped(monkeypatch):
    """A source IP listed verbatim in DECNET_TRUSTED_PROXIES is exempt."""
    monkeypatch.setenv("DECNET_TRUSTED_PROXIES", "203.0.113.42")
    entry = _log_row({"X-Forwarded-For": "198.51.100.7"})
    leak = _detect_ip_leak(entry, entry["fields"]["headers"])
    assert leak is None
def test_trusted_proxy_cidr(monkeypatch):
    """A source IP covered by a trusted CIDR is exempt."""
    monkeypatch.setenv("DECNET_TRUSTED_PROXIES", "203.0.113.0/24")
    entry = _log_row({"X-Forwarded-For": "198.51.100.7"})
    leak = _detect_ip_leak(entry, entry["fields"]["headers"])
    assert leak is None
def test_malformed_xff_falls_through_to_next_parseable():
    """Unparseable XFF entries are skipped in favour of the next valid IP."""
    entry = _log_row({"X-Forwarded-For": "garbage, 198.51.100.7, not-ip"})
    leak = _detect_ip_leak(entry, entry["fields"]["headers"])
    assert leak is not None
    assert leak["real_ip_claim"] == "198.51.100.7"
def test_all_values_unparseable_bails():
    """An XFF chain without a single valid IP yields no leak."""
    entry = _log_row({"X-Forwarded-For": "not-ip, still-not-ip"})
    leak = _detect_ip_leak(entry, entry["fields"]["headers"])
    assert leak is None
def test_no_headers_skipped():
    """An empty header dict short-circuits to no leak."""
    entry = _log_row({})
    leak = _detect_ip_leak(entry, {})
    assert leak is None
def test_non_http_service_skipped():
    """Rows from services other than http are ignored."""
    entry = _log_row({"X-Forwarded-For": "198.51.100.7"}, service="ssh")
    leak = _detect_ip_leak(entry, entry["fields"]["headers"])
    assert leak is None
def test_missing_attacker_ip_bails():
    """A row with an empty attacker_ip cannot produce a leak."""
    entry = _log_row({"X-Forwarded-For": "198.51.100.7"}, source_ip="")
    leak = _detect_ip_leak(entry, entry["fields"]["headers"])
    assert leak is None
# ─── end-to-end via _extract_bounty ─────────────────────────────────────────
@pytest.mark.asyncio
async def test_extract_bounty_emits_ip_leak_row():
    """_extract_bounty records an ip_leak bounty alongside the UA fingerprint."""
    entry = _log_row({
        "X-Forwarded-For": "198.51.100.7",
        "User-Agent": "curl/7.81.0",
    })
    repo = AsyncMock()
    await _extract_bounty(repo, entry)

    # Expect two bounty calls — User-Agent fingerprint + ip_leak.
    recorded = [call.args[0] for call in repo.add_bounty.call_args_list]
    recorded_types = [bounty["bounty_type"] for bounty in recorded]
    assert "fingerprint" in recorded_types
    assert "ip_leak" in recorded_types

    leak_bounty = next(
        bounty for bounty in recorded if bounty["bounty_type"] == "ip_leak"
    )
    payload = leak_bounty["payload"]
    assert payload["real_ip_claim"] == "198.51.100.7"
    assert payload["source_ip"] == "203.0.113.42"
@pytest.mark.asyncio
async def test_extract_bounty_no_leak_no_call():
    """No ip_leak bounty is recorded when XFF matches the TCP source."""
    entry = _log_row({"X-Forwarded-For": "203.0.113.42"})  # matches source
    repo = AsyncMock()
    await _extract_bounty(repo, entry)

    recorded_types = {
        call.args[0]["bounty_type"] for call in repo.add_bounty.call_args_list
    }
    assert "ip_leak" not in recorded_types