feat(attackers): XFF mismatch detection — attacker IP leak bounties

Attackers routinely front their scanners with VPNs/proxies, so the TCP source we log is the proxy egress, not the real host. But a surprising number of attacker setups are misconfigured: the proxy forwards the real IP in an X-Forwarded-For (or Forwarded / X-Real-IP / CDN-variant) header. From our side that's a free attribution leak. New _detect_ip_leak extractor in decnet/web/ingester.py fires at ingest time per HTTP request. Logic: 1. Require service=http, source_ip present, headers present. 2. If source_ip ∈ DECNET_TRUSTED_PROXIES (comma-separated IPs or CIDRs) → legitimate reverse-proxy forwarding, skip. 3. Walk proxy-family headers in priority order: Forwarded (RFC 7239) → X-Forwarded-For → X-Real-IP → True-Client-IP → CF-Connecting-IP. 4. Extract the left-most parseable IP from the winning header (for Forwarded, only the first for= element is examined). 5. If that IP differs from the TCP source → emit a bounty with bounty_type="ip_leak" carrying {source_ip, real_ip_claim, source_header, headers_seen, decky, path, method}. Storage is the existing Bounty table — no schema change; de-dup is handled by Bounty's (attacker_ip, bounty_type, payload_hash) key, so repeat requests with the same leaked IP don't spam. AttackerDetail renders a warn-accent "LEAKED IPs:" row under ORIGIN listing distinct real_ip_claim values; a hover tooltip shows the source header + path of the most recent leak. The row is only shown when at least one ip_leak bounty exists. The RFC 7239 Forwarded parser handles the full vocabulary — bare IPv4, IPv4:port, quoted, IPv6 in brackets, IPv6 with port — returning only IPs that actually parse. Closes DEVELOPMENT.md "Network Topology Leakage → X-Forwarded-For mismatches". Phase 3 of the three-phase Attacker Intelligence series (phase 1, scanned-vs-interacted, and phase 2, PTR records, have already shipped). The DECNET_TRUSTED_PROXIES env shape matches THREAT_MODEL DA-08's "revisit when verified-proxy config lands" note — it is the same token set that future rate-limit work will consume.
2026-04-24 17:39:03 -04:00
parent 5a34371009
commit 2a0c5ca410
7 changed files with 518 additions and 1 deletions
--- a/decnet/web/db/repository.py
+++ b/decnet/web/db/repository.py
@@ -257,6 +257,15 @@ class BaseRepository(ABC):
        query."""
        raise NotImplementedError

+    async def get_attacker_ip_leaks(
+        self, attacker_uuid: str
+    ) -> list[dict[str, Any]]:
+        """Return ``bounty_type='ip_leak'`` rows for the attacker, newest
+        first. Each row's payload carries the TCP source IP, the header
+        that leaked, and the claimed real IP — see the XFF-mismatch
+        extractor in ``decnet.web.ingester`` for the shape."""
+        raise NotImplementedError
+
    @abstractmethod
    async def get_session_log(self, sid: str) -> Optional[dict[str, Any]]:
        """Look up the `session_recorded` Log row for a given session UUID."""
--- a/decnet/web/db/sqlmodel_repo.py
+++ b/decnet/web/db/sqlmodel_repo.py
@@ -907,6 +907,39 @@ class SQLModelRepository(BaseRepository):
            )
            return [(svc, evt) for svc, evt in rows.all()]

+    async def get_attacker_ip_leaks(
+        self, attacker_uuid: str
+    ) -> list[dict[str, Any]]:
+        """Return ``bounty_type='ip_leak'`` rows for this attacker, newest
+        first.  Shape matches the XFF-mismatch payload emitted by the
+        ingester: keys include ``real_ip_claim``, ``source_header``,
+        ``headers_seen``, ``path``, ``method``."""
+        async with self._session() as session:
+            ip_res = await session.execute(
+                select(Attacker.ip).where(Attacker.uuid == attacker_uuid)
+            )
+            ip = ip_res.scalar_one_or_none()
+            if not ip:
+                return []
+            rows = await session.execute(
+                select(Bounty)
+                .where(Bounty.attacker_ip == ip)
+                .where(Bounty.bounty_type == "ip_leak")
+                .order_by(desc(Bounty.timestamp))
+            )
+            out: list[dict[str, Any]] = []
+            for row in rows.scalars().all():
+                rec = row.model_dump(mode="json")
+                # Bounty.payload is stored JSON-encoded; pre-decode for UX.
+                raw = rec.get("payload")
+                if isinstance(raw, str):
+                    try:
+                        rec["payload"] = json.loads(raw)
+                    except (ValueError, TypeError):
+                        rec["payload"] = {}
+                out.append(rec)
+            return out
+
    async def get_attacker_artifacts(self, uuid: str) -> list[dict[str, Any]]:
        """Return `file_captured` logs for the attacker identified by UUID.

--- a/decnet/web/ingester.py
+++ b/decnet/web/ingester.py
@@ -1,9 +1,11 @@
 import asyncio
 import contextlib
+import ipaddress
 import os
 import json
+import re
 import time
-from typing import Any
+from typing import Any, Optional
 from pathlib import Path

 from decnet.bus import topics as _topics
@@ -243,6 +245,22 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non
            }
        })

+    # 2b. IP leak — the attacker's real IP accidentally forwarded in a
+    # proxy-family header (X-Forwarded-For / Forwarded / X-Real-IP /
+    # CDN variants). Left-most value differing from the TCP source is
+    # a high-confidence attribution signal. DECNET_TRUSTED_PROXIES
+    # opts specific source IPs out (legitimate reverse proxy in front
+    # of DECNET).
+    _leak = _detect_ip_leak(log_data, _headers)
+    if _leak is not None:
+        await repo.add_bounty({
+            "decky": log_data.get("decky"),
+            "service": log_data.get("service"),
+            "attacker_ip": log_data.get("attacker_ip"),
+            "bounty_type": "ip_leak",
+            "payload": _leak,
+        })
+
    # 3. VNC client version fingerprint
    _vnc_ver = _fields.get("client_version")
    if _vnc_ver and log_data.get("event_type") == "version":
@@ -393,3 +411,185 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non
                "options_order": _fields.get("options_order"),
            },
        })
+
+
+# ─── IP-leak detection (XFF / Forwarded / X-Real-IP / CDN variants) ──────────
+
+# Proxy-family headers we inspect, in priority order. Forwarded (RFC 7239)
+# is the "proper" way; X-Forwarded-For is de-facto; X-Real-IP and CDN
+# variants are common nginx / CloudFlare conventions.
+_PROXY_HEADERS = (
+    "Forwarded",
+    "X-Forwarded-For",
+    "X-Real-IP",
+    "True-Client-IP",
+    "CF-Connecting-IP",
+)
+
+# RFC 7239 `Forwarded: for=1.2.3.4` / `for="[2001:db8::1]:4711"`. The
+# capture grabs the raw for= value up to the next pair/element
+# delimiter (; or ,) or end-of-string; _parse_forwarded strips quotes
+# / IPv6 brackets / port afterwards.
+_FORWARDED_KV_RE = re.compile(
+    r'for\s*=\s*"?([^",;]+?)"?(?=[;,]|$)',
+    re.IGNORECASE,
+)
+
+
+def _get_trusted_proxies() -> list[ipaddress._BaseNetwork]:
+    """Parse DECNET_TRUSTED_PROXIES into network objects on every call.
+
+    Empty / unset → empty list (no opt-outs). Malformed entries are logged
+    at WARNING and dropped rather than raised — a typo in the env
+    shouldn't brick the ingester.
+    """
+    raw = os.environ.get("DECNET_TRUSTED_PROXIES", "")
+    out: list[ipaddress._BaseNetwork] = []
+    for token in raw.split(","):
+        token = token.strip()
+        if not token:
+            continue
+        try:
+            # Accept bare IPv4 ("1.2.3.4") and CIDRs ("10.0.0.0/8"). NOTE: a bare IPv6 address also gets "/32" below, i.e. a whole /32 network, not one host.
+            if "/" in token:
+                out.append(ipaddress.ip_network(token, strict=False))
+            else:
+                out.append(ipaddress.ip_network(f"{token}/32", strict=False))
+        except (ValueError, TypeError) as exc:
+            logger.warning("DECNET_TRUSTED_PROXIES: ignoring %r: %s", token, exc)
+    return out
+
+
+def _is_trusted_source(source_ip: str) -> bool:
+    try:
+        addr = ipaddress.ip_address(source_ip)
+    except (ValueError, TypeError):
+        return False
+    for net in _get_trusted_proxies():
+        try:
+            if addr in net:
+                return True
+        except (ValueError, TypeError):
+            continue
+    return False
+
+
+def _lookup_header(headers: dict[str, Any], name: str) -> Optional[str]:
+    """Case-insensitive header fetch; HTTP template logs headers as-received."""
+    lowered = name.lower()
+    for k, v in headers.items():
+        if isinstance(k, str) and k.lower() == lowered:
+            if isinstance(v, str) and v.strip():
+                return v.strip()
+    return None
+
+
+def _parse_forwarded(value: str) -> Optional[str]:
+    """Return the first `for=` IP from an RFC 7239 Forwarded header.
+
+    Handles the quoted IPv6-bracket-port form (`for="[2001:db8::1]:4711"`)
+    plus the bare IPv4 (`for=1.2.3.4`) and IPv4:port (`for=1.2.3.4:80`)
+    variants. Returns None on any parse failure.
+    """
+    match = _FORWARDED_KV_RE.search(value)
+    if not match:
+        return None
+    token = match.group(1).strip()
+    if not token:
+        return None
+    # Strip IPv6 brackets (+ optional :port after them).
+    if token.startswith("["):
+        end = token.find("]")
+        if end > 0:
+            token = token[1:end]
+    elif token.count(":") == 1:
+        # IPv4:port. IPv6 bare literals have ≥2 colons so we leave those.
+        token = token.split(":")[0]
+    try:
+        ipaddress.ip_address(token)
+    except (ValueError, TypeError):
+        return None
+    return token
+
+
+def _parse_xff_chain(value: str) -> Optional[str]:
+    """Return the left-most parseable IP from an X-Forwarded-For chain."""
+    for token in value.split(","):
+        token = token.strip().strip('"').lstrip("[").rstrip("]")
+        if not token:
+            continue
+        try:
+            ipaddress.ip_address(token)
+        except (ValueError, TypeError):
+            continue
+        return token
+    return None
+
+
+def _extract_claimed_ip(headers: dict[str, Any]) -> tuple[Optional[str], Optional[str]]:
+    """Walk the proxy-header priority list; return (claimed_ip, header_name)."""
+    for header in _PROXY_HEADERS:
+        raw = _lookup_header(headers, header)
+        if raw is None:
+            continue
+        if header == "Forwarded":
+            claimed = _parse_forwarded(raw)
+        elif header == "X-Forwarded-For":
+            claimed = _parse_xff_chain(raw)
+        else:
+            # Single-IP headers — strip quotes / IPv6 brackets; a value carrying a port fails to parse and is skipped.
+            token = raw.strip().strip('"').lstrip("[").rstrip("]")
+            try:
+                ipaddress.ip_address(token)
+                claimed = token
+            except (ValueError, TypeError):
+                claimed = None
+        if claimed is not None:
+            return claimed, header
+    return None, None
+
+
+def _detect_ip_leak(
+    log_data: dict[str, Any], headers: dict[str, Any],
+) -> Optional[dict[str, Any]]:
+    """Return a bounty payload iff an attribution-leak mismatch is present.
+
+    See :data:`_PROXY_HEADERS` for the set of headers checked. A leak is
+    claimed when:
+      - the TCP source IP is NOT in ``DECNET_TRUSTED_PROXIES``,
+      - a proxy-family header is present with a parseable IP, and
+      - that IP differs from the TCP source.
+    Otherwise returns ``None``.
+    """
+    if log_data.get("service") != "http":
+        return None
+    if not isinstance(headers, dict) or not headers:
+        return None
+    source_ip = log_data.get("attacker_ip")
+    if not isinstance(source_ip, str) or not source_ip:
+        return None
+    if _is_trusted_source(source_ip):
+        return None
+
+    claimed, header_name = _extract_claimed_ip(headers)
+    if claimed is None or claimed == source_ip:
+        return None
+
+    # Keep only the proxy-family values in the echoed-back metadata so
+    # the bounty payload stays compact.
+    seen = {}
+    for h in _PROXY_HEADERS:
+        raw = _lookup_header(headers, h)
+        if raw is not None:
+            seen[h] = raw
+
+    return {
+        "source_ip": source_ip,
+        "real_ip_claim": claimed,
+        "source_header": header_name,
+        "headers_seen": seen,
+        "decky": log_data.get("decky"),
+        "path": log_data.get("fields", {}).get("path"),
+        "method": log_data.get("fields", {}).get("method"),
+    }
+
--- a/decnet/web/router/attackers/api_get_attacker_detail.py
+++ b/decnet/web/router/attackers/api_get_attacker_detail.py
@@ -34,4 +34,8 @@ async def get_attacker_detail(
    # immediately without a profiler re-tick.
    pairs = await repo.get_attacker_service_activity(uuid)
    attacker["service_activity"] = bucket_services(pairs)
+    # Attribution leaks — XFF / Forwarded / X-Real-IP mismatches captured
+    # by the HTTP bounty extractor. Empty list when no HTTP interaction
+    # or no mismatch.
+    attacker["ip_leaks"] = await repo.get_attacker_ip_leaks(uuid)
    return attacker