feat(intel): wire GreyNoise, AbuseIPDB, Feodo Tracker + ThreatFox

Four concrete IntelProvider impls — three per-IP queries plus one bulk feed:

* GreyNoiseProvider — community endpoint, optional API key for higher rate limit. 404 = unknown (cache the absence so we don't re-query).
* AbuseIPDBProvider — score threshold mapping (>=75 malicious, >=25 suspicious, else benign). Self-disables with a clear error when no API key is configured rather than burning quota.
* FeodoProvider — fetches the bulk botnet C2 IP feed once per refresh window and answers every lookup from an in-memory set. Listed = C2.
* ThreatFoxProvider — POST /api/v1/ search_ioc query, optional Auth-Key header. Match in data[] = malicious; no_result = absence-not-benign.

Every provider routes through decnet.net.http.stealth_client so the egress UA never leaks 'DECNET'.
2026-04-26 05:15:17 -04:00
parent f49a7db07d
commit cd70136d09
8 changed files with 868 additions and 0 deletions
--- a/decnet/intel/abuseipdb.py
+++ b/decnet/intel/abuseipdb.py
@@ -0,0 +1,104 @@
+"""AbuseIPDB provider.
+
+Endpoint: ``GET https://api.abuseipdb.com/api/v2/check``
+
+Free tier: 1000 lookups/day. Always requires an API key passed in the
+``Key`` header — the provider self-disables (returns an error) when no
+key is configured rather than sending a request without one.
+
+Verdict mapping is tier-based on the ``abuseConfidenceScore`` (0–100):
+
+* ``>= 75`` — ``malicious``
+* ``25..74`` — ``suspicious``
+* ``< 25``  — ``benign``
+
+This matches AbuseIPDB's own UI thresholds reasonably closely; tune
+later if operators report drift.
+"""
+from __future__ import annotations
+
+import json
+import os
+from datetime import datetime, timezone
+from typing import Optional
+
+from decnet.intel.base import IntelProvider, IntelResult
+from decnet.logging import get_logger
+from decnet.net.http import stealth_client
+
+# Module-level logger for the AbuseIPDB provider.
+log = get_logger("intel.abuseipdb")
+
+# URL queried by ``AbuseIPDBProvider.lookup``.
+_ENDPOINT = "https://api.abuseipdb.com/api/v2/check"
+# Default value sent as the ``maxAgeInDays`` query parameter.
+_DEFAULT_MAX_AGE_DAYS = 30
+
+
+def _score_to_verdict(score: int) -> str:
+    """Map an ``abuseConfidenceScore`` onto a verdict tier.
+
+    ``>= 75`` yields ``"malicious"``, ``25..74`` yields ``"suspicious"``
+    and anything lower yields ``"benign"``.
+    """
+    # Floors are checked from the highest down; the first one met wins.
+    for floor, tier in ((75, "malicious"), (25, "suspicious")):
+        if score >= floor:
+            return tier
+    return "benign"
+
+
+class AbuseIPDBProvider(IntelProvider):
+    """Per-IP reputation lookups against the AbuseIPDB ``check`` endpoint.
+
+    The key is taken from ``api_key`` or, failing that, from the
+    ``DECNET_ABUSEIPDB_API_KEY`` environment variable. Without a key,
+    :meth:`lookup` returns an error result and sends no request.
+    """
+
+    name = "abuseipdb"
+    concurrency = 4
+    # 1000/day = avg 1 every ~86s. We don't enforce the daily cap here —
+    # operators who burn it through the worker will see HTTP 429 and the
+    # row gets retried after the TTL window.
+    min_dispatch_interval_s = 0.5
+
+    def __init__(
+        self,
+        *,
+        api_key: Optional[str] = None,
+        max_age_days: int = _DEFAULT_MAX_AGE_DAYS,
+    ) -> None:
+        """Store the API key and the report look-back window.
+
+        Args:
+            api_key: AbuseIPDB key; falls back to the
+                ``DECNET_ABUSEIPDB_API_KEY`` environment variable.
+            max_age_days: Sent as the ``maxAgeInDays`` query parameter.
+        """
+        super().__init__()
+        # An empty string from either source is normalised to ``None``.
+        self._api_key = api_key or os.environ.get(
+            "DECNET_ABUSEIPDB_API_KEY"
+        ) or None
+        self._max_age_days = max_age_days
+
+    async def lookup(self, ip: str) -> IntelResult:
+        """Query AbuseIPDB for ``ip`` and map its score to a verdict.
+
+        Returns:
+            An ``IntelResult`` carrying the verdict and the
+            ``abuseipdb_*`` column updates on success. On any failure
+            (no key, network error, non-200 status, unparseable or
+            unexpectedly shaped body) only ``error`` is set.
+        """
+        if not self._api_key:
+            return IntelResult(
+                provider=self.name,
+                error="DECNET_ABUSEIPDB_API_KEY not configured",
+            )
+        params = {
+            "ipAddress": ip,
+            "maxAgeInDays": str(self._max_age_days),
+        }
+        headers = {
+            "Key": self._api_key,
+            "Accept": "application/json",
+        }
+        try:
+            async with stealth_client() as client:
+                resp = await client.get(_ENDPOINT, headers=headers, params=params)
+        except Exception as exc:  # noqa: BLE001
+            return IntelResult(provider=self.name, error=f"network: {exc}")
+
+        if resp.status_code != 200:
+            return IntelResult(
+                provider=self.name,
+                error=f"HTTP {resp.status_code}",
+            )
+        try:
+            payload = resp.json()
+        except Exception as exc:  # noqa: BLE001
+            return IntelResult(provider=self.name, error=f"parse: {exc}")
+
+        # Valid JSON of the wrong shape must surface as a parse error,
+        # like every other failure here, instead of raising out of lookup().
+        if not isinstance(payload, dict):
+            return IntelResult(
+                provider=self.name,
+                error="parse: response body is not a JSON object",
+            )
+        data = payload.get("data") or {}
+        if not isinstance(data, dict):
+            return IntelResult(
+                provider=self.name,
+                error="parse: 'data' is not a JSON object",
+            )
+        try:
+            # A missing or null score is still treated as 0.
+            score = int(data.get("abuseConfidenceScore") or 0)
+        except (TypeError, ValueError) as exc:
+            return IntelResult(provider=self.name, error=f"parse: {exc}")
+
+        verdict = _score_to_verdict(score)
+        return IntelResult(
+            provider=self.name,
+            verdict=verdict,
+            column_updates={
+                "abuseipdb_score": score,
+                "abuseipdb_raw": json.dumps(data),
+                "abuseipdb_queried_at": datetime.now(timezone.utc),
+            },
+        )
--- a/decnet/intel/feodo.py
+++ b/decnet/intel/feodo.py
@@ -0,0 +1,108 @@
+"""abuse.ch Feodo Tracker provider — bulk JSON botnet C2 feed.
+
+Endpoint: ``GET https://feodotracker.abuse.ch/downloads/ipblocklist.json``
+
+This is the only provider in the v1 set that uses a *bulk* feed instead
+of a per-IP query: the upstream is a list of every botnet C2 IP abuse.ch
+has seen recently (Emotet, TrickBot, Dridex, etc.), refreshed every few
+minutes. We fetch the full list once per ``refresh_interval_s`` and
+answer ``lookup(ip)`` calls from the in-process set.
+
+This makes Feodo Tracker effectively free at the call-site: thousands
+of attacker IPs map to a single network round-trip per refresh window.
+"""
+from __future__ import annotations
+
+import json
+import time
+from datetime import datetime, timezone
+from typing import Any, Optional
+
+from decnet.intel.base import IntelProvider, IntelResult
+from decnet.logging import get_logger
+from decnet.net.http import stealth_client
+
+# Module-level logger for the Feodo Tracker provider.
+log = get_logger("intel.feodo")
+
+# URL of the bulk JSON blocklist fetched by ``FeodoProvider._refresh``.
+_ENDPOINT = "https://feodotracker.abuse.ch/downloads/ipblocklist.json"
+# Default ``refresh_interval_s`` for ``FeodoProvider``, in seconds.
+_DEFAULT_REFRESH_S = 3600.0
+
+
+class FeodoProvider(IntelProvider):
+    """Answer lookups from an in-memory copy of the Feodo Tracker feed.
+
+    The bulk feed is fetched lazily on the first lookup and again once
+    ``refresh_interval_s`` seconds have passed since the last successful
+    fetch. A failed refresh keeps whatever index was loaded before.
+    """
+
+    name = "feodo"
+    concurrency = 1  # only one concurrent refresh; lookups are pure set ops
+    min_dispatch_interval_s = 0.0
+
+    def __init__(self, *, refresh_interval_s: float = _DEFAULT_REFRESH_S) -> None:
+        """Create an empty provider; the feed is fetched on first lookup.
+
+        Args:
+            refresh_interval_s: Seconds a successfully fetched feed is
+                reused before it is fetched again.
+        """
+        super().__init__()
+        self._refresh_interval_s = refresh_interval_s
+        # ip → upstream record dict, keyed by ``ip_address``.
+        self._index: dict[str, dict[str, Any]] = {}
+        self._loaded_at: float = 0.0
+        # Set by the first successful refresh. Kept separately from
+        # ``_index`` because a successfully fetched feed may be empty.
+        self._has_loaded: bool = False
+        self._last_error: Optional[str] = None
+
+    async def _refresh(self) -> Optional[str]:
+        """Refetch the bulk feed. Returns an error string or ``None``.
+
+        On success the index is replaced wholesale and the load time is
+        reset; on failure the existing index is left untouched.
+        """
+        try:
+            async with stealth_client(timeout=20.0) as client:
+                resp = await client.get(_ENDPOINT)
+        except Exception as exc:  # noqa: BLE001
+            return f"network: {exc}"
+        if resp.status_code != 200:
+            return f"HTTP {resp.status_code}"
+        try:
+            payload = resp.json()
+        except Exception as exc:  # noqa: BLE001
+            return f"parse: {exc}"
+        if not isinstance(payload, list):
+            return "feed: not a list"
+
+        # Entries that are not dicts, or lack a string ``ip_address``,
+        # are skipped rather than failing the whole refresh.
+        new_index: dict[str, dict[str, Any]] = {
+            entry["ip_address"]: entry
+            for entry in payload
+            if isinstance(entry, dict)
+            and isinstance(entry.get("ip_address"), str)
+        }
+        self._index = new_index
+        self._loaded_at = time.monotonic()
+        self._has_loaded = True
+        self._last_error = None
+        log.info("feodo: refreshed bulk feed entries=%d", len(new_index))
+        return None
+
+    async def _ensure_fresh(self) -> None:
+        """Refresh the index if it was never loaded or has expired.
+
+        A refresh failure is recorded in ``_last_error``; it does not
+        raise.
+        """
+        # An empty index only means "never loaded" until a refresh has
+        # succeeded. Without the flag, a legitimately empty feed would be
+        # re-downloaded on every lookup.
+        never_loaded = not self._has_loaded and not self._index
+        age_s = time.monotonic() - self._loaded_at
+        if never_loaded or age_s >= self._refresh_interval_s:
+            err = await self._refresh()
+            if err:
+                self._last_error = err
+
+    async def lookup(self, ip: str) -> IntelResult:
+        """Report whether ``ip`` is present in the Feodo Tracker feed.
+
+        Returns:
+            A ``"malicious"`` verdict with the upstream record when the
+            IP is listed; ``verdict=None`` with ``feodo_listed=False``
+            when it is not; an error result when there is no index to
+            answer from and the last refresh failed.
+        """
+        await self._ensure_fresh()
+        if not self._index and self._last_error:
+            return IntelResult(provider=self.name, error=self._last_error)
+
+        entry = self._index.get(ip)
+        if entry is None:
+            # Not on the C2 list. That is not evidence the IP is benign,
+            # so no verdict is given; the row is still written so the
+            # query time is recorded.
+            return IntelResult(
+                provider=self.name,
+                verdict=None,  # absence ≠ "benign", let other providers speak
+                column_updates={
+                    "feodo_listed": False,
+                    "feodo_raw": "{}",
+                    "feodo_queried_at": datetime.now(timezone.utc),
+                },
+            )
+        return IntelResult(
+            provider=self.name,
+            verdict="malicious",
+            column_updates={
+                "feodo_listed": True,
+                "feodo_raw": json.dumps(entry),
+                "feodo_queried_at": datetime.now(timezone.utc),
+            },
+        )
--- a/decnet/intel/greynoise.py
+++ b/decnet/intel/greynoise.py
@@ -0,0 +1,107 @@
+"""GreyNoise Community API provider.
+
+Endpoint: ``GET https://api.greynoise.io/v3/community/<ip>``
+
+The Community endpoint requires no API key for low-volume use; an
+optional ``DECNET_GREYNOISE_API_KEY`` lifts the rate limit. We always
+send the key when present.
+
+Response shape (relevant fields)::
+
+    {
+      "ip": "1.2.3.4",
+      "noise": true,             // observed scanning the public internet
+      "riot": false,             // member of the "Rule It Out" benign set
+      "classification": "benign | malicious | unknown",
+      "name": "Censys",          // tool/operator label, when known
+      "link": "https://...",
+      "last_seen": "2026-04-25"
+    }
+
+Status code semantics:
+* 200 — IP found, JSON body as above
+* 404 — IP not observed by GreyNoise (treat as ``"unknown"``, not error)
+* 429 — rate-limited (treat as transient error)
+"""
+from __future__ import annotations
+
+import json
+import os
+from datetime import datetime, timezone
+from typing import Optional
+
+from decnet.intel.base import IntelProvider, IntelResult
+from decnet.logging import get_logger
+from decnet.net.http import stealth_client
+
+# Module-level logger for the GreyNoise provider.
+log = get_logger("intel.greynoise")
+
+# URL template; ``{ip}`` is filled in by ``GreyNoiseProvider.lookup``.
+_ENDPOINT = "https://api.greynoise.io/v3/community/{ip}"
+
+
+class GreyNoiseProvider(IntelProvider):
+    """Per-IP lookups against the GreyNoise Community endpoint.
+
+    The API key is optional. It is taken from ``api_key`` or, failing
+    that, from the ``DECNET_GREYNOISE_API_KEY`` environment variable, and
+    sent in the ``key`` header whenever one is available.
+    """
+
+    name = "greynoise"
+    concurrency = 4
+    # Community tier is ~50/min; ~1.5s between dispatches keeps us well
+    # under that without serialising entirely.
+    min_dispatch_interval_s = 1.5
+
+    def __init__(self, *, api_key: Optional[str] = None) -> None:
+        """Store the optional API key.
+
+        Args:
+            api_key: GreyNoise key; falls back to the
+                ``DECNET_GREYNOISE_API_KEY`` environment variable.
+        """
+        super().__init__()
+        # An empty string from either source is normalised to ``None``.
+        self._api_key = api_key or os.environ.get(
+            "DECNET_GREYNOISE_API_KEY"
+        ) or None
+
+    async def lookup(self, ip: str) -> IntelResult:
+        """Query GreyNoise for ``ip`` and map its classification.
+
+        Returns:
+            An ``IntelResult`` with the verdict and ``greynoise_*``
+            column updates for a 200 or 404 response (404 is recorded as
+            ``"unknown"``). On a network error, any other status, or an
+            unparseable or non-object body, only ``error`` is set.
+        """
+        url = _ENDPOINT.format(ip=ip)
+        headers = {"Accept": "application/json"}
+        if self._api_key:
+            headers["key"] = self._api_key
+        try:
+            async with stealth_client() as client:
+                resp = await client.get(url, headers=headers)
+        except Exception as exc:  # noqa: BLE001
+            return IntelResult(provider=self.name, error=f"network: {exc}")
+
+        if resp.status_code == 404:
+            # IP not in GreyNoise's view of the internet — record the row
+            # so we don't keep re-querying within the TTL window.
+            return IntelResult(
+                provider=self.name,
+                verdict="unknown",
+                column_updates={
+                    "greynoise_classification": "unknown",
+                    "greynoise_raw": json.dumps({"message": "not seen"}),
+                    "greynoise_queried_at": datetime.now(timezone.utc),
+                },
+            )
+        if resp.status_code != 200:
+            return IntelResult(
+                provider=self.name,
+                error=f"HTTP {resp.status_code}",
+            )
+
+        try:
+            data = resp.json()
+        except Exception as exc:  # noqa: BLE001
+            return IntelResult(provider=self.name, error=f"parse: {exc}")
+
+        # Valid JSON that is not an object must surface as a parse error
+        # instead of raising out of lookup().
+        if not isinstance(data, dict):
+            return IntelResult(
+                provider=self.name,
+                error="parse: response body is not a JSON object",
+            )
+
+        # A missing, empty or non-string classification falls back to
+        # "unknown" rather than crashing on ``.lower()``.
+        raw_classification = data.get("classification")
+        if isinstance(raw_classification, str) and raw_classification:
+            classification = raw_classification.lower()
+        else:
+            classification = "unknown"
+        verdict = _CLASSIFICATION_TO_VERDICT.get(classification, "unknown")
+        return IntelResult(
+            provider=self.name,
+            verdict=verdict,
+            column_updates={
+                "greynoise_classification": classification,
+                "greynoise_raw": json.dumps(data),
+                "greynoise_queried_at": datetime.now(timezone.utc),
+            },
+        )
+
+
+# Lower-cased GreyNoise ``classification`` value → verdict string.
+# ``GreyNoiseProvider.lookup`` falls back to "unknown" for any value that
+# is not a key here.
+_CLASSIFICATION_TO_VERDICT = {
+    "malicious": "malicious",
+    "suspicious": "suspicious",
+    "benign": "benign",
+    "unknown": "unknown",
+}
--- a/decnet/intel/threatfox.py
+++ b/decnet/intel/threatfox.py
@@ -0,0 +1,94 @@
+"""abuse.ch ThreatFox provider — per-IOC query API.
+
+Endpoint: ``POST https://threatfox-api.abuse.ch/api/v1/``
+
+ThreatFox returns IOC matches across many types (URL, domain, IP, hash).
+We send ``{"query": "search_ioc", "search_term": "<ip>"}`` and treat any
+non-empty ``data`` array as a malicious match.
+
+API key handling: ThreatFox accepts an optional ``Auth-Key`` header for
+higher rate limits. Without a key the public endpoint still answers but
+caps requests/min — the provider works either way.
+"""
+from __future__ import annotations
+
+import json
+import os
+from datetime import datetime, timezone
+from typing import Optional
+
+from decnet.intel.base import IntelProvider, IntelResult
+from decnet.logging import get_logger
+from decnet.net.http import stealth_client
+
+# Module-level logger for the ThreatFox provider.
+log = get_logger("intel.threatfox")
+
+# URL that ``ThreatFoxProvider.lookup`` POSTs its ``search_ioc`` query to.
+_ENDPOINT = "https://threatfox-api.abuse.ch/api/v1/"
+
+
+class ThreatFoxProvider(IntelProvider):
+    """Per-IP IOC lookups against the ThreatFox ``search_ioc`` query.
+
+    The API key is optional. It is taken from ``api_key`` or, failing
+    that, from the ``DECNET_THREATFOX_API_KEY`` environment variable, and
+    sent in the ``Auth-Key`` header whenever one is available.
+    """
+
+    name = "threatfox"
+    concurrency = 4
+    min_dispatch_interval_s = 0.5
+
+    def __init__(self, *, api_key: Optional[str] = None) -> None:
+        """Store the optional API key.
+
+        Args:
+            api_key: ThreatFox key; falls back to the
+                ``DECNET_THREATFOX_API_KEY`` environment variable.
+        """
+        super().__init__()
+        # An empty string from either source is normalised to ``None``.
+        self._api_key = api_key or os.environ.get(
+            "DECNET_THREATFOX_API_KEY"
+        ) or None
+
+    async def lookup(self, ip: str) -> IntelResult:
+        """Search ThreatFox for ``ip`` and report whether it is listed.
+
+        Returns:
+            A ``"malicious"`` verdict when ``data`` holds at least one
+            match; ``verdict=None`` with ``threatfox_listed=False`` when
+            there is no match. On a network error, non-200 status,
+            unparseable or unexpectedly shaped body, or an unexpected
+            ``query_status``, only ``error`` is set.
+        """
+        body = {"query": "search_ioc", "search_term": ip}
+        headers = {"Accept": "application/json"}
+        if self._api_key:
+            headers["Auth-Key"] = self._api_key
+
+        try:
+            async with stealth_client() as client:
+                resp = await client.post(
+                    _ENDPOINT, headers=headers, json=body,
+                )
+        except Exception as exc:  # noqa: BLE001
+            return IntelResult(provider=self.name, error=f"network: {exc}")
+
+        if resp.status_code != 200:
+            return IntelResult(
+                provider=self.name, error=f"HTTP {resp.status_code}",
+            )
+        try:
+            payload = resp.json()
+        except Exception as exc:  # noqa: BLE001
+            return IntelResult(provider=self.name, error=f"parse: {exc}")
+
+        # Valid JSON that is not an object must surface as a parse error
+        # instead of raising out of lookup().
+        if not isinstance(payload, dict):
+            return IntelResult(
+                provider=self.name,
+                error="parse: response body is not a JSON object",
+            )
+
+        status = payload.get("query_status")
+        # ThreatFox returns query_status="no_result" when the IOC isn't
+        # tracked, and query_status="ok" with a non-empty data list when
+        # it is. Anything else (illegal_search, etc.) is a contract
+        # violation we surface as an error.
+        if status == "no_result":
+            return IntelResult(
+                provider=self.name,
+                verdict=None,  # absence is not a benign signal
+                column_updates={
+                    "threatfox_listed": False,
+                    "threatfox_raw": "{}",
+                    "threatfox_queried_at": datetime.now(timezone.utc),
+                },
+            )
+        if status != "ok":
+            return IntelResult(
+                provider=self.name,
+                error=f"query_status={status!r}",
+            )
+
+        data = payload.get("data") or []
+        # Only a list of matches counts as a hit; a non-list value here
+        # must not be read as "listed".
+        if not isinstance(data, list):
+            return IntelResult(
+                provider=self.name,
+                error="parse: 'data' is not a list",
+            )
+        listed = bool(data)
+        return IntelResult(
+            provider=self.name,
+            verdict="malicious" if listed else None,
+            column_updates={
+                "threatfox_listed": listed,
+                "threatfox_raw": json.dumps(data),
+                "threatfox_queried_at": datetime.now(timezone.utc),
+            },
+        )