Files
DECNET/decnet/intel/threatfox.py
anti 999d3494b4 feat(intel): persist per-provider taxonomy on AttackerIntel for TTP dispatch
The 2026-05-02 ship-time audit of the R0054-R0058 intel rule pack found
that AbuseIPDB / GreyNoise / ThreatFox stored only the aggregate verdict
(score / classification / listed-bool) plus the raw response blob. The
TTP IntelLifter expects per-provider taxonomy fields (categories, tags,
threat_types) that were never populated, so R0054 / R0055 / R0057
emitted zero tags in production despite passing unit tests.

Add typed columns: abuseipdb_categories, greynoise_tags, greynoise_name,
feodo_malware_family, threatfox_threat_types, threatfox_ioc_types,
threatfox_malware_families. Each provider now parses the relevant
taxonomy out of the upstream response and writes it through
column_updates. JSON-list columns ride as TEXT with default "[]" to
keep the SQLite/MySQL backend split honest, deserialised back to native
lists by the repo on read.
2026-05-02 18:07:57 -04:00

123 lines
4.7 KiB
Python

"""abuse.ch ThreatFox provider — per-IOC query API.
Endpoint: ``POST https://threatfox-api.abuse.ch/api/v1/``
ThreatFox returns IOC matches across many types (URL, domain, IP, hash).
We send ``{"query": "search_ioc", "search_term": "<ip>"}`` and treat any
non-empty ``data`` array as a malicious match.
API key handling: ThreatFox accepts an optional ``Auth-Key`` header for
higher rate limits. Without a key the public endpoint still answers but
caps requests/min — the provider works either way.
"""
from __future__ import annotations
import json
import os
from datetime import datetime, timezone
from typing import Optional
from decnet.intel.base import IntelProvider, IntelResult
from decnet.logging import get_logger
from decnet.net.http import stealth_client
# Module-level logger obtained from the project's logging helper.
log = get_logger("intel.threatfox")
# Single ThreatFox API URL; every lookup is a POST to this endpoint.
_ENDPOINT = "https://threatfox-api.abuse.ch/api/v1/"
class ThreatFoxProvider(IntelProvider):
    """Intel provider that queries abuse.ch ThreatFox for a single IP.

    Each lookup POSTs a ``search_ioc`` query to ``_ENDPOINT`` and maps the
    response onto an ``IntelResult``:

    * transport, HTTP-status, JSON-parse and response-shape problems are
      reported through ``IntelResult.error`` rather than raised;
    * ``query_status == "no_result"`` writes an "unlisted" row with empty
      taxonomy columns and no verdict;
    * ``query_status == "ok"`` writes the listed flag, the flattened
      taxonomy (threat types, IOC types, malware families) and the raw
      match list.
    """

    # Provider identifier; stamped on every IntelResult built by lookup().
    name = "threatfox"
    # Scheduling knobs -- presumably consumed by the IntelProvider base or
    # its dispatcher (not visible in this module; confirm there).
    concurrency = 4
    min_dispatch_interval_s = 0.5

    def __init__(self, *, api_key: Optional[str] = None) -> None:
        """Create the provider.

        Args:
            api_key: Optional ThreatFox ``Auth-Key``. When omitted or empty,
                the ``DECNET_THREATFOX_API_KEY`` environment variable is
                used; if that is also unset or empty the provider sends no
                key at all.
        """
        super().__init__()
        # Trailing ``or None`` normalises an empty-string env value to None
        # so lookup() can use a plain truthiness check.
        self._api_key = api_key or os.environ.get(
            "DECNET_THREATFOX_API_KEY"
        ) or None

    @staticmethod
    def _collect_taxonomy(
        data: list,
    ) -> tuple[list[str], list[str], list[str]]:
        """Flatten the per-match taxonomy of a ThreatFox ``data`` list.

        Args:
            data: The decoded ``data`` array of a ThreatFox response.
                Entries that are not dicts are ignored.

        Returns:
            ``(threat_types, ioc_types, families)`` -- each a sorted list of
            the distinct non-empty string values found across all matches.
            ``families`` prefers the ``malware`` key and falls back to
            ``malware_printable``.
        """
        threat_types: set[str] = set()
        ioc_types: set[str] = set()
        families: set[str] = set()
        for entry in data:
            if not isinstance(entry, dict):
                continue
            tt = entry.get("threat_type")
            if isinstance(tt, str) and tt:
                threat_types.add(tt)
            it = entry.get("ioc_type")
            if isinstance(it, str) and it:
                ioc_types.add(it)
            family = entry.get("malware") or entry.get("malware_printable")
            if isinstance(family, str) and family:
                families.add(family)
        # Sorted so the serialised columns are deterministic.
        return sorted(threat_types), sorted(ioc_types), sorted(families)

    async def lookup(self, ip: str) -> IntelResult:
        """Query ThreatFox for ``ip`` and translate the answer.

        Args:
            ip: The IP address sent as the ``search_term``.

        Returns:
            An ``IntelResult`` for this provider. On failure only ``error``
            is set (``network: ...``, ``HTTP <code>``, ``parse: ...`` or
            ``query_status=<value>``). On success ``column_updates`` carries
            the ``threatfox_*`` columns and ``verdict`` is ``"malicious"``
            when at least one match was returned, otherwise ``None``.
        """
        body = {"query": "search_ioc", "search_term": ip}
        headers = {"Accept": "application/json"}
        if self._api_key:
            headers["Auth-Key"] = self._api_key
        try:
            async with stealth_client() as client:
                resp = await client.post(
                    _ENDPOINT, headers=headers, json=body,
                )
        except Exception as exc:  # noqa: BLE001
            return IntelResult(provider=self.name, error=f"network: {exc}")
        if resp.status_code != 200:
            return IntelResult(
                provider=self.name, error=f"HTTP {resp.status_code}",
            )
        try:
            payload = resp.json()
        except Exception as exc:  # noqa: BLE001
            return IntelResult(provider=self.name, error=f"parse: {exc}")
        # Valid JSON is not necessarily an object: a body of ``[]`` or
        # ``null`` would otherwise raise AttributeError on ``.get`` below.
        if not isinstance(payload, dict):
            return IntelResult(
                provider=self.name,
                error=(
                    "parse: unexpected payload type "
                    f"{type(payload).__name__}"
                ),
            )
        status = payload.get("query_status")
        # ThreatFox returns query_status="no_result" when the IOC isn't
        # tracked, and query_status="ok" with a non-empty data list when
        # it is. Anything else (illegal_search, etc.) is a contract
        # violation we surface as an error.
        if status == "no_result":
            return IntelResult(
                provider=self.name,
                verdict=None,  # absence is not a benign signal
                column_updates={
                    "threatfox_listed": False,
                    "threatfox_threat_types": "[]",
                    "threatfox_ioc_types": "[]",
                    "threatfox_malware_families": "[]",
                    "threatfox_raw": "{}",
                    "threatfox_queried_at": datetime.now(timezone.utc),
                },
            )
        if status != "ok":
            return IntelResult(
                provider=self.name,
                error=f"query_status={status!r}",
            )
        data = payload.get("data") or []
        # A truthy non-list ``data`` under status "ok" is malformed; do not
        # let it produce a "malicious" verdict with empty taxonomy.
        if not isinstance(data, list):
            return IntelResult(
                provider=self.name,
                error=f"parse: unexpected data type {type(data).__name__}",
            )
        listed = bool(data)
        # Each match in ``data`` carries threat_type / ioc_type / malware
        # (canonical family). The IntelLifter dispatches ATT&CK techniques
        # off ``threat_type`` (botnet_cc / payload_delivery / payload /
        # cc_skimming); the other two columns are evidence and SIEM
        # context.
        threat_types, ioc_types, families = self._collect_taxonomy(data)
        return IntelResult(
            provider=self.name,
            verdict="malicious" if listed else None,
            column_updates={
                "threatfox_listed": listed,
                "threatfox_threat_types": json.dumps(threat_types),
                "threatfox_ioc_types": json.dumps(ioc_types),
                "threatfox_malware_families": json.dumps(families),
                "threatfox_raw": json.dumps(data),
                "threatfox_queried_at": datetime.now(timezone.utc),
            },
        )