Files
DECNET/decnet/intel/abuseipdb.py
anti 999d3494b4 feat(intel): persist per-provider taxonomy on AttackerIntel for TTP dispatch
The 2026-05-02 ship-time audit of the R0054-R0058 intel rule pack found
that AbuseIPDB / GreyNoise / ThreatFox stored only the aggregate verdict
(score / classification / listed-bool) plus the raw response blob. The
TTP IntelLifter expects per-provider taxonomy fields (categories, tags,
threat_types) that were never populated, so R0054 / R0055 / R0057
emitted zero tags in production despite passing unit tests.

Add typed columns: abuseipdb_categories, greynoise_tags, greynoise_name,
feodo_malware_family, threatfox_threat_types, threatfox_ioc_types,
threatfox_malware_families. Each provider now parses the relevant
taxonomy out of the upstream response and writes it through
column_updates. JSON-list columns ride as TEXT with default "[]" to
keep the SQLite/MySQL backend split honest, deserialised back to native
lists by the repo on read.
2026-05-02 18:07:57 -04:00

118 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""AbuseIPDB provider.
Endpoint: ``GET https://api.abuseipdb.com/api/v2/check``
Free tier: 1000 lookups/day. Always requires an API key passed in the
``Key`` header — the provider self-disables (returns an error) when no
key is configured rather than burning quota at the free public IP.
Verdict mapping is tier-based on the ``abuseConfidenceScore`` (0100):
* ``>= 75`` — ``malicious``
* ``25..74`` — ``suspicious``
* ``< 25`` — ``benign``
This matches AbuseIPDB's own UI thresholds reasonably closely; tune
later if operators report drift.
"""
from __future__ import annotations
import json
import os
from datetime import datetime, timezone
from typing import Optional
from decnet.intel.base import IntelProvider, IntelResult
from decnet.logging import get_logger
from decnet.net.http import stealth_client
log = get_logger("intel.abuseipdb")
_ENDPOINT = "https://api.abuseipdb.com/api/v2/check"
_DEFAULT_MAX_AGE_DAYS = 30
def _score_to_verdict(score: int) -> str:
if score >= 75:
return "malicious"
if score >= 25:
return "suspicious"
return "benign"
class AbuseIPDBProvider(IntelProvider):
name = "abuseipdb"
concurrency = 4
# 1000/day = avg 1 every ~86s. We don't enforce the daily cap here —
# operators who burn it through the worker will see HTTP 429 and the
# row gets retried after the TTL window.
min_dispatch_interval_s = 0.5
def __init__(
self,
*,
api_key: Optional[str] = None,
max_age_days: int = _DEFAULT_MAX_AGE_DAYS,
) -> None:
super().__init__()
self._api_key = api_key or os.environ.get(
"DECNET_ABUSEIPDB_API_KEY"
) or None
self._max_age_days = max_age_days
async def lookup(self, ip: str) -> IntelResult:
if not self._api_key:
return IntelResult(
provider=self.name,
error="DECNET_ABUSEIPDB_API_KEY not configured",
)
params = {
"ipAddress": ip,
"maxAgeInDays": str(self._max_age_days),
}
headers = {
"Key": self._api_key,
"Accept": "application/json",
}
try:
async with stealth_client() as client:
resp = await client.get(_ENDPOINT, headers=headers, params=params)
except Exception as exc: # noqa: BLE001
return IntelResult(provider=self.name, error=f"network: {exc}")
if resp.status_code != 200:
return IntelResult(
provider=self.name,
error=f"HTTP {resp.status_code}",
)
try:
payload = resp.json()
except Exception as exc: # noqa: BLE001
return IntelResult(provider=self.name, error=f"parse: {exc}")
data = payload.get("data") or {}
score = int(data.get("abuseConfidenceScore") or 0)
verdict = _score_to_verdict(score)
# AbuseIPDB returns ``data.reports[*].categories`` — a list of
# int codes per report. Flatten the union across all recent
# reports so the IntelLifter sees the full activity profile,
# not just the most-recent report's categories. Sorted for
# determinism (matters for tests + for the bus payload diff).
categories: set[int] = set()
for report in data.get("reports") or []:
if not isinstance(report, dict):
continue
for cat in report.get("categories") or []:
if isinstance(cat, int):
categories.add(cat)
return IntelResult(
provider=self.name,
verdict=verdict,
column_updates={
"abuseipdb_score": score,
"abuseipdb_categories": json.dumps(sorted(categories)),
"abuseipdb_raw": json.dumps(data),
"abuseipdb_queried_at": datetime.now(timezone.utc),
},
)