feat(intel): persist per-provider taxonomy on AttackerIntel for TTP dispatch

The 2026-05-02 ship-time audit of the R0054-R0058 intel rule pack found
that AbuseIPDB / GreyNoise / ThreatFox stored only the aggregate verdict
(score / classification / listed-bool) plus the raw response blob. The
TTP IntelLifter expects per-provider taxonomy fields (categories, tags,
threat_types) that were never populated, so R0054 / R0055 / R0057
emitted zero tags in production despite passing unit tests.

Add typed columns: abuseipdb_categories, greynoise_tags, greynoise_name,
feodo_malware_family, threatfox_threat_types, threatfox_ioc_types,
threatfox_malware_families. Each provider now parses the relevant
taxonomy out of the upstream response and writes it through
column_updates. JSON-list columns are stored as TEXT with default "[]" to
keep the SQLite/MySQL backend split honest; the repo deserialises them
back to native lists on read.
This commit is contained in:
2026-05-02 18:07:57 -04:00
parent d1c4a48963
commit 999d3494b4
10 changed files with 272 additions and 1 deletions

View File

@@ -93,11 +93,24 @@ class AbuseIPDBProvider(IntelProvider):
data = payload.get("data") or {}
score = int(data.get("abuseConfidenceScore") or 0)
verdict = _score_to_verdict(score)
# AbuseIPDB returns ``data.reports[*].categories`` — a list of
# int codes per report. Flatten the union across all recent
# reports so the IntelLifter sees the full activity profile,
# not just the most-recent report's categories. Sorted for
# determinism (matters for tests + for the bus payload diff).
categories: set[int] = set()
for report in data.get("reports") or []:
if not isinstance(report, dict):
continue
for cat in report.get("categories") or []:
if isinstance(cat, int):
categories.add(cat)
return IntelResult(
provider=self.name,
verdict=verdict,
column_updates={
"abuseipdb_score": score,
"abuseipdb_categories": json.dumps(sorted(categories)),
"abuseipdb_raw": json.dumps(data),
"abuseipdb_queried_at": datetime.now(timezone.utc),
},

View File

@@ -93,15 +93,21 @@ class FeodoProvider(IntelProvider):
verdict=None, # absence ≠ "benign", let other providers speak
column_updates={
"feodo_listed": False,
"feodo_malware_family": None,
"feodo_raw": "{}",
"feodo_queried_at": datetime.now(timezone.utc),
},
)
family_obj = entry.get("malware")
family = (
family_obj if isinstance(family_obj, str) and family_obj else None
)
return IntelResult(
provider=self.name,
verdict="malicious",
column_updates={
"feodo_listed": True,
"feodo_malware_family": family,
"feodo_raw": json.dumps(entry),
"feodo_queried_at": datetime.now(timezone.utc),
},

View File

@@ -71,6 +71,8 @@ class GreyNoiseProvider(IntelProvider):
verdict="unknown",
column_updates={
"greynoise_classification": "unknown",
"greynoise_name": None,
"greynoise_tags": "[]",
"greynoise_raw": json.dumps({"message": "not seen"}),
"greynoise_queried_at": datetime.now(timezone.utc),
},
@@ -88,11 +90,24 @@ class GreyNoiseProvider(IntelProvider):
classification = (data.get("classification") or "unknown").lower()
verdict = _CLASSIFICATION_TO_VERDICT.get(classification, "unknown")
# The Community endpoint surfaces an actor ``name`` (e.g. "Tor",
# "Censys") but no behavioral tag list — the tag taxonomy is
# paid-tier only. Persist whatever we got; a future non-Community
# provider may populate ``greynoise_tags``.
name_obj = data.get("name")
name = name_obj if isinstance(name_obj, str) and name_obj else None
tags_obj = data.get("tags")
tags: list[str] = (
[t for t in tags_obj if isinstance(t, str)]
if isinstance(tags_obj, list) else []
)
return IntelResult(
provider=self.name,
verdict=verdict,
column_updates={
"greynoise_classification": classification,
"greynoise_name": name,
"greynoise_tags": json.dumps(tags),
"greynoise_raw": json.dumps(data),
"greynoise_queried_at": datetime.now(timezone.utc),
},

View File

@@ -71,6 +71,9 @@ class ThreatFoxProvider(IntelProvider):
verdict=None, # absence is not a benign signal
column_updates={
"threatfox_listed": False,
"threatfox_threat_types": "[]",
"threatfox_ioc_types": "[]",
"threatfox_malware_families": "[]",
"threatfox_raw": "{}",
"threatfox_queried_at": datetime.now(timezone.utc),
},
@@ -83,11 +86,36 @@ class ThreatFoxProvider(IntelProvider):
data = payload.get("data") or []
listed = bool(data)
# Each match in ``data`` carries threat_type / ioc_type / malware
# (canonical family). The IntelLifter dispatches ATT&CK techniques
# off ``threat_type`` (botnet_cc / payload_delivery / payload /
# cc_skimming); the other two columns are evidence and SIEM
# context. Sets are flattened across matches and serialised
# sorted for determinism.
threat_types: set[str] = set()
ioc_types: set[str] = set()
families: set[str] = set()
if isinstance(data, list):
for entry in data:
if not isinstance(entry, dict):
continue
tt = entry.get("threat_type")
if isinstance(tt, str) and tt:
threat_types.add(tt)
it = entry.get("ioc_type")
if isinstance(it, str) and it:
ioc_types.add(it)
family = entry.get("malware") or entry.get("malware_printable")
if isinstance(family, str) and family:
families.add(family)
return IntelResult(
provider=self.name,
verdict="malicious" if listed else None,
column_updates={
"threatfox_listed": listed,
"threatfox_threat_types": json.dumps(sorted(threat_types)),
"threatfox_ioc_types": json.dumps(sorted(ioc_types)),
"threatfox_malware_families": json.dumps(sorted(families)),
"threatfox_raw": json.dumps(data),
"threatfox_queried_at": datetime.now(timezone.utc),
},