feat(intel): persist per-provider taxonomy on AttackerIntel for TTP dispatch

The 2026-05-02 ship-time audit of the R0054-R0058 intel rule pack found
that AbuseIPDB / GreyNoise / ThreatFox stored only the aggregate verdict
(score / classification / listed-bool) plus the raw response blob. The
TTP IntelLifter expects per-provider taxonomy fields (categories, tags,
threat_types) that were never populated, so R0054 / R0055 / R0057
emitted zero tags in production despite passing unit tests.

Add typed columns: abuseipdb_categories, greynoise_tags, greynoise_name,
feodo_malware_family, threatfox_threat_types, threatfox_ioc_types,
threatfox_malware_families. Each provider now parses the relevant
taxonomy out of the upstream response and writes it through
column_updates. JSON-list columns are stored as TEXT with default "[]" to
keep the SQLite/MySQL backend split honest; the repo deserialises them
back to native lists on read.
This commit is contained in:
2026-05-02 18:07:57 -04:00
parent d1c4a48963
commit 999d3494b4
10 changed files with 272 additions and 1 deletions

View File

@@ -93,11 +93,24 @@ class AbuseIPDBProvider(IntelProvider):
data = payload.get("data") or {}
score = int(data.get("abuseConfidenceScore") or 0)
verdict = _score_to_verdict(score)
# AbuseIPDB returns ``data.reports[*].categories`` — a list of
# int codes per report. Flatten the union across all recent
# reports so the IntelLifter sees the full activity profile,
# not just the most-recent report's categories. Sorted for
# determinism (matters for tests + for the bus payload diff).
categories: set[int] = set()
for report in data.get("reports") or []:
if not isinstance(report, dict):
continue
for cat in report.get("categories") or []:
if isinstance(cat, int):
categories.add(cat)
return IntelResult(
provider=self.name,
verdict=verdict,
column_updates={
"abuseipdb_score": score,
"abuseipdb_categories": json.dumps(sorted(categories)),
"abuseipdb_raw": json.dumps(data),
"abuseipdb_queried_at": datetime.now(timezone.utc),
},

View File

@@ -93,15 +93,21 @@ class FeodoProvider(IntelProvider):
verdict=None, # absence ≠ "benign", let other providers speak
column_updates={
"feodo_listed": False,
"feodo_malware_family": None,
"feodo_raw": "{}",
"feodo_queried_at": datetime.now(timezone.utc),
},
)
family_obj = entry.get("malware")
family = (
family_obj if isinstance(family_obj, str) and family_obj else None
)
return IntelResult(
provider=self.name,
verdict="malicious",
column_updates={
"feodo_listed": True,
"feodo_malware_family": family,
"feodo_raw": json.dumps(entry),
"feodo_queried_at": datetime.now(timezone.utc),
},

View File

@@ -71,6 +71,8 @@ class GreyNoiseProvider(IntelProvider):
verdict="unknown",
column_updates={
"greynoise_classification": "unknown",
"greynoise_name": None,
"greynoise_tags": "[]",
"greynoise_raw": json.dumps({"message": "not seen"}),
"greynoise_queried_at": datetime.now(timezone.utc),
},
@@ -88,11 +90,24 @@ class GreyNoiseProvider(IntelProvider):
classification = (data.get("classification") or "unknown").lower()
verdict = _CLASSIFICATION_TO_VERDICT.get(classification, "unknown")
# The Community endpoint surfaces an actor ``name`` (e.g. "Tor",
# "Censys") but no behavioral tag list — the tag taxonomy is
# paid-tier only. Persist whatever we got; a future non-Community
# provider may populate ``greynoise_tags``.
name_obj = data.get("name")
name = name_obj if isinstance(name_obj, str) and name_obj else None
tags_obj = data.get("tags")
tags: list[str] = (
[t for t in tags_obj if isinstance(t, str)]
if isinstance(tags_obj, list) else []
)
return IntelResult(
provider=self.name,
verdict=verdict,
column_updates={
"greynoise_classification": classification,
"greynoise_name": name,
"greynoise_tags": json.dumps(tags),
"greynoise_raw": json.dumps(data),
"greynoise_queried_at": datetime.now(timezone.utc),
},

View File

@@ -71,6 +71,9 @@ class ThreatFoxProvider(IntelProvider):
verdict=None, # absence is not a benign signal
column_updates={
"threatfox_listed": False,
"threatfox_threat_types": "[]",
"threatfox_ioc_types": "[]",
"threatfox_malware_families": "[]",
"threatfox_raw": "{}",
"threatfox_queried_at": datetime.now(timezone.utc),
},
@@ -83,11 +86,36 @@ class ThreatFoxProvider(IntelProvider):
data = payload.get("data") or []
listed = bool(data)
# Each match in ``data`` carries threat_type / ioc_type / malware
# (canonical family). The IntelLifter dispatches ATT&CK techniques
# off ``threat_type`` (botnet_cc / payload_delivery / payload /
# cc_skimming); the other two columns are evidence and SIEM
# context. Sets are flattened across matches and serialised
# sorted for determinism.
threat_types: set[str] = set()
ioc_types: set[str] = set()
families: set[str] = set()
if isinstance(data, list):
for entry in data:
if not isinstance(entry, dict):
continue
tt = entry.get("threat_type")
if isinstance(tt, str) and tt:
threat_types.add(tt)
it = entry.get("ioc_type")
if isinstance(it, str) and it:
ioc_types.add(it)
family = entry.get("malware") or entry.get("malware_printable")
if isinstance(family, str) and family:
families.add(family)
return IntelResult(
provider=self.name,
verdict="malicious" if listed else None,
column_updates={
"threatfox_listed": listed,
"threatfox_threat_types": json.dumps(sorted(threat_types)),
"threatfox_ioc_types": json.dumps(sorted(ioc_types)),
"threatfox_malware_families": json.dumps(sorted(families)),
"threatfox_raw": json.dumps(data),
"threatfox_queried_at": datetime.now(timezone.utc),
},

View File

@@ -45,8 +45,15 @@ class AttackerIntel(SQLModel, table=True):
schema_version: int = Field(default=1)
# ── GreyNoise Community ─────────────────────────────────────────────
# classification ∈ {"benign", "malicious", "suspicious", "unknown"}
# classification ∈ {"benign", "malicious", "suspicious", "unknown"}.
# The Community endpoint does not return tags; ``greynoise_tags`` stays
# empty unless an operator wires a non-Community provider that does.
greynoise_classification: Optional[str] = Field(default=None, max_length=32)
greynoise_name: Optional[str] = Field(default=None, max_length=128)
greynoise_tags: str = Field(
default="[]",
sa_column=Column("greynoise_tags", _BIG_TEXT, nullable=False, default="[]"),
) # JSON list[str] — behavioral / actor tags
greynoise_raw: str = Field(
default="{}",
sa_column=Column("greynoise_raw", _BIG_TEXT, nullable=False, default="{}"),
@@ -56,6 +63,12 @@ class AttackerIntel(SQLModel, table=True):
# ── AbuseIPDB ────────────────────────────────────────────────────────
# 0..100 abuse confidence score
abuseipdb_score: Optional[int] = Field(default=None)
abuseipdb_categories: str = Field(
default="[]",
sa_column=Column(
"abuseipdb_categories", _BIG_TEXT, nullable=False, default="[]",
),
) # JSON list[int] — flattened set of categories across recent reports
abuseipdb_raw: str = Field(
default="{}",
sa_column=Column("abuseipdb_raw", _BIG_TEXT, nullable=False, default="{}"),
@@ -64,6 +77,7 @@ class AttackerIntel(SQLModel, table=True):
# ── abuse.ch Feodo Tracker ───────────────────────────────────────────
feodo_listed: Optional[bool] = Field(default=None)
feodo_malware_family: Optional[str] = Field(default=None, max_length=64)
feodo_raw: str = Field(
default="{}",
sa_column=Column("feodo_raw", _BIG_TEXT, nullable=False, default="{}"),
@@ -71,7 +85,31 @@ class AttackerIntel(SQLModel, table=True):
feodo_queried_at: Optional[datetime] = Field(default=None)
# ── abuse.ch ThreatFox ───────────────────────────────────────────────
# ThreatFox returns a list of matches for a queried IP. Each match has
# a ``threat_type`` (botnet_cc / payload_delivery / payload /
# cc_skimming) and an ``ioc_type`` (url / domain / ip:port / hash
# variants). We flatten the unique sets across all matches; the
# IntelLifter keys ATT&CK techniques on ``threat_type``, the canonical
# taxonomy field per ThreatFox's API.
threatfox_listed: Optional[bool] = Field(default=None)
threatfox_threat_types: str = Field(
default="[]",
sa_column=Column(
"threatfox_threat_types", _BIG_TEXT, nullable=False, default="[]",
),
) # JSON list[str]
threatfox_ioc_types: str = Field(
default="[]",
sa_column=Column(
"threatfox_ioc_types", _BIG_TEXT, nullable=False, default="[]",
),
) # JSON list[str]
threatfox_malware_families: str = Field(
default="[]",
sa_column=Column(
"threatfox_malware_families", _BIG_TEXT, nullable=False, default="[]",
),
) # JSON list[str]
threatfox_raw: str = Field(
default="{}",
sa_column=Column("threatfox_raw", _BIG_TEXT, nullable=False, default="{}"),

View File

@@ -58,11 +58,19 @@ class AttackerIntelMixin(_MixinBase):
if not row:
return None
d = row.model_dump(mode="json")
# One loop, two kinds of JSON-encoded column: ``*_raw`` columns
# hold provider response blobs (objects); the per-provider
# taxonomy columns hold JSON arrays the IntelLifter consumes as
# native lists.
for key in (
"greynoise_raw",
"abuseipdb_raw",
"feodo_raw",
"threatfox_raw",
"greynoise_tags",
"abuseipdb_categories",
"threatfox_threat_types",
"threatfox_ioc_types",
"threatfox_malware_families",
):
raw = d.get(key)
if isinstance(raw, str):

View File

@@ -95,6 +95,50 @@ async def test_low_score_maps_to_benign(monkeypatch):
assert result.column_updates["abuseipdb_score"] == 0
@pytest.mark.anyio
async def test_categories_flattened_from_reports(monkeypatch):
    """Post-2026-05-02 audit: provider must extract the union of
    ``data.reports[*].categories`` so the IntelLifter can dispatch
    ATT&CK techniques. Sorted for deterministic test + bus diff.

    The fixture also carries a report with no ``categories`` key, a
    report entry that is not a dict at all, and non-int category values;
    the provider is expected to skip each of them instead of crashing or
    leaking them into the stored list.
    """
    monkeypatch.setenv("DECNET_ABUSEIPDB_API_KEY", "k3y")

    async def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(
            200,
            json={"data": {
                "abuseConfidenceScore": 80,
                "reports": [
                    {"categories": [18, 22]},
                    {"categories": [22, 14]},  # 22 repeats: must be dedup'd
                    {"categories": []},
                    {"not_a_dict": True},  # a dict, just without categories
                    "not a dict",  # genuinely malformed report entry
                    {"categories": ["19", None]},  # non-int codes are dropped
                    {"categories": [21]},
                ],
            }},
        )

    _install_transport(handler)
    provider = AbuseIPDBProvider()
    result = await provider.lookup("1.2.3.4")
    cats = json.loads(result.column_updates["abuseipdb_categories"])
    assert cats == [14, 18, 21, 22]
@pytest.mark.anyio
async def test_categories_empty_when_no_reports(monkeypatch):
    """A payload with no ``reports`` key stores an empty JSON list."""
    monkeypatch.setenv("DECNET_ABUSEIPDB_API_KEY", "k3y")
    body = {"data": {"abuseConfidenceScore": 5}}

    async def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(200, json=body)

    _install_transport(handler)
    outcome = await AbuseIPDBProvider().lookup("8.8.8.8")
    stored = outcome.column_updates["abuseipdb_categories"]
    assert json.loads(stored) == []
@pytest.mark.anyio
async def test_429_returns_error(monkeypatch):
monkeypatch.setenv("DECNET_ABUSEIPDB_API_KEY", "k3y")

View File

@@ -87,6 +87,31 @@ async def test_unlisted_ip_returns_no_verdict():
assert result.column_updates["feodo_listed"] is False
@pytest.mark.anyio
async def test_listed_ip_persists_malware_family():
    """A listed IP exposes its family in the typed column.

    Follow-up to the 2026-05-02 audit: the IntelLifter reads
    ``feodo_malware_family`` as evidence, so the value has to be
    available as its own column and not only inside ``feodo_raw``.
    """

    async def handler(request: httpx.Request) -> httpx.Response:
        feed_response = httpx.Response(200, json=_FEED)
        return feed_response

    _install_transport(handler)
    feodo = FeodoProvider(refresh_interval_s=999.0)
    outcome = await feodo.lookup("9.9.9.9")
    family = outcome.column_updates["feodo_malware_family"]
    assert family == "TrickBot"
@pytest.mark.anyio
async def test_unlisted_ip_clears_family():
    """An IP absent from the feed writes ``None`` as the family."""

    async def handler(request: httpx.Request) -> httpx.Response:
        feed_response = httpx.Response(200, json=_FEED)
        return feed_response

    _install_transport(handler)
    feodo = FeodoProvider(refresh_interval_s=999.0)
    outcome = await feodo.lookup("1.2.3.4")
    family = outcome.column_updates["feodo_malware_family"]
    assert family is None
@pytest.mark.anyio
async def test_feed_failure_reports_error():
async def handler(request: httpx.Request) -> httpx.Response:

View File

@@ -123,6 +123,45 @@ async def test_429_returns_error_no_writes():
assert result.column_updates == {}
@pytest.mark.anyio
async def test_actor_name_and_tags_persisted_when_present():
    """Upstream ``name`` and ``tags`` must reach ``column_updates``.

    Follow-up to the 2026-05-02 audit. The Community endpoint does not
    return ``tags`` in practice; the fixture seeds the field regardless
    so that non-Community plans (paid / Enterprise) which do return it
    work without further code changes.
    """
    expected_tags = ["tor_exit_node", "ssh_bruteforcer"]
    upstream = {
        "classification": "malicious",
        "name": "Tor",
        "tags": expected_tags,
    }

    async def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(200, json=upstream)

    provider = GreyNoiseProvider()
    _install_transport(provider, handler)
    outcome = await provider.lookup("1.2.3.4")
    updates = outcome.column_updates
    assert updates["greynoise_name"] == "Tor"
    assert json.loads(updates["greynoise_tags"]) == expected_tags
@pytest.mark.anyio
async def test_404_clears_actor_and_tags():
    """A 404 ("not seen") answer resets the actor name and tag list."""
    not_seen = {"message": "not seen"}

    async def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(404, json=not_seen)

    provider = GreyNoiseProvider()
    _install_transport(provider, handler)
    outcome = await provider.lookup("10.0.0.5")
    updates = outcome.column_updates
    assert updates["greynoise_name"] is None
    assert updates["greynoise_tags"] == "[]"
@pytest.mark.anyio
async def test_network_failure_becomes_error():
async def handler(request: httpx.Request) -> httpx.Response:

View File

@@ -100,6 +100,61 @@ async def test_unexpected_status_is_error():
assert result.column_updates == {}
@pytest.mark.anyio
async def test_threat_types_and_ioc_types_flattened(monkeypatch):
    """Taxonomy values are unioned across every ThreatFox match.

    Follow-up to the 2026-05-02 audit: the provider has to collect the
    distinct ``threat_type`` / ``ioc_type`` / ``malware`` values from all
    matches, because the IntelLifter dispatches ATT&CK on threat_type.
    """
    monkeypatch.delenv("DECNET_THREATFOX_API_KEY", raising=False)

    sliver_match = {
        "ioc_type": "ip:port",
        "threat_type": "botnet_cc",
        "malware": "Sliver",
    }
    emotet_match = {
        "ioc_type": "url",
        "threat_type": "payload_delivery",
        "malware_printable": "Emotet",
    }
    matches = [
        sliver_match,
        emotet_match,
        dict(sliver_match),  # exact duplicate of the first match, dedup'd
        "not a dict — silently skipped",
    ]

    async def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(
            200, json={"query_status": "ok", "data": matches},
        )

    _install_transport(handler)
    outcome = await ThreatFoxProvider().lookup("1.2.3.4")
    updates = outcome.column_updates

    expected = {
        "threatfox_threat_types": ["botnet_cc", "payload_delivery"],
        "threatfox_ioc_types": ["ip:port", "url"],
        "threatfox_malware_families": ["Emotet", "Sliver"],
    }
    for column, values in expected.items():
        assert json.loads(updates[column]) == values
@pytest.mark.anyio
async def test_no_result_clears_taxonomy_columns():
    """A ``no_result`` answer writes ``"[]"`` to every taxonomy column."""
    no_match = {"query_status": "no_result"}

    async def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(200, json=no_match)

    _install_transport(handler)
    outcome = await ThreatFoxProvider().lookup("8.8.8.8")
    updates = outcome.column_updates
    for column in (
        "threatfox_threat_types",
        "threatfox_ioc_types",
        "threatfox_malware_families",
    ):
        assert updates[column] == "[]"
@pytest.mark.anyio
async def test_http_error_surfaces():
async def handler(request: httpx.Request) -> httpx.Response: