feat(intel): persist per-provider taxonomy on AttackerIntel for TTP dispatch
The 2026-05-02 ship-time audit of the R0054-R0058 intel rule pack found that AbuseIPDB / GreyNoise / ThreatFox stored only the aggregate verdict (score / classification / listed-bool) plus the raw response blob. The TTP IntelLifter expects per-provider taxonomy fields (categories, tags, threat_types) that were never populated, so R0054 / R0055 / R0057 emitted zero tags in production despite passing unit tests. Add typed columns: abuseipdb_categories, greynoise_tags, greynoise_name, feodo_malware_family, threatfox_threat_types, threatfox_ioc_types, threatfox_malware_families. Each provider now parses the relevant taxonomy out of the upstream response and writes it through column_updates. JSON-list columns are stored as TEXT with default "[]" (portable across the SQLite and MySQL backends), and the repo deserialises them back to native lists on read.
This commit is contained in:
@@ -93,11 +93,24 @@ class AbuseIPDBProvider(IntelProvider):
|
||||
data = payload.get("data") or {}
|
||||
score = int(data.get("abuseConfidenceScore") or 0)
|
||||
verdict = _score_to_verdict(score)
|
||||
# AbuseIPDB returns ``data.reports[*].categories`` — a list of
|
||||
# int codes per report. Flatten the union across all recent
|
||||
# reports so the IntelLifter sees the full activity profile,
|
||||
# not just the most-recent report's categories. Sorted for
|
||||
# determinism (matters for tests + for the bus payload diff).
|
||||
categories: set[int] = set()
|
||||
for report in data.get("reports") or []:
|
||||
if not isinstance(report, dict):
|
||||
continue
|
||||
for cat in report.get("categories") or []:
|
||||
if isinstance(cat, int):
|
||||
categories.add(cat)
|
||||
return IntelResult(
|
||||
provider=self.name,
|
||||
verdict=verdict,
|
||||
column_updates={
|
||||
"abuseipdb_score": score,
|
||||
"abuseipdb_categories": json.dumps(sorted(categories)),
|
||||
"abuseipdb_raw": json.dumps(data),
|
||||
"abuseipdb_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
|
||||
@@ -93,15 +93,21 @@ class FeodoProvider(IntelProvider):
|
||||
verdict=None, # absence ≠ "benign", let other providers speak
|
||||
column_updates={
|
||||
"feodo_listed": False,
|
||||
"feodo_malware_family": None,
|
||||
"feodo_raw": "{}",
|
||||
"feodo_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
)
|
||||
family_obj = entry.get("malware")
|
||||
family = (
|
||||
family_obj if isinstance(family_obj, str) and family_obj else None
|
||||
)
|
||||
return IntelResult(
|
||||
provider=self.name,
|
||||
verdict="malicious",
|
||||
column_updates={
|
||||
"feodo_listed": True,
|
||||
"feodo_malware_family": family,
|
||||
"feodo_raw": json.dumps(entry),
|
||||
"feodo_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
|
||||
@@ -71,6 +71,8 @@ class GreyNoiseProvider(IntelProvider):
|
||||
verdict="unknown",
|
||||
column_updates={
|
||||
"greynoise_classification": "unknown",
|
||||
"greynoise_name": None,
|
||||
"greynoise_tags": "[]",
|
||||
"greynoise_raw": json.dumps({"message": "not seen"}),
|
||||
"greynoise_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
@@ -88,11 +90,24 @@ class GreyNoiseProvider(IntelProvider):
|
||||
|
||||
classification = (data.get("classification") or "unknown").lower()
|
||||
verdict = _CLASSIFICATION_TO_VERDICT.get(classification, "unknown")
|
||||
# The Community endpoint surfaces an actor ``name`` (e.g. "Tor",
|
||||
# "Censys") but no behavioral tag list — the tag taxonomy is
|
||||
# paid-tier only. Persist whatever we got; a future non-Community
|
||||
# provider may populate ``greynoise_tags``.
|
||||
name_obj = data.get("name")
|
||||
name = name_obj if isinstance(name_obj, str) and name_obj else None
|
||||
tags_obj = data.get("tags")
|
||||
tags: list[str] = (
|
||||
[t for t in tags_obj if isinstance(t, str)]
|
||||
if isinstance(tags_obj, list) else []
|
||||
)
|
||||
return IntelResult(
|
||||
provider=self.name,
|
||||
verdict=verdict,
|
||||
column_updates={
|
||||
"greynoise_classification": classification,
|
||||
"greynoise_name": name,
|
||||
"greynoise_tags": json.dumps(tags),
|
||||
"greynoise_raw": json.dumps(data),
|
||||
"greynoise_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
|
||||
@@ -71,6 +71,9 @@ class ThreatFoxProvider(IntelProvider):
|
||||
verdict=None, # absence is not a benign signal
|
||||
column_updates={
|
||||
"threatfox_listed": False,
|
||||
"threatfox_threat_types": "[]",
|
||||
"threatfox_ioc_types": "[]",
|
||||
"threatfox_malware_families": "[]",
|
||||
"threatfox_raw": "{}",
|
||||
"threatfox_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
@@ -83,11 +86,36 @@ class ThreatFoxProvider(IntelProvider):
|
||||
|
||||
data = payload.get("data") or []
|
||||
listed = bool(data)
|
||||
# Each match in ``data`` carries threat_type / ioc_type / malware
|
||||
# (canonical family). The IntelLifter dispatches ATT&CK techniques
|
||||
# off ``threat_type`` (botnet_cc / payload_delivery / payload /
|
||||
# cc_skimming); the other two columns are evidence and SIEM
|
||||
# context. Sets are flattened across matches and serialised
|
||||
# sorted for determinism.
|
||||
threat_types: set[str] = set()
|
||||
ioc_types: set[str] = set()
|
||||
families: set[str] = set()
|
||||
if isinstance(data, list):
|
||||
for entry in data:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
tt = entry.get("threat_type")
|
||||
if isinstance(tt, str) and tt:
|
||||
threat_types.add(tt)
|
||||
it = entry.get("ioc_type")
|
||||
if isinstance(it, str) and it:
|
||||
ioc_types.add(it)
|
||||
family = entry.get("malware") or entry.get("malware_printable")
|
||||
if isinstance(family, str) and family:
|
||||
families.add(family)
|
||||
return IntelResult(
|
||||
provider=self.name,
|
||||
verdict="malicious" if listed else None,
|
||||
column_updates={
|
||||
"threatfox_listed": listed,
|
||||
"threatfox_threat_types": json.dumps(sorted(threat_types)),
|
||||
"threatfox_ioc_types": json.dumps(sorted(ioc_types)),
|
||||
"threatfox_malware_families": json.dumps(sorted(families)),
|
||||
"threatfox_raw": json.dumps(data),
|
||||
"threatfox_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
|
||||
@@ -45,8 +45,15 @@ class AttackerIntel(SQLModel, table=True):
|
||||
schema_version: int = Field(default=1)
|
||||
|
||||
# ── GreyNoise Community ─────────────────────────────────────────────
|
||||
# classification ∈ {"benign", "malicious", "suspicious", "unknown"}
|
||||
# classification ∈ {"benign", "malicious", "suspicious", "unknown"}.
|
||||
# The Community endpoint does not return tags; ``greynoise_tags`` stays
|
||||
# empty unless an operator wires a non-Community provider that does.
|
||||
greynoise_classification: Optional[str] = Field(default=None, max_length=32)
|
||||
greynoise_name: Optional[str] = Field(default=None, max_length=128)
|
||||
greynoise_tags: str = Field(
|
||||
default="[]",
|
||||
sa_column=Column("greynoise_tags", _BIG_TEXT, nullable=False, default="[]"),
|
||||
) # JSON list[str] — behavioral / actor tags
|
||||
greynoise_raw: str = Field(
|
||||
default="{}",
|
||||
sa_column=Column("greynoise_raw", _BIG_TEXT, nullable=False, default="{}"),
|
||||
@@ -56,6 +63,12 @@ class AttackerIntel(SQLModel, table=True):
|
||||
# ── AbuseIPDB ────────────────────────────────────────────────────────
|
||||
# 0..100 abuse confidence score
|
||||
abuseipdb_score: Optional[int] = Field(default=None)
|
||||
abuseipdb_categories: str = Field(
|
||||
default="[]",
|
||||
sa_column=Column(
|
||||
"abuseipdb_categories", _BIG_TEXT, nullable=False, default="[]",
|
||||
),
|
||||
) # JSON list[int] — flattened set of categories across recent reports
|
||||
abuseipdb_raw: str = Field(
|
||||
default="{}",
|
||||
sa_column=Column("abuseipdb_raw", _BIG_TEXT, nullable=False, default="{}"),
|
||||
@@ -64,6 +77,7 @@ class AttackerIntel(SQLModel, table=True):
|
||||
|
||||
# ── abuse.ch Feodo Tracker ───────────────────────────────────────────
|
||||
feodo_listed: Optional[bool] = Field(default=None)
|
||||
feodo_malware_family: Optional[str] = Field(default=None, max_length=64)
|
||||
feodo_raw: str = Field(
|
||||
default="{}",
|
||||
sa_column=Column("feodo_raw", _BIG_TEXT, nullable=False, default="{}"),
|
||||
@@ -71,7 +85,31 @@ class AttackerIntel(SQLModel, table=True):
|
||||
feodo_queried_at: Optional[datetime] = Field(default=None)
|
||||
|
||||
# ── abuse.ch ThreatFox ───────────────────────────────────────────────
|
||||
# ThreatFox returns a list of matches for a queried IP. Each match has
|
||||
# a ``threat_type`` (botnet_cc / payload_delivery / payload /
|
||||
# cc_skimming) and an ``ioc_type`` (url / domain / ip:port / hash
|
||||
# variants). We flatten the unique sets across all matches; the
|
||||
# IntelLifter keys ATT&CK techniques on ``threat_type``, the canonical
|
||||
# taxonomy field per ThreatFox's API.
|
||||
threatfox_listed: Optional[bool] = Field(default=None)
|
||||
threatfox_threat_types: str = Field(
|
||||
default="[]",
|
||||
sa_column=Column(
|
||||
"threatfox_threat_types", _BIG_TEXT, nullable=False, default="[]",
|
||||
),
|
||||
) # JSON list[str]
|
||||
threatfox_ioc_types: str = Field(
|
||||
default="[]",
|
||||
sa_column=Column(
|
||||
"threatfox_ioc_types", _BIG_TEXT, nullable=False, default="[]",
|
||||
),
|
||||
) # JSON list[str]
|
||||
threatfox_malware_families: str = Field(
|
||||
default="[]",
|
||||
sa_column=Column(
|
||||
"threatfox_malware_families", _BIG_TEXT, nullable=False, default="[]",
|
||||
),
|
||||
) # JSON list[str]
|
||||
threatfox_raw: str = Field(
|
||||
default="{}",
|
||||
sa_column=Column("threatfox_raw", _BIG_TEXT, nullable=False, default="{}"),
|
||||
|
||||
@@ -58,11 +58,19 @@ class AttackerIntelMixin(_MixinBase):
|
||||
if not row:
|
||||
return None
|
||||
d = row.model_dump(mode="json")
|
||||
# One pass, two kinds of JSON column: ``*_raw`` columns hold provider response blobs
|
||||
# (objects); the per-provider taxonomy columns hold JSON
|
||||
# arrays the IntelLifter consumes as native lists.
|
||||
for key in (
|
||||
"greynoise_raw",
|
||||
"abuseipdb_raw",
|
||||
"feodo_raw",
|
||||
"threatfox_raw",
|
||||
"greynoise_tags",
|
||||
"abuseipdb_categories",
|
||||
"threatfox_threat_types",
|
||||
"threatfox_ioc_types",
|
||||
"threatfox_malware_families",
|
||||
):
|
||||
raw = d.get(key)
|
||||
if isinstance(raw, str):
|
||||
|
||||
@@ -95,6 +95,50 @@ async def test_low_score_maps_to_benign(monkeypatch):
|
||||
assert result.column_updates["abuseipdb_score"] == 0
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_categories_flattened_from_reports(monkeypatch):
    """Categories from every report are unioned, de-duplicated and sorted.

    Regression guard for the 2026-05-02 audit: the provider must extract
    the union of ``data.reports[*].categories`` so the IntelLifter can
    dispatch ATT&CK techniques. The result is compared as a sorted list,
    which keeps the test (and the bus payload diff) deterministic.

    The fixture covers a repeated code, an empty category list, a report
    dict that has no ``categories`` key, and a report that is not a dict
    at all; none of the last three may contribute a category.
    """
    monkeypatch.setenv("DECNET_ABUSEIPDB_API_KEY", "k3y")

    upstream_body = {
        "data": {
            "abuseConfidenceScore": 80,
            "reports": [
                {"categories": [18, 22]},
                {"categories": [22, 14]},  # 22 repeats: must be de-duplicated
                {"categories": []},
                {"not_a_dict": True},  # a dict, but without "categories"
                "not a dict",  # genuinely malformed report entry
                {"categories": [21]},
            ],
        },
    }

    async def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(200, json=upstream_body)

    _install_transport(handler)
    provider = AbuseIPDBProvider()
    result = await provider.lookup("1.2.3.4")

    cats = json.loads(result.column_updates["abuseipdb_categories"])
    assert cats == [14, 18, 21, 22]
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_categories_empty_when_no_reports(monkeypatch):
    """A response without a ``reports`` key serialises an empty list."""
    monkeypatch.setenv("DECNET_ABUSEIPDB_API_KEY", "k3y")

    async def handler(request: httpx.Request) -> httpx.Response:
        body = {"data": {"abuseConfidenceScore": 5}}
        return httpx.Response(200, json=body)

    _install_transport(handler)
    outcome = await AbuseIPDBProvider().lookup("8.8.8.8")

    stored = outcome.column_updates["abuseipdb_categories"]
    assert json.loads(stored) == []
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_429_returns_error(monkeypatch):
|
||||
monkeypatch.setenv("DECNET_ABUSEIPDB_API_KEY", "k3y")
|
||||
|
||||
@@ -87,6 +87,31 @@ async def test_unlisted_ip_returns_no_verdict():
|
||||
assert result.column_updates["feodo_listed"] is False
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_listed_ip_persists_malware_family():
    """A listed IP writes its family to ``feodo_malware_family``.

    Regression guard for the 2026-05-02 audit: the IntelLifter reads the
    typed ``feodo_malware_family`` column for evidence, so the value must
    not live only inside ``feodo_raw``.
    """
    async def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(200, json=_FEED)

    _install_transport(handler)
    feodo = FeodoProvider(refresh_interval_s=999.0)
    updates = (await feodo.lookup("9.9.9.9")).column_updates

    assert updates["feodo_malware_family"] == "TrickBot"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_unlisted_ip_clears_family():
    """The 1.2.3.4 lookup writes ``None`` to ``feodo_malware_family``."""
    async def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(200, json=_FEED)

    _install_transport(handler)
    feodo = FeodoProvider(refresh_interval_s=999.0)
    updates = (await feodo.lookup("1.2.3.4")).column_updates

    assert updates["feodo_malware_family"] is None
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_feed_failure_reports_error():
|
||||
async def handler(request: httpx.Request) -> httpx.Response:
|
||||
|
||||
@@ -123,6 +123,45 @@ async def test_429_returns_error_no_writes():
|
||||
assert result.column_updates == {}
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_actor_name_and_tags_persisted_when_present():
    """Upstream ``name`` and ``tags`` land in ``column_updates``.

    Regression guard for the 2026-05-02 audit. The Community endpoint
    does not return ``tags`` in practice; the fixture seeds the field
    anyway so that plans which do return it (paid / Enterprise) are
    covered without further code changes.
    """
    upstream_body = {
        "classification": "malicious",
        "name": "Tor",
        "tags": ["tor_exit_node", "ssh_bruteforcer"],
    }

    async def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(200, json=upstream_body)

    provider = GreyNoiseProvider()
    _install_transport(provider, handler)
    updates = (await provider.lookup("1.2.3.4")).column_updates

    assert updates["greynoise_name"] == "Tor"
    # Tags are compared in the order the upstream sent them.
    decoded_tags = json.loads(updates["greynoise_tags"])
    assert decoded_tags == ["tor_exit_node", "ssh_bruteforcer"]
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_404_clears_actor_and_tags():
    """A 404 "not seen" reply resets the actor name and the tag list."""
    async def handler(request: httpx.Request) -> httpx.Response:
        not_seen = {"message": "not seen"}
        return httpx.Response(404, json=not_seen)

    provider = GreyNoiseProvider()
    _install_transport(provider, handler)
    updates = (await provider.lookup("10.0.0.5")).column_updates

    assert updates["greynoise_name"] is None
    # Compared as the raw serialised string, not the decoded list.
    assert updates["greynoise_tags"] == "[]"
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_network_failure_becomes_error():
|
||||
async def handler(request: httpx.Request) -> httpx.Response:
|
||||
|
||||
@@ -100,6 +100,61 @@ async def test_unexpected_status_is_error():
|
||||
assert result.column_updates == {}
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_threat_types_and_ioc_types_flattened(monkeypatch):
    """Taxonomy fields are unioned across all ThreatFox matches.

    Regression guard for the 2026-05-02 audit: ``threat_type``,
    ``ioc_type`` and the malware family (``malware``, or
    ``malware_printable`` when ``malware`` is absent) are collected from
    every match, de-duplicated, and compared here in sorted order. The
    IntelLifter dispatches ATT&CK techniques on ``threat_type``.
    """
    monkeypatch.delenv("DECNET_THREATFOX_API_KEY", raising=False)

    sliver_match = {
        "ioc_type": "ip:port",
        "threat_type": "botnet_cc",
        "malware": "Sliver",
    }
    emotet_match = {
        "ioc_type": "url",
        "threat_type": "payload_delivery",
        "malware_printable": "Emotet",
    }
    matches = [
        sliver_match,
        emotet_match,
        dict(sliver_match),  # exact duplicate: must collapse into one
        "not a dict — silently skipped",
    ]

    async def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(
            200, json={"query_status": "ok", "data": matches},
        )

    _install_transport(handler)
    outcome = await ThreatFoxProvider().lookup("1.2.3.4")
    updates = outcome.column_updates

    threat_types = json.loads(updates["threatfox_threat_types"])
    ioc_types = json.loads(updates["threatfox_ioc_types"])
    families = json.loads(updates["threatfox_malware_families"])

    assert threat_types == ["botnet_cc", "payload_delivery"]
    assert ioc_types == ["ip:port", "url"]
    assert families == ["Emotet", "Sliver"]
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_no_result_clears_taxonomy_columns():
    """A ``no_result`` reply writes ``"[]"`` to every taxonomy column."""
    async def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(200, json={"query_status": "no_result"})

    _install_transport(handler)
    outcome = await ThreatFoxProvider().lookup("8.8.8.8")
    updates = outcome.column_updates

    # Each column is compared as the raw serialised string.
    for column in (
        "threatfox_threat_types",
        "threatfox_ioc_types",
        "threatfox_malware_families",
    ):
        assert updates[column] == "[]"
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_http_error_surfaces():
|
||||
async def handler(request: httpx.Request) -> httpx.Response:
|
||||
|
||||
Reference in New Issue
Block a user