feat(intel): persist per-provider taxonomy on AttackerIntel for TTP dispatch

The 2026-05-02 ship-time audit of the R0054-R0058 intel rule pack found
that AbuseIPDB / GreyNoise / ThreatFox stored only the aggregate verdict
(score / classification / listed-bool) plus the raw response blob. The
TTP IntelLifter expects per-provider taxonomy fields (categories, tags,
threat_types) that were never populated, so R0054 / R0055 / R0057
emitted zero tags in production despite passing unit tests.

Add typed columns: abuseipdb_categories, greynoise_tags, greynoise_name,
feodo_malware_family, threatfox_threat_types, threatfox_ioc_types,
threatfox_malware_families. Each provider now parses the relevant
taxonomy out of the upstream response and writes it through
column_updates. JSON-list columns ride as TEXT with default "[]" to
keep the SQLite/MySQL backend split honest; the repo deserialises them
back to native lists on read.
This commit is contained in:
2026-05-02 18:07:57 -04:00
parent d1c4a48963
commit 999d3494b4
10 changed files with 272 additions and 1 deletions

View File

@@ -45,8 +45,15 @@ class AttackerIntel(SQLModel, table=True):
schema_version: int = Field(default=1)
# ── GreyNoise Community ─────────────────────────────────────────────
# classification ∈ {"benign", "malicious", "suspicious", "unknown"}
# classification ∈ {"benign", "malicious", "suspicious", "unknown"}.
# The Community endpoint does not return tags; ``greynoise_tags`` stays
# empty unless an operator wires a non-Community provider that does.
greynoise_classification: Optional[str] = Field(default=None, max_length=32)
greynoise_name: Optional[str] = Field(default=None, max_length=128)
greynoise_tags: str = Field(
default="[]",
sa_column=Column("greynoise_tags", _BIG_TEXT, nullable=False, default="[]"),
) # JSON list[str] — behavioral / actor tags
greynoise_raw: str = Field(
default="{}",
sa_column=Column("greynoise_raw", _BIG_TEXT, nullable=False, default="{}"),
@@ -56,6 +63,12 @@ class AttackerIntel(SQLModel, table=True):
# ── AbuseIPDB ────────────────────────────────────────────────────────
# 0..100 abuse confidence score
abuseipdb_score: Optional[int] = Field(default=None)
abuseipdb_categories: str = Field(
default="[]",
sa_column=Column(
"abuseipdb_categories", _BIG_TEXT, nullable=False, default="[]",
),
) # JSON list[int] — flattened set of categories across recent reports
abuseipdb_raw: str = Field(
default="{}",
sa_column=Column("abuseipdb_raw", _BIG_TEXT, nullable=False, default="{}"),
@@ -64,6 +77,7 @@ class AttackerIntel(SQLModel, table=True):
# ── abuse.ch Feodo Tracker ───────────────────────────────────────────
feodo_listed: Optional[bool] = Field(default=None)
feodo_malware_family: Optional[str] = Field(default=None, max_length=64)
feodo_raw: str = Field(
default="{}",
sa_column=Column("feodo_raw", _BIG_TEXT, nullable=False, default="{}"),
@@ -71,7 +85,31 @@ class AttackerIntel(SQLModel, table=True):
feodo_queried_at: Optional[datetime] = Field(default=None)
# ── abuse.ch ThreatFox ───────────────────────────────────────────────
# ThreatFox returns a list of matches for a queried IP. Each match has
# a ``threat_type`` (botnet_cc / payload_delivery / payload /
# cc_skimming) and an ``ioc_type`` (url / domain / ip:port / hash
# variants). We flatten the unique sets across all matches; the
# IntelLifter keys ATT&CK techniques on ``threat_type``, the canonical
# taxonomy field per ThreatFox's API.
threatfox_listed: Optional[bool] = Field(default=None)
threatfox_threat_types: str = Field(
default="[]",
sa_column=Column(
"threatfox_threat_types", _BIG_TEXT, nullable=False, default="[]",
),
) # JSON list[str]
threatfox_ioc_types: str = Field(
default="[]",
sa_column=Column(
"threatfox_ioc_types", _BIG_TEXT, nullable=False, default="[]",
),
) # JSON list[str]
threatfox_malware_families: str = Field(
default="[]",
sa_column=Column(
"threatfox_malware_families", _BIG_TEXT, nullable=False, default="[]",
),
) # JSON list[str]
threatfox_raw: str = Field(
default="{}",
sa_column=Column("threatfox_raw", _BIG_TEXT, nullable=False, default="{}"),

View File

@@ -58,11 +58,19 @@ class AttackerIntelMixin(_MixinBase):
if not row:
return None
d = row.model_dump(mode="json")
# One pass over two kinds of JSON column: ``*_raw`` columns hold
# provider response blobs (objects); the per-provider taxonomy
# columns hold JSON arrays the IntelLifter consumes as native lists.
for key in (
"greynoise_raw",
"abuseipdb_raw",
"feodo_raw",
"threatfox_raw",
"greynoise_tags",
"abuseipdb_categories",
"threatfox_threat_types",
"threatfox_ioc_types",
"threatfox_malware_families",
):
raw = d.get(key)
if isinstance(raw, str):