feat(intel): persist per-provider taxonomy on AttackerIntel for TTP dispatch

The 2026-05-02 ship-time audit of the R0054-R0058 intel rule pack found that AbuseIPDB / GreyNoise / ThreatFox stored only the aggregate verdict (score / classification / listed-bool) plus the raw response blob. The TTP IntelLifter expects per-provider taxonomy fields (categories, tags, threat_types) that were never populated, so R0054 / R0055 / R0057 emitted zero tags in production despite passing unit tests. Add typed columns: abuseipdb_categories, greynoise_tags, greynoise_name, feodo_malware_family, threatfox_threat_types, threatfox_ioc_types, threatfox_malware_families. Each provider now parses the relevant taxonomy out of the upstream response and writes it through column_updates. JSON-list columns are stored as TEXT with default "[]" to keep the SQLite/MySQL backend split honest; the repo deserialises them back to native lists on read.
2026-05-02 18:07:57 -04:00
parent d1c4a48963
commit 999d3494b4
10 changed files with 272 additions and 1 deletions
--- a/decnet/intel/abuseipdb.py
+++ b/decnet/intel/abuseipdb.py
@@ -93,11 +93,24 @@ class AbuseIPDBProvider(IntelProvider):
        data = payload.get("data") or {}
        score = int(data.get("abuseConfidenceScore") or 0)
        verdict = _score_to_verdict(score)
+        # AbuseIPDB returns ``data.reports[*].categories`` — a list of
+        # int codes per report. Flatten the union across all recent
+        # reports so the IntelLifter sees the full activity profile,
+        # not just the most-recent report's categories. Sorted for
+        # determinism (matters for tests + for the bus payload diff).
+        categories: set[int] = set()
+        for report in data.get("reports") or []:
+            if not isinstance(report, dict):
+                continue
+            for cat in report.get("categories") or []:
+                if isinstance(cat, int):
+                    categories.add(cat)
        return IntelResult(
            provider=self.name,
            verdict=verdict,
            column_updates={
                "abuseipdb_score": score,
+                "abuseipdb_categories": json.dumps(sorted(categories)),
                "abuseipdb_raw": json.dumps(data),
                "abuseipdb_queried_at": datetime.now(timezone.utc),
            },
--- a/decnet/intel/feodo.py
+++ b/decnet/intel/feodo.py
@@ -93,15 +93,21 @@ class FeodoProvider(IntelProvider):
                verdict=None,  # absence ≠ "benign", let other providers speak
                column_updates={
                    "feodo_listed": False,
+                    "feodo_malware_family": None,
                    "feodo_raw": "{}",
                    "feodo_queried_at": datetime.now(timezone.utc),
                },
            )
+        family_obj = entry.get("malware")
+        family = (
+            family_obj if isinstance(family_obj, str) and family_obj else None
+        )
        return IntelResult(
            provider=self.name,
            verdict="malicious",
            column_updates={
                "feodo_listed": True,
+                "feodo_malware_family": family,
                "feodo_raw": json.dumps(entry),
                "feodo_queried_at": datetime.now(timezone.utc),
            },
--- a/decnet/intel/greynoise.py
+++ b/decnet/intel/greynoise.py
@@ -71,6 +71,8 @@ class GreyNoiseProvider(IntelProvider):
                verdict="unknown",
                column_updates={
                    "greynoise_classification": "unknown",
+                    "greynoise_name": None,
+                    "greynoise_tags": "[]",
                    "greynoise_raw": json.dumps({"message": "not seen"}),
                    "greynoise_queried_at": datetime.now(timezone.utc),
                },
@@ -88,11 +90,24 @@ class GreyNoiseProvider(IntelProvider):

        classification = (data.get("classification") or "unknown").lower()
        verdict = _CLASSIFICATION_TO_VERDICT.get(classification, "unknown")
+        # The Community endpoint surfaces an actor ``name`` (e.g. "Tor",
+        # "Censys") but no behavioral tag list — the tag taxonomy is
+        # paid-tier only. Persist whatever we got; a future non-Community
+        # provider may populate ``greynoise_tags``.
+        name_obj = data.get("name")
+        name = name_obj if isinstance(name_obj, str) and name_obj else None
+        tags_obj = data.get("tags")
+        tags: list[str] = (
+            [t for t in tags_obj if isinstance(t, str)]
+            if isinstance(tags_obj, list) else []
+        )
        return IntelResult(
            provider=self.name,
            verdict=verdict,
            column_updates={
                "greynoise_classification": classification,
+                "greynoise_name": name,
+                "greynoise_tags": json.dumps(tags),
                "greynoise_raw": json.dumps(data),
                "greynoise_queried_at": datetime.now(timezone.utc),
            },
--- a/decnet/intel/threatfox.py
+++ b/decnet/intel/threatfox.py
@@ -71,6 +71,9 @@ class ThreatFoxProvider(IntelProvider):
                verdict=None,  # absence is not a benign signal
                column_updates={
                    "threatfox_listed": False,
+                    "threatfox_threat_types": "[]",
+                    "threatfox_ioc_types": "[]",
+                    "threatfox_malware_families": "[]",
                    "threatfox_raw": "{}",
                    "threatfox_queried_at": datetime.now(timezone.utc),
                },
@@ -83,11 +86,36 @@ class ThreatFoxProvider(IntelProvider):

        data = payload.get("data") or []
        listed = bool(data)
+        # Each match in ``data`` carries threat_type / ioc_type / malware
+        # (canonical family). The IntelLifter dispatches ATT&CK techniques
+        # off ``threat_type`` (botnet_cc / payload_delivery / payload /
+        # cc_skimming); the other two columns are evidence and SIEM
+        # context. Sets are flattened across matches and serialised
+        # sorted for determinism.
+        threat_types: set[str] = set()
+        ioc_types: set[str] = set()
+        families: set[str] = set()
+        if isinstance(data, list):
+            for entry in data:
+                if not isinstance(entry, dict):
+                    continue
+                tt = entry.get("threat_type")
+                if isinstance(tt, str) and tt:
+                    threat_types.add(tt)
+                it = entry.get("ioc_type")
+                if isinstance(it, str) and it:
+                    ioc_types.add(it)
+                family = entry.get("malware") or entry.get("malware_printable")
+                if isinstance(family, str) and family:
+                    families.add(family)
        return IntelResult(
            provider=self.name,
            verdict="malicious" if listed else None,
            column_updates={
                "threatfox_listed": listed,
+                "threatfox_threat_types": json.dumps(sorted(threat_types)),
+                "threatfox_ioc_types": json.dumps(sorted(ioc_types)),
+                "threatfox_malware_families": json.dumps(sorted(families)),
                "threatfox_raw": json.dumps(data),
                "threatfox_queried_at": datetime.now(timezone.utc),
            },
--- a/decnet/web/db/models/attacker_intel.py
+++ b/decnet/web/db/models/attacker_intel.py
@@ -45,8 +45,15 @@ class AttackerIntel(SQLModel, table=True):
    schema_version: int = Field(default=1)

    # ── GreyNoise Community ─────────────────────────────────────────────
-    # classification ∈ {"benign", "malicious", "suspicious", "unknown"}
+    # classification ∈ {"benign", "malicious", "suspicious", "unknown"}.
+    # The Community endpoint does not return tags; ``greynoise_tags`` stays
+    # empty unless an operator wires a non-Community provider that does.
    greynoise_classification: Optional[str] = Field(default=None, max_length=32)
+    greynoise_name: Optional[str] = Field(default=None, max_length=128)
+    greynoise_tags: str = Field(
+        default="[]",
+        sa_column=Column("greynoise_tags", _BIG_TEXT, nullable=False, default="[]"),
+    )  # JSON list[str] — behavioral / actor tags
    greynoise_raw: str = Field(
        default="{}",
        sa_column=Column("greynoise_raw", _BIG_TEXT, nullable=False, default="{}"),
@@ -56,6 +63,12 @@ class AttackerIntel(SQLModel, table=True):
    # ── AbuseIPDB ────────────────────────────────────────────────────────
    # 0..100 abuse confidence score
    abuseipdb_score: Optional[int] = Field(default=None)
+    abuseipdb_categories: str = Field(
+        default="[]",
+        sa_column=Column(
+            "abuseipdb_categories", _BIG_TEXT, nullable=False, default="[]",
+        ),
+    )  # JSON list[int] — flattened set of categories across recent reports
    abuseipdb_raw: str = Field(
        default="{}",
        sa_column=Column("abuseipdb_raw", _BIG_TEXT, nullable=False, default="{}"),
@@ -64,6 +77,7 @@ class AttackerIntel(SQLModel, table=True):

    # ── abuse.ch Feodo Tracker ───────────────────────────────────────────
    feodo_listed: Optional[bool] = Field(default=None)
+    feodo_malware_family: Optional[str] = Field(default=None, max_length=64)
    feodo_raw: str = Field(
        default="{}",
        sa_column=Column("feodo_raw", _BIG_TEXT, nullable=False, default="{}"),
@@ -71,7 +85,31 @@ class AttackerIntel(SQLModel, table=True):
    feodo_queried_at: Optional[datetime] = Field(default=None)

    # ── abuse.ch ThreatFox ───────────────────────────────────────────────
+    # ThreatFox returns a list of matches for a queried IP. Each match has
+    # a ``threat_type`` (botnet_cc / payload_delivery / payload /
+    # cc_skimming) and an ``ioc_type`` (url / domain / ip:port / hash
+    # variants). We flatten the unique sets across all matches; the
+    # IntelLifter keys ATT&CK techniques on ``threat_type``, the canonical
+    # taxonomy field per ThreatFox's API.
    threatfox_listed: Optional[bool] = Field(default=None)
+    threatfox_threat_types: str = Field(
+        default="[]",
+        sa_column=Column(
+            "threatfox_threat_types", _BIG_TEXT, nullable=False, default="[]",
+        ),
+    )  # JSON list[str]
+    threatfox_ioc_types: str = Field(
+        default="[]",
+        sa_column=Column(
+            "threatfox_ioc_types", _BIG_TEXT, nullable=False, default="[]",
+        ),
+    )  # JSON list[str]
+    threatfox_malware_families: str = Field(
+        default="[]",
+        sa_column=Column(
+            "threatfox_malware_families", _BIG_TEXT, nullable=False, default="[]",
+        ),
+    )  # JSON list[str]
    threatfox_raw: str = Field(
        default="{}",
        sa_column=Column("threatfox_raw", _BIG_TEXT, nullable=False, default="{}"),
--- a/decnet/web/db/sqlmodel_repo/attacker_intel.py
+++ b/decnet/web/db/sqlmodel_repo/attacker_intel.py
@@ -58,11 +58,19 @@ class AttackerIntelMixin(_MixinBase):
            if not row:
                return None
            d = row.model_dump(mode="json")
+            # One loop, two kinds of column: ``*_raw`` columns hold provider
+            # response blobs (objects); the per-provider taxonomy columns
+            # hold JSON arrays the IntelLifter consumes as native lists.
            for key in (
                "greynoise_raw",
                "abuseipdb_raw",
                "feodo_raw",
                "threatfox_raw",
+                "greynoise_tags",
+                "abuseipdb_categories",
+                "threatfox_threat_types",
+                "threatfox_ioc_types",
+                "threatfox_malware_families",
            ):
                raw = d.get(key)
                if isinstance(raw, str):