From 999d3494b4ca344823420a877be21bd89794b614 Mon Sep 17 00:00:00 2001 From: anti Date: Sat, 2 May 2026 18:07:57 -0400 Subject: [PATCH] feat(intel): persist per-provider taxonomy on AttackerIntel for TTP dispatch The 2026-05-02 ship-time audit of the R0054-R0058 intel rule pack found that AbuseIPDB / GreyNoise / ThreatFox stored only the aggregate verdict (score / classification / listed-bool) plus the raw response blob. The TTP IntelLifter expects per-provider taxonomy fields (categories, tags, threat_types) that were never populated, so R0054 / R0055 / R0057 emitted zero tags in production despite passing unit tests. Add typed columns: abuseipdb_categories, greynoise_tags, greynoise_name, feodo_malware_family, threatfox_threat_types, threatfox_ioc_types, threatfox_malware_families. Each provider now parses the relevant taxonomy out of the upstream response and writes it through column_updates. JSON-list columns ride as TEXT with default "[]" to keep the SQLite/MySQL backend split honest, deserialised back to native lists by the repo on read. 
--- decnet/intel/abuseipdb.py | 13 +++++ decnet/intel/feodo.py | 6 ++ decnet/intel/greynoise.py | 15 +++++ decnet/intel/threatfox.py | 28 ++++++++++ decnet/web/db/models/attacker_intel.py | 40 +++++++++++++- decnet/web/db/sqlmodel_repo/attacker_intel.py | 8 +++ tests/intel/test_abuseipdb.py | 44 +++++++++++++++ tests/intel/test_feodo.py | 25 +++++++++ tests/intel/test_greynoise.py | 39 +++++++++++++ tests/intel/test_threatfox.py | 55 +++++++++++++++++++ 10 files changed, 272 insertions(+), 1 deletion(-) diff --git a/decnet/intel/abuseipdb.py b/decnet/intel/abuseipdb.py index a099c4c5..8cfc1c7a 100644 --- a/decnet/intel/abuseipdb.py +++ b/decnet/intel/abuseipdb.py @@ -93,11 +93,24 @@ class AbuseIPDBProvider(IntelProvider): data = payload.get("data") or {} score = int(data.get("abuseConfidenceScore") or 0) verdict = _score_to_verdict(score) + # AbuseIPDB returns ``data.reports[*].categories`` — a list of + # int codes per report. Flatten the union across all recent + # reports so the IntelLifter sees the full activity profile, + # not just the most-recent report's categories. Sorted for + # determinism (matters for tests + for the bus payload diff). 
+ categories: set[int] = set() + for report in data.get("reports") or []: + if not isinstance(report, dict): + continue + for cat in report.get("categories") or []: + if isinstance(cat, int): + categories.add(cat) return IntelResult( provider=self.name, verdict=verdict, column_updates={ "abuseipdb_score": score, + "abuseipdb_categories": json.dumps(sorted(categories)), "abuseipdb_raw": json.dumps(data), "abuseipdb_queried_at": datetime.now(timezone.utc), }, diff --git a/decnet/intel/feodo.py b/decnet/intel/feodo.py index 284dd9d1..bede265d 100644 --- a/decnet/intel/feodo.py +++ b/decnet/intel/feodo.py @@ -93,15 +93,21 @@ class FeodoProvider(IntelProvider): verdict=None, # absence ≠ "benign", let other providers speak column_updates={ "feodo_listed": False, + "feodo_malware_family": None, "feodo_raw": "{}", "feodo_queried_at": datetime.now(timezone.utc), }, ) + family_obj = entry.get("malware") + family = ( + family_obj if isinstance(family_obj, str) and family_obj else None + ) return IntelResult( provider=self.name, verdict="malicious", column_updates={ "feodo_listed": True, + "feodo_malware_family": family, "feodo_raw": json.dumps(entry), "feodo_queried_at": datetime.now(timezone.utc), }, diff --git a/decnet/intel/greynoise.py b/decnet/intel/greynoise.py index b702c311..1ecfa87c 100644 --- a/decnet/intel/greynoise.py +++ b/decnet/intel/greynoise.py @@ -71,6 +71,8 @@ class GreyNoiseProvider(IntelProvider): verdict="unknown", column_updates={ "greynoise_classification": "unknown", + "greynoise_name": None, + "greynoise_tags": "[]", "greynoise_raw": json.dumps({"message": "not seen"}), "greynoise_queried_at": datetime.now(timezone.utc), }, @@ -88,11 +90,24 @@ class GreyNoiseProvider(IntelProvider): classification = (data.get("classification") or "unknown").lower() verdict = _CLASSIFICATION_TO_VERDICT.get(classification, "unknown") + # The Community endpoint surfaces an actor ``name`` (e.g. 
"Tor", + # "Censys") but no behavioral tag list — the tag taxonomy is + # paid-tier only. Persist whatever we got; a future non-Community + # provider may populate ``greynoise_tags``. + name_obj = data.get("name") + name = name_obj if isinstance(name_obj, str) and name_obj else None + tags_obj = data.get("tags") + tags: list[str] = ( + [t for t in tags_obj if isinstance(t, str)] + if isinstance(tags_obj, list) else [] + ) return IntelResult( provider=self.name, verdict=verdict, column_updates={ "greynoise_classification": classification, + "greynoise_name": name, + "greynoise_tags": json.dumps(tags), "greynoise_raw": json.dumps(data), "greynoise_queried_at": datetime.now(timezone.utc), }, diff --git a/decnet/intel/threatfox.py b/decnet/intel/threatfox.py index 17bdb787..45790efc 100644 --- a/decnet/intel/threatfox.py +++ b/decnet/intel/threatfox.py @@ -71,6 +71,9 @@ class ThreatFoxProvider(IntelProvider): verdict=None, # absence is not a benign signal column_updates={ "threatfox_listed": False, + "threatfox_threat_types": "[]", + "threatfox_ioc_types": "[]", + "threatfox_malware_families": "[]", "threatfox_raw": "{}", "threatfox_queried_at": datetime.now(timezone.utc), }, @@ -83,11 +86,36 @@ class ThreatFoxProvider(IntelProvider): data = payload.get("data") or [] listed = bool(data) + # Each match in ``data`` carries threat_type / ioc_type / malware + # (canonical family). The IntelLifter dispatches ATT&CK techniques + # off ``threat_type`` (botnet_cc / payload_delivery / payload / + # cc_skimming); the other two columns are evidence and SIEM + # context. Sets are flattened across matches and serialised + # sorted for determinism. 
+ threat_types: set[str] = set() + ioc_types: set[str] = set() + families: set[str] = set() + if isinstance(data, list): + for entry in data: + if not isinstance(entry, dict): + continue + tt = entry.get("threat_type") + if isinstance(tt, str) and tt: + threat_types.add(tt) + it = entry.get("ioc_type") + if isinstance(it, str) and it: + ioc_types.add(it) + family = entry.get("malware") or entry.get("malware_printable") + if isinstance(family, str) and family: + families.add(family) return IntelResult( provider=self.name, verdict="malicious" if listed else None, column_updates={ "threatfox_listed": listed, + "threatfox_threat_types": json.dumps(sorted(threat_types)), + "threatfox_ioc_types": json.dumps(sorted(ioc_types)), + "threatfox_malware_families": json.dumps(sorted(families)), "threatfox_raw": json.dumps(data), "threatfox_queried_at": datetime.now(timezone.utc), }, diff --git a/decnet/web/db/models/attacker_intel.py b/decnet/web/db/models/attacker_intel.py index 42087221..99be69df 100644 --- a/decnet/web/db/models/attacker_intel.py +++ b/decnet/web/db/models/attacker_intel.py @@ -45,8 +45,15 @@ class AttackerIntel(SQLModel, table=True): schema_version: int = Field(default=1) # ── GreyNoise Community ───────────────────────────────────────────── - # classification ∈ {"benign", "malicious", "suspicious", "unknown"} + # classification ∈ {"benign", "malicious", "suspicious", "unknown"}. + # The Community endpoint does not return tags; ``greynoise_tags`` stays + # empty unless an operator wires a non-Community provider that does. 
greynoise_classification: Optional[str] = Field(default=None, max_length=32) + greynoise_name: Optional[str] = Field(default=None, max_length=128) + greynoise_tags: str = Field( + default="[]", + sa_column=Column("greynoise_tags", _BIG_TEXT, nullable=False, default="[]"), + ) # JSON list[str] — behavioral / actor tags greynoise_raw: str = Field( default="{}", sa_column=Column("greynoise_raw", _BIG_TEXT, nullable=False, default="{}"), @@ -56,6 +63,12 @@ class AttackerIntel(SQLModel, table=True): # ── AbuseIPDB ──────────────────────────────────────────────────────── # 0..100 abuse confidence score abuseipdb_score: Optional[int] = Field(default=None) + abuseipdb_categories: str = Field( + default="[]", + sa_column=Column( + "abuseipdb_categories", _BIG_TEXT, nullable=False, default="[]", + ), + ) # JSON list[int] — flattened set of categories across recent reports abuseipdb_raw: str = Field( default="{}", sa_column=Column("abuseipdb_raw", _BIG_TEXT, nullable=False, default="{}"), @@ -64,6 +77,7 @@ class AttackerIntel(SQLModel, table=True): # ── abuse.ch Feodo Tracker ─────────────────────────────────────────── feodo_listed: Optional[bool] = Field(default=None) + feodo_malware_family: Optional[str] = Field(default=None, max_length=64) feodo_raw: str = Field( default="{}", sa_column=Column("feodo_raw", _BIG_TEXT, nullable=False, default="{}"), @@ -71,7 +85,31 @@ class AttackerIntel(SQLModel, table=True): feodo_queried_at: Optional[datetime] = Field(default=None) # ── abuse.ch ThreatFox ─────────────────────────────────────────────── + # ThreatFox returns a list of matches for a queried IP. Each match has + # a ``threat_type`` (botnet_cc / payload_delivery / payload / + # cc_skimming) and an ``ioc_type`` (url / domain / ip:port / hash + # variants). We flatten the unique sets across all matches; the + # IntelLifter keys ATT&CK techniques on ``threat_type``, the canonical + # taxonomy field per ThreatFox's API. 
threatfox_listed: Optional[bool] = Field(default=None) + threatfox_threat_types: str = Field( + default="[]", + sa_column=Column( + "threatfox_threat_types", _BIG_TEXT, nullable=False, default="[]", + ), + ) # JSON list[str] + threatfox_ioc_types: str = Field( + default="[]", + sa_column=Column( + "threatfox_ioc_types", _BIG_TEXT, nullable=False, default="[]", + ), + ) # JSON list[str] + threatfox_malware_families: str = Field( + default="[]", + sa_column=Column( + "threatfox_malware_families", _BIG_TEXT, nullable=False, default="[]", + ), + ) # JSON list[str] threatfox_raw: str = Field( default="{}", sa_column=Column("threatfox_raw", _BIG_TEXT, nullable=False, default="{}"), diff --git a/decnet/web/db/sqlmodel_repo/attacker_intel.py b/decnet/web/db/sqlmodel_repo/attacker_intel.py index a0cc6696..6bc47e69 100644 --- a/decnet/web/db/sqlmodel_repo/attacker_intel.py +++ b/decnet/web/db/sqlmodel_repo/attacker_intel.py @@ -58,11 +58,19 @@ class AttackerIntelMixin(_MixinBase): if not row: return None d = row.model_dump(mode="json") + # One pass over two kinds of JSON-encoded TEXT columns: ``*_raw`` + # columns hold provider response blobs (objects); the per-provider + # taxonomy columns hold JSON arrays the IntelLifter consumes as native lists. 
for key in ( "greynoise_raw", "abuseipdb_raw", "feodo_raw", "threatfox_raw", + "greynoise_tags", + "abuseipdb_categories", + "threatfox_threat_types", + "threatfox_ioc_types", + "threatfox_malware_families", ): raw = d.get(key) if isinstance(raw, str): diff --git a/tests/intel/test_abuseipdb.py b/tests/intel/test_abuseipdb.py index 20180da7..c69d21c4 100644 --- a/tests/intel/test_abuseipdb.py +++ b/tests/intel/test_abuseipdb.py @@ -95,6 +95,50 @@ async def test_low_score_maps_to_benign(monkeypatch): assert result.column_updates["abuseipdb_score"] == 0 +@pytest.mark.anyio +async def test_categories_flattened_from_reports(monkeypatch): + """Post-2026-05-02 audit: provider must extract the union of + ``data.reports[*].categories`` so the IntelLifter can dispatch + ATT&CK techniques. Sorted for deterministic test + bus diff.""" + monkeypatch.setenv("DECNET_ABUSEIPDB_API_KEY", "k3y") + + async def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, + json={"data": { + "abuseConfidenceScore": 80, + "reports": [ + {"categories": [18, 22]}, + {"categories": [22, 14]}, + {"categories": []}, + {"not_a_dict": True}, + {"categories": [21]}, + ], + }}, + ) + + _install_transport(handler) + provider = AbuseIPDBProvider() + result = await provider.lookup("1.2.3.4") + cats = json.loads(result.column_updates["abuseipdb_categories"]) + assert cats == [14, 18, 21, 22] + + +@pytest.mark.anyio +async def test_categories_empty_when_no_reports(monkeypatch): + monkeypatch.setenv("DECNET_ABUSEIPDB_API_KEY", "k3y") + + async def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, json={"data": {"abuseConfidenceScore": 5}}, + ) + + _install_transport(handler) + provider = AbuseIPDBProvider() + result = await provider.lookup("8.8.8.8") + assert json.loads(result.column_updates["abuseipdb_categories"]) == [] + + @pytest.mark.anyio async def test_429_returns_error(monkeypatch): monkeypatch.setenv("DECNET_ABUSEIPDB_API_KEY", "k3y") 
diff --git a/tests/intel/test_feodo.py b/tests/intel/test_feodo.py index 049392c7..7c0db105 100644 --- a/tests/intel/test_feodo.py +++ b/tests/intel/test_feodo.py @@ -87,6 +87,31 @@ async def test_unlisted_ip_returns_no_verdict(): assert result.column_updates["feodo_listed"] is False +@pytest.mark.anyio +async def test_listed_ip_persists_malware_family(): + """Post-2026-05-02 audit: IntelLifter reads + ``feodo_malware_family`` for evidence; persist it as a typed + column rather than only inside ``feodo_raw``.""" + async def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(200, json=_FEED) + + _install_transport(handler) + provider = FeodoProvider(refresh_interval_s=999.0) + result = await provider.lookup("9.9.9.9") + assert result.column_updates["feodo_malware_family"] == "TrickBot" + + +@pytest.mark.anyio +async def test_unlisted_ip_clears_family(): + async def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(200, json=_FEED) + + _install_transport(handler) + provider = FeodoProvider(refresh_interval_s=999.0) + result = await provider.lookup("1.2.3.4") + assert result.column_updates["feodo_malware_family"] is None + + @pytest.mark.anyio async def test_feed_failure_reports_error(): async def handler(request: httpx.Request) -> httpx.Response: diff --git a/tests/intel/test_greynoise.py b/tests/intel/test_greynoise.py index 8e2cdea4..324a7747 100644 --- a/tests/intel/test_greynoise.py +++ b/tests/intel/test_greynoise.py @@ -123,6 +123,45 @@ async def test_429_returns_error_no_writes(): assert result.column_updates == {} +@pytest.mark.anyio +async def test_actor_name_and_tags_persisted_when_present(): + """Post-2026-05-02 audit: ``name`` (actor label) and any ``tags`` + list returned by the upstream survive into ``column_updates``. 
+ + The Community endpoint does not return ``tags`` in practice; the + test seeds the field anyway so non-Community provider plans that + do (paid / Enterprise) work without further code changes. + """ + async def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, + json={ + "classification": "malicious", + "name": "Tor", + "tags": ["tor_exit_node", "ssh_bruteforcer"], + }, + ) + + provider = GreyNoiseProvider() + _install_transport(provider, handler) + result = await provider.lookup("1.2.3.4") + assert result.column_updates["greynoise_name"] == "Tor" + tags = json.loads(result.column_updates["greynoise_tags"]) + assert tags == ["tor_exit_node", "ssh_bruteforcer"] + + +@pytest.mark.anyio +async def test_404_clears_actor_and_tags(): + async def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(404, json={"message": "not seen"}) + + provider = GreyNoiseProvider() + _install_transport(provider, handler) + result = await provider.lookup("10.0.0.5") + assert result.column_updates["greynoise_name"] is None + assert result.column_updates["greynoise_tags"] == "[]" + + @pytest.mark.anyio async def test_network_failure_becomes_error(): async def handler(request: httpx.Request) -> httpx.Response: diff --git a/tests/intel/test_threatfox.py b/tests/intel/test_threatfox.py index 45b29e61..37d2a7c3 100644 --- a/tests/intel/test_threatfox.py +++ b/tests/intel/test_threatfox.py @@ -100,6 +100,61 @@ async def test_unexpected_status_is_error(): assert result.column_updates == {} +@pytest.mark.anyio +async def test_threat_types_and_ioc_types_flattened(monkeypatch): + """Post-2026-05-02 audit: provider must extract the union of + ``threat_type`` / ``ioc_type`` / ``malware`` across all matches. 
+ The IntelLifter dispatches ATT&CK on threat_type.""" + monkeypatch.delenv("DECNET_THREATFOX_API_KEY", raising=False) + + async def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, + json={"query_status": "ok", "data": [ + { + "ioc_type": "ip:port", + "threat_type": "botnet_cc", + "malware": "Sliver", + }, + { + "ioc_type": "url", + "threat_type": "payload_delivery", + "malware_printable": "Emotet", + }, + { + "ioc_type": "ip:port", # duplicate, dedup'd + "threat_type": "botnet_cc", # duplicate + "malware": "Sliver", # duplicate + }, + "not a dict — silently skipped", + ]}, + ) + + _install_transport(handler) + provider = ThreatFoxProvider() + result = await provider.lookup("1.2.3.4") + cu = result.column_updates + assert json.loads(cu["threatfox_threat_types"]) == [ + "botnet_cc", "payload_delivery", + ] + assert json.loads(cu["threatfox_ioc_types"]) == ["ip:port", "url"] + assert json.loads(cu["threatfox_malware_families"]) == ["Emotet", "Sliver"] + + +@pytest.mark.anyio +async def test_no_result_clears_taxonomy_columns(): + async def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(200, json={"query_status": "no_result"}) + + _install_transport(handler) + provider = ThreatFoxProvider() + result = await provider.lookup("8.8.8.8") + cu = result.column_updates + assert cu["threatfox_threat_types"] == "[]" + assert cu["threatfox_ioc_types"] == "[]" + assert cu["threatfox_malware_families"] == "[]" + + @pytest.mark.anyio async def test_http_error_surfaces(): async def handler(request: httpx.Request) -> httpx.Response: