refactor(intel): migrate AttackerIntel JSON-string columns to native SQLAlchemy JSON

Five list columns (greynoise_tags, abuseipdb_categories, threatfox_threat_types, threatfox_ioc_types, threatfox_malware_families) and four dict columns (*_raw) are now Column(JSON) with list/dict type annotations and default_factory=list/dict. Providers return native Python objects; the application-layer json.dumps/json.loads round-trip and the _decode_json_list helpers are gone. to_intel_event_payload() reads the columns directly. Also caps pytest xdist at -n 4 and adds tests/api to norecursedirs, so the schemathesis tests are no longer collected by default and their workers cannot OOM-kill the dev loop.
2026-05-10 09:17:15 -04:00
parent de3634d739
commit 9a7b03700c
16 changed files with 90 additions and 193 deletions
--- a/tests/intel/test_abuseipdb.py
+++ b/tests/intel/test_abuseipdb.py
@@ -1,7 +1,6 @@
 """Unit tests for the AbuseIPDB provider."""
 from __future__ import annotations

-import json

 import httpx
 import pytest
@@ -71,8 +70,7 @@ async def test_high_score_maps_to_malicious(monkeypatch):
    result = await provider.lookup("1.2.3.4")
    assert result.verdict == "malicious"
    assert result.column_updates["abuseipdb_score"] == 92
-    raw = json.loads(result.column_updates["abuseipdb_raw"])
-    assert raw["countryCode"] == "RU"
+    assert result.column_updates["abuseipdb_raw"]["countryCode"] == "RU"
    # Key header sent, query params correct.
    req = captured[0]
    assert req.headers["key"] == "k3y"
@@ -120,8 +118,7 @@ async def test_categories_flattened_from_reports(monkeypatch):
    _install_transport(handler)
    provider = AbuseIPDBProvider()
    result = await provider.lookup("1.2.3.4")
-    cats = json.loads(result.column_updates["abuseipdb_categories"])
-    assert cats == [14, 18, 21, 22]
+    assert result.column_updates["abuseipdb_categories"] == [14, 18, 21, 22]


@pytest.mark.anyio
@@ -136,7 +133,7 @@ async def test_categories_empty_when_no_reports(monkeypatch):
    _install_transport(handler)
    provider = AbuseIPDBProvider()
    result = await provider.lookup("8.8.8.8")
-    assert json.loads(result.column_updates["abuseipdb_categories"]) == []
+    assert result.column_updates["abuseipdb_categories"] == []


@pytest.mark.anyio
--- a/tests/intel/test_attacker_intel_repo.py
+++ b/tests/intel/test_attacker_intel_repo.py
@@ -77,7 +77,7 @@ async def test_partial_provider_update_preserves_others(repo):
        _intel_payload(
            attacker_uuid=a_uuid, ip="9.9.9.9",
            greynoise_classification="malicious",
-            greynoise_raw='{"classification":"malicious"}',
+            greynoise_raw={"classification": "malicious"},
            greynoise_queried_at=datetime.now(timezone.utc),
        )
    )
@@ -87,7 +87,7 @@ async def test_partial_provider_update_preserves_others(repo):
        _intel_payload(
            attacker_uuid=a_uuid, ip="9.9.9.9",
            abuseipdb_score=85,
-            abuseipdb_raw='{"abuseConfidenceScore":85}',
+            abuseipdb_raw={"abuseConfidenceScore": 85},
            abuseipdb_queried_at=datetime.now(timezone.utc),
        )
    )
--- a/tests/intel/test_feodo.py
+++ b/tests/intel/test_feodo.py
@@ -10,7 +10,6 @@ subsequent ``lookup`` calls hit memory. We assert:
 """
 from __future__ import annotations

-import json

 import httpx
 import pytest
@@ -56,8 +55,7 @@ async def test_listed_ip_yields_malicious_verdict():
    result = await provider.lookup("9.9.9.9")
    assert result.verdict == "malicious"
    assert result.column_updates["feodo_listed"] is True
-    raw = json.loads(result.column_updates["feodo_raw"])
-    assert raw["malware"] == "TrickBot"
+    assert result.column_updates["feodo_raw"]["malware"] == "TrickBot"
    assert len(captured) == 1


--- a/tests/intel/test_greynoise.py
+++ b/tests/intel/test_greynoise.py
@@ -11,7 +11,6 @@ Mocks httpx via ``MockTransport`` and asserts:
 """
 from __future__ import annotations

-import json

 import httpx
 import pytest
@@ -61,8 +60,7 @@ async def test_malicious_classification_maps_to_verdict():
    assert result.error is None
    assert result.verdict == "malicious"
    assert result.column_updates["greynoise_classification"] == "malicious"
-    raw = json.loads(result.column_updates["greynoise_raw"])
-    assert raw["name"] == "Mirai-like"
+    assert result.column_updates["greynoise_raw"]["name"] == "Mirai-like"
    assert "1.2.3.4" in str(captured[0].url)
    # No DECNET label leaks in the UA.
    assert "decnet" not in captured[0].headers["user-agent"].lower()
@@ -146,8 +144,7 @@ async def test_actor_name_and_tags_persisted_when_present():
    _install_transport(provider, handler)
    result = await provider.lookup("1.2.3.4")
    assert result.column_updates["greynoise_name"] == "Tor"
-    tags = json.loads(result.column_updates["greynoise_tags"])
-    assert tags == ["tor_exit_node", "ssh_bruteforcer"]
+    assert result.column_updates["greynoise_tags"] == ["tor_exit_node", "ssh_bruteforcer"]


@pytest.mark.anyio
@@ -159,7 +156,7 @@ async def test_404_clears_actor_and_tags():
    _install_transport(provider, handler)
    result = await provider.lookup("10.0.0.5")
    assert result.column_updates["greynoise_name"] is None
-    assert result.column_updates["greynoise_tags"] == "[]"
+    assert result.column_updates["greynoise_tags"] == []


@pytest.mark.anyio
--- a/tests/intel/test_threatfox.py
+++ b/tests/intel/test_threatfox.py
@@ -56,7 +56,7 @@ async def test_match_returns_malicious(monkeypatch):
    result = await provider.lookup("1.2.3.4")
    assert result.verdict == "malicious"
    assert result.column_updates["threatfox_listed"] is True
-    raw = json.loads(result.column_updates["threatfox_raw"])
+    raw = result.column_updates["threatfox_raw"]
    assert raw[0]["malware"] == "Cobalt Strike"
    # No Auth-Key when none configured.
    assert "auth-key" not in {h.lower() for h in captured[0].headers}
@@ -134,11 +134,9 @@ async def test_threat_types_and_ioc_types_flattened(monkeypatch):
    provider = ThreatFoxProvider()
    result = await provider.lookup("1.2.3.4")
    cu = result.column_updates
-    assert json.loads(cu["threatfox_threat_types"]) == [
-        "botnet_cc", "payload_delivery",
-    ]
-    assert json.loads(cu["threatfox_ioc_types"]) == ["ip:port", "url"]
-    assert json.loads(cu["threatfox_malware_families"]) == ["Emotet", "Sliver"]
+    assert cu["threatfox_threat_types"] == ["botnet_cc", "payload_delivery"]
+    assert cu["threatfox_ioc_types"] == ["ip:port", "url"]
+    assert cu["threatfox_malware_families"] == ["Emotet", "Sliver"]


@pytest.mark.anyio
@@ -150,9 +148,9 @@ async def test_no_result_clears_taxonomy_columns():
    provider = ThreatFoxProvider()
    result = await provider.lookup("8.8.8.8")
    cu = result.column_updates
-    assert cu["threatfox_threat_types"] == "[]"
-    assert cu["threatfox_ioc_types"] == "[]"
-    assert cu["threatfox_malware_families"] == "[]"
+    assert cu["threatfox_threat_types"] == []
+    assert cu["threatfox_ioc_types"] == []
+    assert cu["threatfox_malware_families"] == []


@pytest.mark.anyio
--- a/tests/intel/test_worker.py
+++ b/tests/intel/test_worker.py
@@ -12,7 +12,6 @@ Covers — without any real provider impls — that the loop:
 from __future__ import annotations

 import asyncio
-import json
 from datetime import datetime, timezone
 from typing import Optional

@@ -128,7 +127,7 @@ async def test_fan_out_writes_aggregate_row(repo):
        verdict="benign",
        column_updates={
            "greynoise_classification": "benign",
-            "greynoise_raw": json.dumps({"classification": "benign"}),
+            "greynoise_raw": {"classification": "benign"},
            "greynoise_queried_at": datetime.now(timezone.utc),
        },
    )
@@ -137,7 +136,7 @@ async def test_fan_out_writes_aggregate_row(repo):
        verdict="malicious",
        column_updates={
            "abuseipdb_score": 90,
-            "abuseipdb_raw": json.dumps({"abuseConfidenceScore": 90}),
+            "abuseipdb_raw": {"abuseConfidenceScore": 90},
            "abuseipdb_queried_at": datetime.now(timezone.utc),
        },
    )
@@ -178,7 +177,7 @@ async def test_provider_error_does_not_poison_row(repo):
        verdict="benign",
        column_updates={
            "greynoise_classification": "benign",
-            "greynoise_raw": "{}",
+            "greynoise_raw": {},
            "greynoise_queried_at": datetime.now(timezone.utc),
        },
    )
@@ -234,7 +233,7 @@ async def test_intel_enriched_event_published_to_bus(repo, monkeypatch):
        verdict="malicious",
        column_updates={
            "greynoise_classification": "malicious",
-            "greynoise_raw": "{}",
+            "greynoise_raw": {},
            "greynoise_queried_at": datetime.now(timezone.utc),
        },
    )
--- a/tests/intel/test_worker_publish.py
+++ b/tests/intel/test_worker_publish.py
@@ -98,26 +98,22 @@ async def test_intel_worker_publishes_intel_enriched(


 def test_build_intel_event_payload_projects_taxonomy_fields() -> None:
-    """Post-2026-05-02 audit: the bus payload now carries the per-
-    provider taxonomy fields the IntelLifter needs (categories, tags,
-    threat_types). JSON-string columns are decoded back to native
-    lists so the consumer does not have to know about storage shape.
+    """The bus payload carries the per-provider taxonomy fields the
+    IntelLifter needs (categories, tags, threat_types) as native lists.
    """
-    import json as _json
-
    row = {
        "aggregate_verdict": "malicious",
        "abuseipdb_score": 87,
-        "abuseipdb_categories": _json.dumps([14, 18, 22]),
+        "abuseipdb_categories": [14, 18, 22],
        "greynoise_classification": "malicious",
        "greynoise_name": "Mirai",
-        "greynoise_tags": _json.dumps(["ssh_bruteforcer"]),
+        "greynoise_tags": ["ssh_bruteforcer"],
        "feodo_listed": True,
        "feodo_malware_family": "Emotet",
        "threatfox_listed": True,
-        "threatfox_threat_types": _json.dumps(["botnet_cc"]),
-        "threatfox_ioc_types": _json.dumps(["ip:port"]),
-        "threatfox_malware_families": _json.dumps(["Sliver"]),
+        "threatfox_threat_types": ["botnet_cc"],
+        "threatfox_ioc_types": ["ip:port"],
+        "threatfox_malware_families": ["Sliver"],
    }
    payload = _iw._build_intel_event_payload(
        "att-2", "203.0.113.7", row, [_FakeProvider()],