refactor(intel): migrate AttackerIntel JSON-string columns to native SQLAlchemy JSON

Five list columns (greynoise_tags, abuseipdb_categories, threatfox_threat_types,
threatfox_ioc_types, threatfox_malware_families) and four dict columns
(*_raw) are now Column(JSON) with list/dict type annotations and
default_factory=list/dict. Providers return native Python objects; the
application-layer json.dumps/json.loads round-trip and _decode_json_list
helpers are gone. to_intel_event_payload() reads columns directly.

Also caps pytest xdist at -n 4 and adds tests/api to norecursedirs (excluding it
from collection) to prevent schemathesis workers from OOM-killing the dev loop.
This commit is contained in:
2026-05-10 09:17:15 -04:00
parent de3634d739
commit 9a7b03700c
16 changed files with 90 additions and 193 deletions

View File

@@ -1,7 +1,6 @@
"""Unit tests for the AbuseIPDB provider."""
from __future__ import annotations
import json
import httpx
import pytest
@@ -71,8 +70,7 @@ async def test_high_score_maps_to_malicious(monkeypatch):
result = await provider.lookup("1.2.3.4")
assert result.verdict == "malicious"
assert result.column_updates["abuseipdb_score"] == 92
raw = json.loads(result.column_updates["abuseipdb_raw"])
assert raw["countryCode"] == "RU"
assert result.column_updates["abuseipdb_raw"]["countryCode"] == "RU"
# Key header sent, query params correct.
req = captured[0]
assert req.headers["key"] == "k3y"
@@ -120,8 +118,7 @@ async def test_categories_flattened_from_reports(monkeypatch):
_install_transport(handler)
provider = AbuseIPDBProvider()
result = await provider.lookup("1.2.3.4")
cats = json.loads(result.column_updates["abuseipdb_categories"])
assert cats == [14, 18, 21, 22]
assert result.column_updates["abuseipdb_categories"] == [14, 18, 21, 22]
@pytest.mark.anyio
@@ -136,7 +133,7 @@ async def test_categories_empty_when_no_reports(monkeypatch):
_install_transport(handler)
provider = AbuseIPDBProvider()
result = await provider.lookup("8.8.8.8")
assert json.loads(result.column_updates["abuseipdb_categories"]) == []
assert result.column_updates["abuseipdb_categories"] == []
@pytest.mark.anyio

View File

@@ -77,7 +77,7 @@ async def test_partial_provider_update_preserves_others(repo):
_intel_payload(
attacker_uuid=a_uuid, ip="9.9.9.9",
greynoise_classification="malicious",
greynoise_raw='{"classification":"malicious"}',
greynoise_raw={"classification": "malicious"},
greynoise_queried_at=datetime.now(timezone.utc),
)
)
@@ -87,7 +87,7 @@ async def test_partial_provider_update_preserves_others(repo):
_intel_payload(
attacker_uuid=a_uuid, ip="9.9.9.9",
abuseipdb_score=85,
abuseipdb_raw='{"abuseConfidenceScore":85}',
abuseipdb_raw={"abuseConfidenceScore": 85},
abuseipdb_queried_at=datetime.now(timezone.utc),
)
)

View File

@@ -10,7 +10,6 @@ subsequent ``lookup`` calls hit memory. We assert:
"""
from __future__ import annotations
import json
import httpx
import pytest
@@ -56,8 +55,7 @@ async def test_listed_ip_yields_malicious_verdict():
result = await provider.lookup("9.9.9.9")
assert result.verdict == "malicious"
assert result.column_updates["feodo_listed"] is True
raw = json.loads(result.column_updates["feodo_raw"])
assert raw["malware"] == "TrickBot"
assert result.column_updates["feodo_raw"]["malware"] == "TrickBot"
assert len(captured) == 1

View File

@@ -11,7 +11,6 @@ Mocks httpx via ``MockTransport`` and asserts:
"""
from __future__ import annotations
import json
import httpx
import pytest
@@ -61,8 +60,7 @@ async def test_malicious_classification_maps_to_verdict():
assert result.error is None
assert result.verdict == "malicious"
assert result.column_updates["greynoise_classification"] == "malicious"
raw = json.loads(result.column_updates["greynoise_raw"])
assert raw["name"] == "Mirai-like"
assert result.column_updates["greynoise_raw"]["name"] == "Mirai-like"
assert "1.2.3.4" in str(captured[0].url)
# No DECNET label leaks in the UA.
assert "decnet" not in captured[0].headers["user-agent"].lower()
@@ -146,8 +144,7 @@ async def test_actor_name_and_tags_persisted_when_present():
_install_transport(provider, handler)
result = await provider.lookup("1.2.3.4")
assert result.column_updates["greynoise_name"] == "Tor"
tags = json.loads(result.column_updates["greynoise_tags"])
assert tags == ["tor_exit_node", "ssh_bruteforcer"]
assert result.column_updates["greynoise_tags"] == ["tor_exit_node", "ssh_bruteforcer"]
@pytest.mark.anyio
@@ -159,7 +156,7 @@ async def test_404_clears_actor_and_tags():
_install_transport(provider, handler)
result = await provider.lookup("10.0.0.5")
assert result.column_updates["greynoise_name"] is None
assert result.column_updates["greynoise_tags"] == "[]"
assert result.column_updates["greynoise_tags"] == []
@pytest.mark.anyio

View File

@@ -56,7 +56,7 @@ async def test_match_returns_malicious(monkeypatch):
result = await provider.lookup("1.2.3.4")
assert result.verdict == "malicious"
assert result.column_updates["threatfox_listed"] is True
raw = json.loads(result.column_updates["threatfox_raw"])
raw = result.column_updates["threatfox_raw"]
assert raw[0]["malware"] == "Cobalt Strike"
# No Auth-Key when none configured.
assert "auth-key" not in {h.lower() for h in captured[0].headers}
@@ -134,11 +134,9 @@ async def test_threat_types_and_ioc_types_flattened(monkeypatch):
provider = ThreatFoxProvider()
result = await provider.lookup("1.2.3.4")
cu = result.column_updates
assert json.loads(cu["threatfox_threat_types"]) == [
"botnet_cc", "payload_delivery",
]
assert json.loads(cu["threatfox_ioc_types"]) == ["ip:port", "url"]
assert json.loads(cu["threatfox_malware_families"]) == ["Emotet", "Sliver"]
assert cu["threatfox_threat_types"] == ["botnet_cc", "payload_delivery"]
assert cu["threatfox_ioc_types"] == ["ip:port", "url"]
assert cu["threatfox_malware_families"] == ["Emotet", "Sliver"]
@pytest.mark.anyio
@@ -150,9 +148,9 @@ async def test_no_result_clears_taxonomy_columns():
provider = ThreatFoxProvider()
result = await provider.lookup("8.8.8.8")
cu = result.column_updates
assert cu["threatfox_threat_types"] == "[]"
assert cu["threatfox_ioc_types"] == "[]"
assert cu["threatfox_malware_families"] == "[]"
assert cu["threatfox_threat_types"] == []
assert cu["threatfox_ioc_types"] == []
assert cu["threatfox_malware_families"] == []
@pytest.mark.anyio

View File

@@ -12,7 +12,6 @@ Covers — without any real provider impls — that the loop:
from __future__ import annotations
import asyncio
import json
from datetime import datetime, timezone
from typing import Optional
@@ -128,7 +127,7 @@ async def test_fan_out_writes_aggregate_row(repo):
verdict="benign",
column_updates={
"greynoise_classification": "benign",
"greynoise_raw": json.dumps({"classification": "benign"}),
"greynoise_raw": {"classification": "benign"},
"greynoise_queried_at": datetime.now(timezone.utc),
},
)
@@ -137,7 +136,7 @@ async def test_fan_out_writes_aggregate_row(repo):
verdict="malicious",
column_updates={
"abuseipdb_score": 90,
"abuseipdb_raw": json.dumps({"abuseConfidenceScore": 90}),
"abuseipdb_raw": {"abuseConfidenceScore": 90},
"abuseipdb_queried_at": datetime.now(timezone.utc),
},
)
@@ -178,7 +177,7 @@ async def test_provider_error_does_not_poison_row(repo):
verdict="benign",
column_updates={
"greynoise_classification": "benign",
"greynoise_raw": "{}",
"greynoise_raw": {},
"greynoise_queried_at": datetime.now(timezone.utc),
},
)
@@ -234,7 +233,7 @@ async def test_intel_enriched_event_published_to_bus(repo, monkeypatch):
verdict="malicious",
column_updates={
"greynoise_classification": "malicious",
"greynoise_raw": "{}",
"greynoise_raw": {},
"greynoise_queried_at": datetime.now(timezone.utc),
},
)

View File

@@ -98,26 +98,22 @@ async def test_intel_worker_publishes_intel_enriched(
def test_build_intel_event_payload_projects_taxonomy_fields() -> None:
"""Post-2026-05-02 audit: the bus payload now carries the per-
provider taxonomy fields the IntelLifter needs (categories, tags,
threat_types). JSON-string columns are decoded back to native
lists so the consumer does not have to know about storage shape.
"""The bus payload carries the per-provider taxonomy fields the
IntelLifter needs (categories, tags, threat_types) as native lists.
"""
import json as _json
row = {
"aggregate_verdict": "malicious",
"abuseipdb_score": 87,
"abuseipdb_categories": _json.dumps([14, 18, 22]),
"abuseipdb_categories": [14, 18, 22],
"greynoise_classification": "malicious",
"greynoise_name": "Mirai",
"greynoise_tags": _json.dumps(["ssh_bruteforcer"]),
"greynoise_tags": ["ssh_bruteforcer"],
"feodo_listed": True,
"feodo_malware_family": "Emotet",
"threatfox_listed": True,
"threatfox_threat_types": _json.dumps(["botnet_cc"]),
"threatfox_ioc_types": _json.dumps(["ip:port"]),
"threatfox_malware_families": _json.dumps(["Sliver"]),
"threatfox_threat_types": ["botnet_cc"],
"threatfox_ioc_types": ["ip:port"],
"threatfox_malware_families": ["Sliver"],
}
payload = _iw._build_intel_event_payload(
"att-2", "203.0.113.7", row, [_FakeProvider()],