refactor(intel): migrate AttackerIntel JSON-string columns to native SQLAlchemy JSON
Five list columns (greynoise_tags, abuseipdb_categories, threatfox_threat_types, threatfox_ioc_types, threatfox_malware_families) and four dict columns (*_raw) are now Column(JSON) with list/dict type annotations and default_factory=list/dict. Providers return native Python objects; the application-layer json.dumps/json.loads round-trip and the _decode_json_list helpers are gone. to_intel_event_payload() reads columns directly. Also caps pytest xdist at -n 4 and excludes tests/api from collection by adding it to norecursedirs, to prevent schemathesis workers from OOM-killing the dev loop.
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
"""Unit tests for the AbuseIPDB provider."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
@@ -71,8 +70,7 @@ async def test_high_score_maps_to_malicious(monkeypatch):
|
||||
result = await provider.lookup("1.2.3.4")
|
||||
assert result.verdict == "malicious"
|
||||
assert result.column_updates["abuseipdb_score"] == 92
|
||||
raw = json.loads(result.column_updates["abuseipdb_raw"])
|
||||
assert raw["countryCode"] == "RU"
|
||||
assert result.column_updates["abuseipdb_raw"]["countryCode"] == "RU"
|
||||
# Key header sent, query params correct.
|
||||
req = captured[0]
|
||||
assert req.headers["key"] == "k3y"
|
||||
@@ -120,8 +118,7 @@ async def test_categories_flattened_from_reports(monkeypatch):
|
||||
_install_transport(handler)
|
||||
provider = AbuseIPDBProvider()
|
||||
result = await provider.lookup("1.2.3.4")
|
||||
cats = json.loads(result.column_updates["abuseipdb_categories"])
|
||||
assert cats == [14, 18, 21, 22]
|
||||
assert result.column_updates["abuseipdb_categories"] == [14, 18, 21, 22]
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
@@ -136,7 +133,7 @@ async def test_categories_empty_when_no_reports(monkeypatch):
|
||||
_install_transport(handler)
|
||||
provider = AbuseIPDBProvider()
|
||||
result = await provider.lookup("8.8.8.8")
|
||||
assert json.loads(result.column_updates["abuseipdb_categories"]) == []
|
||||
assert result.column_updates["abuseipdb_categories"] == []
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
|
||||
@@ -77,7 +77,7 @@ async def test_partial_provider_update_preserves_others(repo):
|
||||
_intel_payload(
|
||||
attacker_uuid=a_uuid, ip="9.9.9.9",
|
||||
greynoise_classification="malicious",
|
||||
greynoise_raw='{"classification":"malicious"}',
|
||||
greynoise_raw={"classification": "malicious"},
|
||||
greynoise_queried_at=datetime.now(timezone.utc),
|
||||
)
|
||||
)
|
||||
@@ -87,7 +87,7 @@ async def test_partial_provider_update_preserves_others(repo):
|
||||
_intel_payload(
|
||||
attacker_uuid=a_uuid, ip="9.9.9.9",
|
||||
abuseipdb_score=85,
|
||||
abuseipdb_raw='{"abuseConfidenceScore":85}',
|
||||
abuseipdb_raw={"abuseConfidenceScore": 85},
|
||||
abuseipdb_queried_at=datetime.now(timezone.utc),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -10,7 +10,6 @@ subsequent ``lookup`` calls hit memory. We assert:
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
@@ -56,8 +55,7 @@ async def test_listed_ip_yields_malicious_verdict():
|
||||
result = await provider.lookup("9.9.9.9")
|
||||
assert result.verdict == "malicious"
|
||||
assert result.column_updates["feodo_listed"] is True
|
||||
raw = json.loads(result.column_updates["feodo_raw"])
|
||||
assert raw["malware"] == "TrickBot"
|
||||
assert result.column_updates["feodo_raw"]["malware"] == "TrickBot"
|
||||
assert len(captured) == 1
|
||||
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@ Mocks httpx via ``MockTransport`` and asserts:
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
@@ -61,8 +60,7 @@ async def test_malicious_classification_maps_to_verdict():
|
||||
assert result.error is None
|
||||
assert result.verdict == "malicious"
|
||||
assert result.column_updates["greynoise_classification"] == "malicious"
|
||||
raw = json.loads(result.column_updates["greynoise_raw"])
|
||||
assert raw["name"] == "Mirai-like"
|
||||
assert result.column_updates["greynoise_raw"]["name"] == "Mirai-like"
|
||||
assert "1.2.3.4" in str(captured[0].url)
|
||||
# No DECNET label leaks in the UA.
|
||||
assert "decnet" not in captured[0].headers["user-agent"].lower()
|
||||
@@ -146,8 +144,7 @@ async def test_actor_name_and_tags_persisted_when_present():
|
||||
_install_transport(provider, handler)
|
||||
result = await provider.lookup("1.2.3.4")
|
||||
assert result.column_updates["greynoise_name"] == "Tor"
|
||||
tags = json.loads(result.column_updates["greynoise_tags"])
|
||||
assert tags == ["tor_exit_node", "ssh_bruteforcer"]
|
||||
assert result.column_updates["greynoise_tags"] == ["tor_exit_node", "ssh_bruteforcer"]
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
@@ -159,7 +156,7 @@ async def test_404_clears_actor_and_tags():
|
||||
_install_transport(provider, handler)
|
||||
result = await provider.lookup("10.0.0.5")
|
||||
assert result.column_updates["greynoise_name"] is None
|
||||
assert result.column_updates["greynoise_tags"] == "[]"
|
||||
assert result.column_updates["greynoise_tags"] == []
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
|
||||
@@ -56,7 +56,7 @@ async def test_match_returns_malicious(monkeypatch):
|
||||
result = await provider.lookup("1.2.3.4")
|
||||
assert result.verdict == "malicious"
|
||||
assert result.column_updates["threatfox_listed"] is True
|
||||
raw = json.loads(result.column_updates["threatfox_raw"])
|
||||
raw = result.column_updates["threatfox_raw"]
|
||||
assert raw[0]["malware"] == "Cobalt Strike"
|
||||
# No Auth-Key when none configured.
|
||||
assert "auth-key" not in {h.lower() for h in captured[0].headers}
|
||||
@@ -134,11 +134,9 @@ async def test_threat_types_and_ioc_types_flattened(monkeypatch):
|
||||
provider = ThreatFoxProvider()
|
||||
result = await provider.lookup("1.2.3.4")
|
||||
cu = result.column_updates
|
||||
assert json.loads(cu["threatfox_threat_types"]) == [
|
||||
"botnet_cc", "payload_delivery",
|
||||
]
|
||||
assert json.loads(cu["threatfox_ioc_types"]) == ["ip:port", "url"]
|
||||
assert json.loads(cu["threatfox_malware_families"]) == ["Emotet", "Sliver"]
|
||||
assert cu["threatfox_threat_types"] == ["botnet_cc", "payload_delivery"]
|
||||
assert cu["threatfox_ioc_types"] == ["ip:port", "url"]
|
||||
assert cu["threatfox_malware_families"] == ["Emotet", "Sliver"]
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
@@ -150,9 +148,9 @@ async def test_no_result_clears_taxonomy_columns():
|
||||
provider = ThreatFoxProvider()
|
||||
result = await provider.lookup("8.8.8.8")
|
||||
cu = result.column_updates
|
||||
assert cu["threatfox_threat_types"] == "[]"
|
||||
assert cu["threatfox_ioc_types"] == "[]"
|
||||
assert cu["threatfox_malware_families"] == "[]"
|
||||
assert cu["threatfox_threat_types"] == []
|
||||
assert cu["threatfox_ioc_types"] == []
|
||||
assert cu["threatfox_malware_families"] == []
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
|
||||
@@ -12,7 +12,6 @@ Covers — without any real provider impls — that the loop:
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
@@ -128,7 +127,7 @@ async def test_fan_out_writes_aggregate_row(repo):
|
||||
verdict="benign",
|
||||
column_updates={
|
||||
"greynoise_classification": "benign",
|
||||
"greynoise_raw": json.dumps({"classification": "benign"}),
|
||||
"greynoise_raw": {"classification": "benign"},
|
||||
"greynoise_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
)
|
||||
@@ -137,7 +136,7 @@ async def test_fan_out_writes_aggregate_row(repo):
|
||||
verdict="malicious",
|
||||
column_updates={
|
||||
"abuseipdb_score": 90,
|
||||
"abuseipdb_raw": json.dumps({"abuseConfidenceScore": 90}),
|
||||
"abuseipdb_raw": {"abuseConfidenceScore": 90},
|
||||
"abuseipdb_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
)
|
||||
@@ -178,7 +177,7 @@ async def test_provider_error_does_not_poison_row(repo):
|
||||
verdict="benign",
|
||||
column_updates={
|
||||
"greynoise_classification": "benign",
|
||||
"greynoise_raw": "{}",
|
||||
"greynoise_raw": {},
|
||||
"greynoise_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
)
|
||||
@@ -234,7 +233,7 @@ async def test_intel_enriched_event_published_to_bus(repo, monkeypatch):
|
||||
verdict="malicious",
|
||||
column_updates={
|
||||
"greynoise_classification": "malicious",
|
||||
"greynoise_raw": "{}",
|
||||
"greynoise_raw": {},
|
||||
"greynoise_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -98,26 +98,22 @@ async def test_intel_worker_publishes_intel_enriched(
|
||||
|
||||
|
||||
def test_build_intel_event_payload_projects_taxonomy_fields() -> None:
|
||||
"""Post-2026-05-02 audit: the bus payload now carries the per-
|
||||
provider taxonomy fields the IntelLifter needs (categories, tags,
|
||||
threat_types). JSON-string columns are decoded back to native
|
||||
lists so the consumer does not have to know about storage shape.
|
||||
"""The bus payload carries the per-provider taxonomy fields the
|
||||
IntelLifter needs (categories, tags, threat_types) as native lists.
|
||||
"""
|
||||
import json as _json
|
||||
|
||||
row = {
|
||||
"aggregate_verdict": "malicious",
|
||||
"abuseipdb_score": 87,
|
||||
"abuseipdb_categories": _json.dumps([14, 18, 22]),
|
||||
"abuseipdb_categories": [14, 18, 22],
|
||||
"greynoise_classification": "malicious",
|
||||
"greynoise_name": "Mirai",
|
||||
"greynoise_tags": _json.dumps(["ssh_bruteforcer"]),
|
||||
"greynoise_tags": ["ssh_bruteforcer"],
|
||||
"feodo_listed": True,
|
||||
"feodo_malware_family": "Emotet",
|
||||
"threatfox_listed": True,
|
||||
"threatfox_threat_types": _json.dumps(["botnet_cc"]),
|
||||
"threatfox_ioc_types": _json.dumps(["ip:port"]),
|
||||
"threatfox_malware_families": _json.dumps(["Sliver"]),
|
||||
"threatfox_threat_types": ["botnet_cc"],
|
||||
"threatfox_ioc_types": ["ip:port"],
|
||||
"threatfox_malware_families": ["Sliver"],
|
||||
}
|
||||
payload = _iw._build_intel_event_payload(
|
||||
"att-2", "203.0.113.7", row, [_FakeProvider()],
|
||||
|
||||
Reference in New Issue
Block a user