refactor(intel): migrate AttackerIntel JSON-string columns to native SQLAlchemy JSON

Five list columns (greynoise_tags, abuseipdb_categories, threatfox_threat_types,
threatfox_ioc_types, threatfox_malware_families) and four dict columns
(*_raw) are now Column(JSON) with list/dict type annotations and
default_factory=list/dict. Providers return native Python objects; the
application-layer json.dumps/json.loads round-trip and _decode_json_list
helpers are gone. to_intel_event_payload() reads columns directly.

Also caps pytest xdist at -n 4 and excludes tests/api from collection
(via norecursedirs) to prevent schemathesis workers from OOM-killing
the dev loop.
This commit is contained in:
2026-05-10 09:17:15 -04:00
parent de3634d739
commit 9a7b03700c
16 changed files with 90 additions and 193 deletions

View File

@@ -17,7 +17,6 @@ later if operators report drift.
"""
from __future__ import annotations
import json
import os
from datetime import datetime, timezone
from typing import Optional
@@ -110,8 +109,8 @@ class AbuseIPDBProvider(IntelProvider):
verdict=verdict,
column_updates={
"abuseipdb_score": score,
"abuseipdb_categories": json.dumps(sorted(categories)),
"abuseipdb_raw": json.dumps(data),
"abuseipdb_categories": sorted(categories),
"abuseipdb_raw": data,
"abuseipdb_queried_at": datetime.now(timezone.utc),
},
)

View File

@@ -13,7 +13,6 @@ of attacker IPs map to a single network round-trip per refresh window.
"""
from __future__ import annotations
import json
import time
from datetime import datetime, timezone
from typing import Any, Optional
@@ -94,7 +93,7 @@ class FeodoProvider(IntelProvider):
column_updates={
"feodo_listed": False,
"feodo_malware_family": None,
"feodo_raw": "{}",
"feodo_raw": {},
"feodo_queried_at": datetime.now(timezone.utc),
},
)
@@ -108,7 +107,7 @@ class FeodoProvider(IntelProvider):
column_updates={
"feodo_listed": True,
"feodo_malware_family": family,
"feodo_raw": json.dumps(entry),
"feodo_raw": entry,
"feodo_queried_at": datetime.now(timezone.utc),
},
)

View File

@@ -25,7 +25,6 @@ Status code semantics:
"""
from __future__ import annotations
import json
import os
from datetime import datetime, timezone
from typing import Optional
@@ -72,8 +71,8 @@ class GreyNoiseProvider(IntelProvider):
column_updates={
"greynoise_classification": "unknown",
"greynoise_name": None,
"greynoise_tags": "[]",
"greynoise_raw": json.dumps({"message": "not seen"}),
"greynoise_tags": [],
"greynoise_raw": {"message": "not seen"},
"greynoise_queried_at": datetime.now(timezone.utc),
},
)
@@ -107,8 +106,8 @@ class GreyNoiseProvider(IntelProvider):
column_updates={
"greynoise_classification": classification,
"greynoise_name": name,
"greynoise_tags": json.dumps(tags),
"greynoise_raw": json.dumps(data),
"greynoise_tags": tags,
"greynoise_raw": data,
"greynoise_queried_at": datetime.now(timezone.utc),
},
)

View File

@@ -12,7 +12,6 @@ caps requests/min — the provider works either way.
"""
from __future__ import annotations
import json
import os
from datetime import datetime, timezone
from typing import Optional
@@ -71,10 +70,10 @@ class ThreatFoxProvider(IntelProvider):
verdict=None, # absence is not a benign signal
column_updates={
"threatfox_listed": False,
"threatfox_threat_types": "[]",
"threatfox_ioc_types": "[]",
"threatfox_malware_families": "[]",
"threatfox_raw": "{}",
"threatfox_threat_types": [],
"threatfox_ioc_types": [],
"threatfox_malware_families": [],
"threatfox_raw": {},
"threatfox_queried_at": datetime.now(timezone.utc),
},
)
@@ -113,10 +112,10 @@ class ThreatFoxProvider(IntelProvider):
verdict="malicious" if listed else None,
column_updates={
"threatfox_listed": listed,
"threatfox_threat_types": json.dumps(sorted(threat_types)),
"threatfox_ioc_types": json.dumps(sorted(ioc_types)),
"threatfox_malware_families": json.dumps(sorted(families)),
"threatfox_raw": json.dumps(data),
"threatfox_threat_types": sorted(threat_types),
"threatfox_ioc_types": sorted(ioc_types),
"threatfox_malware_families": sorted(families),
"threatfox_raw": data,
"threatfox_queried_at": datetime.now(timezone.utc),
},
)

View File

@@ -20,7 +20,6 @@ from __future__ import annotations
import asyncio
import contextlib
import json
from datetime import datetime, timedelta, timezone
from typing import Any, Optional
@@ -60,18 +59,6 @@ def _aggregate(verdicts: list[Optional[str]]) -> Optional[str]:
return None
def _decode_json_list(value: Any) -> list[Any]:
if isinstance(value, list):
return value
if isinstance(value, str) and value:
try:
decoded = json.loads(value)
except (json.JSONDecodeError, TypeError):
return []
return decoded if isinstance(decoded, list) else []
return []
def _build_intel_event_payload(
attacker_uuid: str,
ip: str,
@@ -80,11 +67,6 @@ def _build_intel_event_payload(
) -> dict[str, Any]:
"""Project the AttackerIntel row into the bus event the TTP worker
consumes as ``source_kind="intel"``.
The TTP worker forwards the payload verbatim to the IntelLifter.
Per-provider taxonomy fields (categories, tags, threat_types) are
decoded back to native lists here so the lifter does not have to
care that the storage layer JSON-encodes them.
"""
return {
"attacker_uuid": attacker_uuid,
@@ -93,27 +75,19 @@ def _build_intel_event_payload(
"providers": [p.name for p in providers],
# AbuseIPDB
"abuseipdb_score": row.get("abuseipdb_score"),
"abuseipdb_categories": _decode_json_list(
row.get("abuseipdb_categories"),
),
"abuseipdb_categories": row.get("abuseipdb_categories") or [],
# GreyNoise
"greynoise_classification": row.get("greynoise_classification"),
"greynoise_name": row.get("greynoise_name"),
"greynoise_tags": _decode_json_list(row.get("greynoise_tags")),
"greynoise_tags": row.get("greynoise_tags") or [],
# Feodo
"feodo_listed": row.get("feodo_listed"),
"feodo_malware_family": row.get("feodo_malware_family"),
# ThreatFox
"threatfox_listed": row.get("threatfox_listed"),
"threatfox_threat_types": _decode_json_list(
row.get("threatfox_threat_types"),
),
"threatfox_ioc_types": _decode_json_list(
row.get("threatfox_ioc_types"),
),
"threatfox_malware_families": _decode_json_list(
row.get("threatfox_malware_families"),
),
"threatfox_threat_types": row.get("threatfox_threat_types") or [],
"threatfox_ioc_types": row.get("threatfox_ioc_types") or [],
"threatfox_malware_families": row.get("threatfox_malware_families") or [],
}