refactor(intel): migrate AttackerIntel JSON-string columns to native SQLAlchemy JSON
Five list columns (greynoise_tags, abuseipdb_categories, threatfox_threat_types, threatfox_ioc_types, threatfox_malware_families) and four dict columns (*_raw) are now Column(JSON) with list/dict type annotations and default_factory=list/dict. Providers return native Python objects; the application-layer json.dumps/json.loads round-trip and the _decode_json_list helper are gone. to_intel_event_payload() reads columns directly. Also caps pytest xdist at -n 4 and excludes tests/api from collection via norecursedirs to prevent schemathesis workers from OOM-killing the dev loop.
This commit is contained in:
@@ -17,7 +17,6 @@ later if operators report drift.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
@@ -110,8 +109,8 @@ class AbuseIPDBProvider(IntelProvider):
|
||||
verdict=verdict,
|
||||
column_updates={
|
||||
"abuseipdb_score": score,
|
||||
"abuseipdb_categories": json.dumps(sorted(categories)),
|
||||
"abuseipdb_raw": json.dumps(data),
|
||||
"abuseipdb_categories": sorted(categories),
|
||||
"abuseipdb_raw": data,
|
||||
"abuseipdb_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -13,7 +13,6 @@ of attacker IPs map to a single network round-trip per refresh window.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Optional
|
||||
@@ -94,7 +93,7 @@ class FeodoProvider(IntelProvider):
|
||||
column_updates={
|
||||
"feodo_listed": False,
|
||||
"feodo_malware_family": None,
|
||||
"feodo_raw": "{}",
|
||||
"feodo_raw": {},
|
||||
"feodo_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
)
|
||||
@@ -108,7 +107,7 @@ class FeodoProvider(IntelProvider):
|
||||
column_updates={
|
||||
"feodo_listed": True,
|
||||
"feodo_malware_family": family,
|
||||
"feodo_raw": json.dumps(entry),
|
||||
"feodo_raw": entry,
|
||||
"feodo_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -25,7 +25,6 @@ Status code semantics:
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
@@ -72,8 +71,8 @@ class GreyNoiseProvider(IntelProvider):
|
||||
column_updates={
|
||||
"greynoise_classification": "unknown",
|
||||
"greynoise_name": None,
|
||||
"greynoise_tags": "[]",
|
||||
"greynoise_raw": json.dumps({"message": "not seen"}),
|
||||
"greynoise_tags": [],
|
||||
"greynoise_raw": {"message": "not seen"},
|
||||
"greynoise_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
)
|
||||
@@ -107,8 +106,8 @@ class GreyNoiseProvider(IntelProvider):
|
||||
column_updates={
|
||||
"greynoise_classification": classification,
|
||||
"greynoise_name": name,
|
||||
"greynoise_tags": json.dumps(tags),
|
||||
"greynoise_raw": json.dumps(data),
|
||||
"greynoise_tags": tags,
|
||||
"greynoise_raw": data,
|
||||
"greynoise_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -12,7 +12,6 @@ caps requests/min — the provider works either way.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
@@ -71,10 +70,10 @@ class ThreatFoxProvider(IntelProvider):
|
||||
verdict=None, # absence is not a benign signal
|
||||
column_updates={
|
||||
"threatfox_listed": False,
|
||||
"threatfox_threat_types": "[]",
|
||||
"threatfox_ioc_types": "[]",
|
||||
"threatfox_malware_families": "[]",
|
||||
"threatfox_raw": "{}",
|
||||
"threatfox_threat_types": [],
|
||||
"threatfox_ioc_types": [],
|
||||
"threatfox_malware_families": [],
|
||||
"threatfox_raw": {},
|
||||
"threatfox_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
)
|
||||
@@ -113,10 +112,10 @@ class ThreatFoxProvider(IntelProvider):
|
||||
verdict="malicious" if listed else None,
|
||||
column_updates={
|
||||
"threatfox_listed": listed,
|
||||
"threatfox_threat_types": json.dumps(sorted(threat_types)),
|
||||
"threatfox_ioc_types": json.dumps(sorted(ioc_types)),
|
||||
"threatfox_malware_families": json.dumps(sorted(families)),
|
||||
"threatfox_raw": json.dumps(data),
|
||||
"threatfox_threat_types": sorted(threat_types),
|
||||
"threatfox_ioc_types": sorted(ioc_types),
|
||||
"threatfox_malware_families": sorted(families),
|
||||
"threatfox_raw": data,
|
||||
"threatfox_queried_at": datetime.now(timezone.utc),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -20,7 +20,6 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import json
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any, Optional
|
||||
|
||||
@@ -60,18 +59,6 @@ def _aggregate(verdicts: list[Optional[str]]) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
def _decode_json_list(value: Any) -> list[Any]:
|
||||
if isinstance(value, list):
|
||||
return value
|
||||
if isinstance(value, str) and value:
|
||||
try:
|
||||
decoded = json.loads(value)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
return []
|
||||
return decoded if isinstance(decoded, list) else []
|
||||
return []
|
||||
|
||||
|
||||
def _build_intel_event_payload(
|
||||
attacker_uuid: str,
|
||||
ip: str,
|
||||
@@ -80,11 +67,6 @@ def _build_intel_event_payload(
|
||||
) -> dict[str, Any]:
|
||||
"""Project the AttackerIntel row into the bus event the TTP worker
|
||||
consumes as ``source_kind="intel"``.
|
||||
|
||||
The TTP worker forwards the payload verbatim to the IntelLifter.
|
||||
Per-provider taxonomy fields (categories, tags, threat_types) are
|
||||
decoded back to native lists here so the lifter does not have to
|
||||
care that the storage layer JSON-encodes them.
|
||||
"""
|
||||
return {
|
||||
"attacker_uuid": attacker_uuid,
|
||||
@@ -93,27 +75,19 @@ def _build_intel_event_payload(
|
||||
"providers": [p.name for p in providers],
|
||||
# AbuseIPDB
|
||||
"abuseipdb_score": row.get("abuseipdb_score"),
|
||||
"abuseipdb_categories": _decode_json_list(
|
||||
row.get("abuseipdb_categories"),
|
||||
),
|
||||
"abuseipdb_categories": row.get("abuseipdb_categories") or [],
|
||||
# GreyNoise
|
||||
"greynoise_classification": row.get("greynoise_classification"),
|
||||
"greynoise_name": row.get("greynoise_name"),
|
||||
"greynoise_tags": _decode_json_list(row.get("greynoise_tags")),
|
||||
"greynoise_tags": row.get("greynoise_tags") or [],
|
||||
# Feodo
|
||||
"feodo_listed": row.get("feodo_listed"),
|
||||
"feodo_malware_family": row.get("feodo_malware_family"),
|
||||
# ThreatFox
|
||||
"threatfox_listed": row.get("threatfox_listed"),
|
||||
"threatfox_threat_types": _decode_json_list(
|
||||
row.get("threatfox_threat_types"),
|
||||
),
|
||||
"threatfox_ioc_types": _decode_json_list(
|
||||
row.get("threatfox_ioc_types"),
|
||||
),
|
||||
"threatfox_malware_families": _decode_json_list(
|
||||
row.get("threatfox_malware_families"),
|
||||
),
|
||||
"threatfox_threat_types": row.get("threatfox_threat_types") or [],
|
||||
"threatfox_ioc_types": row.get("threatfox_ioc_types") or [],
|
||||
"threatfox_malware_families": row.get("threatfox_malware_families") or [],
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user