refactor(intel): migrate AttackerIntel JSON-string columns to native SQLAlchemy JSON

Five list columns (greynoise_tags, abuseipdb_categories, threatfox_threat_types,
threatfox_ioc_types, threatfox_malware_families) and four dict columns
(*_raw) are now Column(JSON) with list/dict type annotations and
default_factory=list/dict. Providers return native Python objects; the
application-layer json.dumps/json.loads round-trip and _decode_json_list
helpers are gone. to_intel_event_payload() reads columns directly.

Also caps pytest xdist at -n 4 and excludes tests/api from norecursedirs
to prevent schemathesis workers from OOM-killing the dev loop.
This commit is contained in:
2026-05-10 09:17:15 -04:00
parent de3634d739
commit 9a7b03700c
16 changed files with 90 additions and 193 deletions

View File

@@ -17,7 +17,6 @@ later if operators report drift.
""" """
from __future__ import annotations from __future__ import annotations
import json
import os import os
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Optional from typing import Optional
@@ -110,8 +109,8 @@ class AbuseIPDBProvider(IntelProvider):
verdict=verdict, verdict=verdict,
column_updates={ column_updates={
"abuseipdb_score": score, "abuseipdb_score": score,
"abuseipdb_categories": json.dumps(sorted(categories)), "abuseipdb_categories": sorted(categories),
"abuseipdb_raw": json.dumps(data), "abuseipdb_raw": data,
"abuseipdb_queried_at": datetime.now(timezone.utc), "abuseipdb_queried_at": datetime.now(timezone.utc),
}, },
) )

View File

@@ -13,7 +13,6 @@ of attacker IPs map to a single network round-trip per refresh window.
""" """
from __future__ import annotations from __future__ import annotations
import json
import time import time
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Any, Optional from typing import Any, Optional
@@ -94,7 +93,7 @@ class FeodoProvider(IntelProvider):
column_updates={ column_updates={
"feodo_listed": False, "feodo_listed": False,
"feodo_malware_family": None, "feodo_malware_family": None,
"feodo_raw": "{}", "feodo_raw": {},
"feodo_queried_at": datetime.now(timezone.utc), "feodo_queried_at": datetime.now(timezone.utc),
}, },
) )
@@ -108,7 +107,7 @@ class FeodoProvider(IntelProvider):
column_updates={ column_updates={
"feodo_listed": True, "feodo_listed": True,
"feodo_malware_family": family, "feodo_malware_family": family,
"feodo_raw": json.dumps(entry), "feodo_raw": entry,
"feodo_queried_at": datetime.now(timezone.utc), "feodo_queried_at": datetime.now(timezone.utc),
}, },
) )

View File

@@ -25,7 +25,6 @@ Status code semantics:
""" """
from __future__ import annotations from __future__ import annotations
import json
import os import os
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Optional from typing import Optional
@@ -72,8 +71,8 @@ class GreyNoiseProvider(IntelProvider):
column_updates={ column_updates={
"greynoise_classification": "unknown", "greynoise_classification": "unknown",
"greynoise_name": None, "greynoise_name": None,
"greynoise_tags": "[]", "greynoise_tags": [],
"greynoise_raw": json.dumps({"message": "not seen"}), "greynoise_raw": {"message": "not seen"},
"greynoise_queried_at": datetime.now(timezone.utc), "greynoise_queried_at": datetime.now(timezone.utc),
}, },
) )
@@ -107,8 +106,8 @@ class GreyNoiseProvider(IntelProvider):
column_updates={ column_updates={
"greynoise_classification": classification, "greynoise_classification": classification,
"greynoise_name": name, "greynoise_name": name,
"greynoise_tags": json.dumps(tags), "greynoise_tags": tags,
"greynoise_raw": json.dumps(data), "greynoise_raw": data,
"greynoise_queried_at": datetime.now(timezone.utc), "greynoise_queried_at": datetime.now(timezone.utc),
}, },
) )

View File

@@ -12,7 +12,6 @@ caps requests/min — the provider works either way.
""" """
from __future__ import annotations from __future__ import annotations
import json
import os import os
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Optional from typing import Optional
@@ -71,10 +70,10 @@ class ThreatFoxProvider(IntelProvider):
verdict=None, # absence is not a benign signal verdict=None, # absence is not a benign signal
column_updates={ column_updates={
"threatfox_listed": False, "threatfox_listed": False,
"threatfox_threat_types": "[]", "threatfox_threat_types": [],
"threatfox_ioc_types": "[]", "threatfox_ioc_types": [],
"threatfox_malware_families": "[]", "threatfox_malware_families": [],
"threatfox_raw": "{}", "threatfox_raw": {},
"threatfox_queried_at": datetime.now(timezone.utc), "threatfox_queried_at": datetime.now(timezone.utc),
}, },
) )
@@ -113,10 +112,10 @@ class ThreatFoxProvider(IntelProvider):
verdict="malicious" if listed else None, verdict="malicious" if listed else None,
column_updates={ column_updates={
"threatfox_listed": listed, "threatfox_listed": listed,
"threatfox_threat_types": json.dumps(sorted(threat_types)), "threatfox_threat_types": sorted(threat_types),
"threatfox_ioc_types": json.dumps(sorted(ioc_types)), "threatfox_ioc_types": sorted(ioc_types),
"threatfox_malware_families": json.dumps(sorted(families)), "threatfox_malware_families": sorted(families),
"threatfox_raw": json.dumps(data), "threatfox_raw": data,
"threatfox_queried_at": datetime.now(timezone.utc), "threatfox_queried_at": datetime.now(timezone.utc),
}, },
) )

View File

@@ -20,7 +20,6 @@ from __future__ import annotations
import asyncio import asyncio
import contextlib import contextlib
import json
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from typing import Any, Optional from typing import Any, Optional
@@ -60,18 +59,6 @@ def _aggregate(verdicts: list[Optional[str]]) -> Optional[str]:
return None return None
def _decode_json_list(value: Any) -> list[Any]:
if isinstance(value, list):
return value
if isinstance(value, str) and value:
try:
decoded = json.loads(value)
except (json.JSONDecodeError, TypeError):
return []
return decoded if isinstance(decoded, list) else []
return []
def _build_intel_event_payload( def _build_intel_event_payload(
attacker_uuid: str, attacker_uuid: str,
ip: str, ip: str,
@@ -80,11 +67,6 @@ def _build_intel_event_payload(
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Project the AttackerIntel row into the bus event the TTP worker """Project the AttackerIntel row into the bus event the TTP worker
consumes as ``source_kind="intel"``. consumes as ``source_kind="intel"``.
The TTP worker forwards the payload verbatim to the IntelLifter.
Per-provider taxonomy fields (categories, tags, threat_types) are
decoded back to native lists here so the lifter does not have to
care that the storage layer JSON-encodes them.
""" """
return { return {
"attacker_uuid": attacker_uuid, "attacker_uuid": attacker_uuid,
@@ -93,27 +75,19 @@ def _build_intel_event_payload(
"providers": [p.name for p in providers], "providers": [p.name for p in providers],
# AbuseIPDB # AbuseIPDB
"abuseipdb_score": row.get("abuseipdb_score"), "abuseipdb_score": row.get("abuseipdb_score"),
"abuseipdb_categories": _decode_json_list( "abuseipdb_categories": row.get("abuseipdb_categories") or [],
row.get("abuseipdb_categories"),
),
# GreyNoise # GreyNoise
"greynoise_classification": row.get("greynoise_classification"), "greynoise_classification": row.get("greynoise_classification"),
"greynoise_name": row.get("greynoise_name"), "greynoise_name": row.get("greynoise_name"),
"greynoise_tags": _decode_json_list(row.get("greynoise_tags")), "greynoise_tags": row.get("greynoise_tags") or [],
# Feodo # Feodo
"feodo_listed": row.get("feodo_listed"), "feodo_listed": row.get("feodo_listed"),
"feodo_malware_family": row.get("feodo_malware_family"), "feodo_malware_family": row.get("feodo_malware_family"),
# ThreatFox # ThreatFox
"threatfox_listed": row.get("threatfox_listed"), "threatfox_listed": row.get("threatfox_listed"),
"threatfox_threat_types": _decode_json_list( "threatfox_threat_types": row.get("threatfox_threat_types") or [],
row.get("threatfox_threat_types"), "threatfox_ioc_types": row.get("threatfox_ioc_types") or [],
), "threatfox_malware_families": row.get("threatfox_malware_families") or [],
"threatfox_ioc_types": _decode_json_list(
row.get("threatfox_ioc_types"),
),
"threatfox_malware_families": _decode_json_list(
row.get("threatfox_malware_families"),
),
} }

View File

@@ -1,25 +1,10 @@
"""Threat-intel enrichment row — one per attacker IP, TTL-cached.""" """Threat-intel enrichment row — one per attacker IP, TTL-cached."""
import json as _json
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Any, Optional from typing import Any, Optional
from sqlalchemy import Column from sqlalchemy import JSON, Column
from sqlmodel import Field, SQLModel from sqlmodel import Field, SQLModel
from ._base import _BIG_TEXT
def _decode_json_list(value: Any) -> list[Any]:
if isinstance(value, list):
return value
if isinstance(value, str) and value:
try:
decoded = _json.loads(value)
except (_json.JSONDecodeError, TypeError):
return []
return decoded if isinstance(decoded, list) else []
return []
class AttackerIntel(SQLModel, table=True): class AttackerIntel(SQLModel, table=True):
"""Aggregated threat-intel verdict for a single attacker IP. """Aggregated threat-intel verdict for a single attacker IP.
@@ -63,37 +48,35 @@ class AttackerIntel(SQLModel, table=True):
# empty unless an operator wires a non-Community provider that does. # empty unless an operator wires a non-Community provider that does.
greynoise_classification: Optional[str] = Field(default=None, max_length=32) greynoise_classification: Optional[str] = Field(default=None, max_length=32)
greynoise_name: Optional[str] = Field(default=None, max_length=128) greynoise_name: Optional[str] = Field(default=None, max_length=128)
greynoise_tags: str = Field( greynoise_tags: list[str] = Field(
default="[]", default_factory=list,
sa_column=Column("greynoise_tags", _BIG_TEXT, nullable=False, default="[]"), sa_column=Column("greynoise_tags", JSON, nullable=False),
) # JSON list[str] — behavioral / actor tags )
greynoise_raw: str = Field( greynoise_raw: dict[str, Any] = Field(
default="{}", default_factory=dict,
sa_column=Column("greynoise_raw", _BIG_TEXT, nullable=False, default="{}"), sa_column=Column("greynoise_raw", JSON, nullable=False),
) )
greynoise_queried_at: Optional[datetime] = Field(default=None) greynoise_queried_at: Optional[datetime] = Field(default=None)
# ── AbuseIPDB ──────────────────────────────────────────────────────── # ── AbuseIPDB ────────────────────────────────────────────────────────
# 0..100 abuse confidence score # 0..100 abuse confidence score
abuseipdb_score: Optional[int] = Field(default=None) abuseipdb_score: Optional[int] = Field(default=None)
abuseipdb_categories: str = Field( abuseipdb_categories: list[int] = Field(
default="[]", default_factory=list,
sa_column=Column( sa_column=Column("abuseipdb_categories", JSON, nullable=False),
"abuseipdb_categories", _BIG_TEXT, nullable=False, default="[]", )
), abuseipdb_raw: dict[str, Any] = Field(
) # JSON list[int] — flattened set of categories across recent reports default_factory=dict,
abuseipdb_raw: str = Field( sa_column=Column("abuseipdb_raw", JSON, nullable=False),
default="{}",
sa_column=Column("abuseipdb_raw", _BIG_TEXT, nullable=False, default="{}"),
) )
abuseipdb_queried_at: Optional[datetime] = Field(default=None) abuseipdb_queried_at: Optional[datetime] = Field(default=None)
# ── abuse.ch Feodo Tracker ─────────────────────────────────────────── # ── abuse.ch Feodo Tracker ───────────────────────────────────────────
feodo_listed: Optional[bool] = Field(default=None) feodo_listed: Optional[bool] = Field(default=None)
feodo_malware_family: Optional[str] = Field(default=None, max_length=64) feodo_malware_family: Optional[str] = Field(default=None, max_length=64)
feodo_raw: str = Field( feodo_raw: dict[str, Any] = Field(
default="{}", default_factory=dict,
sa_column=Column("feodo_raw", _BIG_TEXT, nullable=False, default="{}"), sa_column=Column("feodo_raw", JSON, nullable=False),
) )
feodo_queried_at: Optional[datetime] = Field(default=None) feodo_queried_at: Optional[datetime] = Field(default=None)
@@ -105,27 +88,21 @@ class AttackerIntel(SQLModel, table=True):
# IntelLifter keys ATT&CK techniques on ``threat_type``, the canonical # IntelLifter keys ATT&CK techniques on ``threat_type``, the canonical
# taxonomy field per ThreatFox's API. # taxonomy field per ThreatFox's API.
threatfox_listed: Optional[bool] = Field(default=None) threatfox_listed: Optional[bool] = Field(default=None)
threatfox_threat_types: str = Field( threatfox_threat_types: list[str] = Field(
default="[]", default_factory=list,
sa_column=Column( sa_column=Column("threatfox_threat_types", JSON, nullable=False),
"threatfox_threat_types", _BIG_TEXT, nullable=False, default="[]", )
), threatfox_ioc_types: list[str] = Field(
) # JSON list[str] default_factory=list,
threatfox_ioc_types: str = Field( sa_column=Column("threatfox_ioc_types", JSON, nullable=False),
default="[]", )
sa_column=Column( threatfox_malware_families: list[str] = Field(
"threatfox_ioc_types", _BIG_TEXT, nullable=False, default="[]", default_factory=list,
), sa_column=Column("threatfox_malware_families", JSON, nullable=False),
) # JSON list[str] )
threatfox_malware_families: str = Field( threatfox_raw: dict[str, Any] = Field(
default="[]", default_factory=dict,
sa_column=Column( sa_column=Column("threatfox_raw", JSON, nullable=False),
"threatfox_malware_families", _BIG_TEXT, nullable=False, default="[]",
),
) # JSON list[str]
threatfox_raw: str = Field(
default="{}",
sa_column=Column("threatfox_raw", _BIG_TEXT, nullable=False, default="{}"),
) )
threatfox_queried_at: Optional[datetime] = Field(default=None) threatfox_queried_at: Optional[datetime] = Field(default=None)
@@ -166,21 +143,19 @@ class AttackerIntel(SQLModel, table=True):
"aggregate_verdict": self.aggregate_verdict, "aggregate_verdict": self.aggregate_verdict,
# AbuseIPDB # AbuseIPDB
"abuseipdb_score": self.abuseipdb_score, "abuseipdb_score": self.abuseipdb_score,
"abuseipdb_categories": _decode_json_list(self.abuseipdb_categories), "abuseipdb_categories": self.abuseipdb_categories,
# GreyNoise # GreyNoise
"greynoise_classification": self.greynoise_classification, "greynoise_classification": self.greynoise_classification,
"greynoise_name": self.greynoise_name, "greynoise_name": self.greynoise_name,
"greynoise_tags": _decode_json_list(self.greynoise_tags), "greynoise_tags": self.greynoise_tags,
# Feodo # Feodo
"feodo_listed": self.feodo_listed, "feodo_listed": self.feodo_listed,
"feodo_malware_family": self.feodo_malware_family, "feodo_malware_family": self.feodo_malware_family,
# ThreatFox # ThreatFox
"threatfox_listed": self.threatfox_listed, "threatfox_listed": self.threatfox_listed,
"threatfox_threat_types": _decode_json_list(self.threatfox_threat_types), "threatfox_threat_types": self.threatfox_threat_types,
"threatfox_ioc_types": _decode_json_list(self.threatfox_ioc_types), "threatfox_ioc_types": self.threatfox_ioc_types,
"threatfox_malware_families": _decode_json_list( "threatfox_malware_families": self.threatfox_malware_families,
self.threatfox_malware_families
),
} }
if providers is not None: if providers is not None:
d["providers"] = providers d["providers"] = providers

View File

@@ -7,7 +7,6 @@ worker query.
""" """
from __future__ import annotations from __future__ import annotations
import json
import uuid as _uuid import uuid as _uuid
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Any, Optional from typing import Any, Optional
@@ -67,28 +66,7 @@ class AttackerIntelMixin(_MixinBase):
row = result.scalar_one_or_none() row = result.scalar_one_or_none()
if not row: if not row:
return None return None
d = row.model_dump(mode="json") return row.model_dump(mode="json")
# Two passes: ``*_raw`` columns hold provider response blobs
# (objects); the per-provider taxonomy columns hold JSON
# arrays the IntelLifter consumes as native lists.
for key in (
"greynoise_raw",
"abuseipdb_raw",
"feodo_raw",
"threatfox_raw",
"greynoise_tags",
"abuseipdb_categories",
"threatfox_threat_types",
"threatfox_ioc_types",
"threatfox_malware_families",
):
raw = d.get(key)
if isinstance(raw, str):
try:
d[key] = json.loads(raw)
except (json.JSONDecodeError, TypeError):
pass
return d
async def get_unenriched_attackers( async def get_unenriched_attackers(
self, limit: int = 100, self, limit: int = 100,

View File

@@ -114,22 +114,10 @@ class AttackersCoreMixin(_MixinBase):
async with self._session() as session: async with self._session() as session:
rows = (await session.execute(stmt)).all() rows = (await session.execute(stmt)).all()
_intel_raw_keys = ("greynoise_raw", "abuseipdb_raw", "feodo_raw", "threatfox_raw")
result = [] result = []
for attacker, intel in rows: for attacker, intel in rows:
d = self._deserialize_attacker(attacker.model_dump(mode="json")) d = self._deserialize_attacker(attacker.model_dump(mode="json"))
if intel is not None: d["threat_intel"] = intel.model_dump(mode="json") if intel is not None else None
intel_d = intel.model_dump(mode="json")
for key in _intel_raw_keys:
raw = intel_d.get(key)
if isinstance(raw, str):
try:
intel_d[key] = json.loads(raw)
except (json.JSONDecodeError, TypeError):
pass
d["threat_intel"] = intel_d
else:
d["threat_intel"] = None
result.append(d) result.append(d)
return result return result

View File

@@ -117,13 +117,14 @@ decnet = "decnet.cli:app"
asyncio_mode = "auto" asyncio_mode = "auto"
asyncio_debug = "true" asyncio_debug = "true"
asyncio_default_fixture_loop_scope = "module" asyncio_default_fixture_loop_scope = "module"
addopts = "-v -q -x -n logical --dist load" addopts = "-v -q -x -n 4 --dist load"
norecursedirs = [ norecursedirs = [
"tests/live", "tests/live",
"tests/stress", "tests/stress",
"tests/service_testing", "tests/service_testing",
"tests/docker", "tests/docker",
"tests/perf", "tests/perf",
"tests/api",
"__pycache__", "__pycache__",
".git", ".git",
"node_modules", "node_modules",

View File

@@ -1,7 +1,6 @@
"""Unit tests for the AbuseIPDB provider.""" """Unit tests for the AbuseIPDB provider."""
from __future__ import annotations from __future__ import annotations
import json
import httpx import httpx
import pytest import pytest
@@ -71,8 +70,7 @@ async def test_high_score_maps_to_malicious(monkeypatch):
result = await provider.lookup("1.2.3.4") result = await provider.lookup("1.2.3.4")
assert result.verdict == "malicious" assert result.verdict == "malicious"
assert result.column_updates["abuseipdb_score"] == 92 assert result.column_updates["abuseipdb_score"] == 92
raw = json.loads(result.column_updates["abuseipdb_raw"]) assert result.column_updates["abuseipdb_raw"]["countryCode"] == "RU"
assert raw["countryCode"] == "RU"
# Key header sent, query params correct. # Key header sent, query params correct.
req = captured[0] req = captured[0]
assert req.headers["key"] == "k3y" assert req.headers["key"] == "k3y"
@@ -120,8 +118,7 @@ async def test_categories_flattened_from_reports(monkeypatch):
_install_transport(handler) _install_transport(handler)
provider = AbuseIPDBProvider() provider = AbuseIPDBProvider()
result = await provider.lookup("1.2.3.4") result = await provider.lookup("1.2.3.4")
cats = json.loads(result.column_updates["abuseipdb_categories"]) assert result.column_updates["abuseipdb_categories"] == [14, 18, 21, 22]
assert cats == [14, 18, 21, 22]
@pytest.mark.anyio @pytest.mark.anyio
@@ -136,7 +133,7 @@ async def test_categories_empty_when_no_reports(monkeypatch):
_install_transport(handler) _install_transport(handler)
provider = AbuseIPDBProvider() provider = AbuseIPDBProvider()
result = await provider.lookup("8.8.8.8") result = await provider.lookup("8.8.8.8")
assert json.loads(result.column_updates["abuseipdb_categories"]) == [] assert result.column_updates["abuseipdb_categories"] == []
@pytest.mark.anyio @pytest.mark.anyio

View File

@@ -77,7 +77,7 @@ async def test_partial_provider_update_preserves_others(repo):
_intel_payload( _intel_payload(
attacker_uuid=a_uuid, ip="9.9.9.9", attacker_uuid=a_uuid, ip="9.9.9.9",
greynoise_classification="malicious", greynoise_classification="malicious",
greynoise_raw='{"classification":"malicious"}', greynoise_raw={"classification": "malicious"},
greynoise_queried_at=datetime.now(timezone.utc), greynoise_queried_at=datetime.now(timezone.utc),
) )
) )
@@ -87,7 +87,7 @@ async def test_partial_provider_update_preserves_others(repo):
_intel_payload( _intel_payload(
attacker_uuid=a_uuid, ip="9.9.9.9", attacker_uuid=a_uuid, ip="9.9.9.9",
abuseipdb_score=85, abuseipdb_score=85,
abuseipdb_raw='{"abuseConfidenceScore":85}', abuseipdb_raw={"abuseConfidenceScore": 85},
abuseipdb_queried_at=datetime.now(timezone.utc), abuseipdb_queried_at=datetime.now(timezone.utc),
) )
) )

View File

@@ -10,7 +10,6 @@ subsequent ``lookup`` calls hit memory. We assert:
""" """
from __future__ import annotations from __future__ import annotations
import json
import httpx import httpx
import pytest import pytest
@@ -56,8 +55,7 @@ async def test_listed_ip_yields_malicious_verdict():
result = await provider.lookup("9.9.9.9") result = await provider.lookup("9.9.9.9")
assert result.verdict == "malicious" assert result.verdict == "malicious"
assert result.column_updates["feodo_listed"] is True assert result.column_updates["feodo_listed"] is True
raw = json.loads(result.column_updates["feodo_raw"]) assert result.column_updates["feodo_raw"]["malware"] == "TrickBot"
assert raw["malware"] == "TrickBot"
assert len(captured) == 1 assert len(captured) == 1

View File

@@ -11,7 +11,6 @@ Mocks httpx via ``MockTransport`` and asserts:
""" """
from __future__ import annotations from __future__ import annotations
import json
import httpx import httpx
import pytest import pytest
@@ -61,8 +60,7 @@ async def test_malicious_classification_maps_to_verdict():
assert result.error is None assert result.error is None
assert result.verdict == "malicious" assert result.verdict == "malicious"
assert result.column_updates["greynoise_classification"] == "malicious" assert result.column_updates["greynoise_classification"] == "malicious"
raw = json.loads(result.column_updates["greynoise_raw"]) assert result.column_updates["greynoise_raw"]["name"] == "Mirai-like"
assert raw["name"] == "Mirai-like"
assert "1.2.3.4" in str(captured[0].url) assert "1.2.3.4" in str(captured[0].url)
# No DECNET label leaks in the UA. # No DECNET label leaks in the UA.
assert "decnet" not in captured[0].headers["user-agent"].lower() assert "decnet" not in captured[0].headers["user-agent"].lower()
@@ -146,8 +144,7 @@ async def test_actor_name_and_tags_persisted_when_present():
_install_transport(provider, handler) _install_transport(provider, handler)
result = await provider.lookup("1.2.3.4") result = await provider.lookup("1.2.3.4")
assert result.column_updates["greynoise_name"] == "Tor" assert result.column_updates["greynoise_name"] == "Tor"
tags = json.loads(result.column_updates["greynoise_tags"]) assert result.column_updates["greynoise_tags"] == ["tor_exit_node", "ssh_bruteforcer"]
assert tags == ["tor_exit_node", "ssh_bruteforcer"]
@pytest.mark.anyio @pytest.mark.anyio
@@ -159,7 +156,7 @@ async def test_404_clears_actor_and_tags():
_install_transport(provider, handler) _install_transport(provider, handler)
result = await provider.lookup("10.0.0.5") result = await provider.lookup("10.0.0.5")
assert result.column_updates["greynoise_name"] is None assert result.column_updates["greynoise_name"] is None
assert result.column_updates["greynoise_tags"] == "[]" assert result.column_updates["greynoise_tags"] == []
@pytest.mark.anyio @pytest.mark.anyio

View File

@@ -56,7 +56,7 @@ async def test_match_returns_malicious(monkeypatch):
result = await provider.lookup("1.2.3.4") result = await provider.lookup("1.2.3.4")
assert result.verdict == "malicious" assert result.verdict == "malicious"
assert result.column_updates["threatfox_listed"] is True assert result.column_updates["threatfox_listed"] is True
raw = json.loads(result.column_updates["threatfox_raw"]) raw = result.column_updates["threatfox_raw"]
assert raw[0]["malware"] == "Cobalt Strike" assert raw[0]["malware"] == "Cobalt Strike"
# No Auth-Key when none configured. # No Auth-Key when none configured.
assert "auth-key" not in {h.lower() for h in captured[0].headers} assert "auth-key" not in {h.lower() for h in captured[0].headers}
@@ -134,11 +134,9 @@ async def test_threat_types_and_ioc_types_flattened(monkeypatch):
provider = ThreatFoxProvider() provider = ThreatFoxProvider()
result = await provider.lookup("1.2.3.4") result = await provider.lookup("1.2.3.4")
cu = result.column_updates cu = result.column_updates
assert json.loads(cu["threatfox_threat_types"]) == [ assert cu["threatfox_threat_types"] == ["botnet_cc", "payload_delivery"]
"botnet_cc", "payload_delivery", assert cu["threatfox_ioc_types"] == ["ip:port", "url"]
] assert cu["threatfox_malware_families"] == ["Emotet", "Sliver"]
assert json.loads(cu["threatfox_ioc_types"]) == ["ip:port", "url"]
assert json.loads(cu["threatfox_malware_families"]) == ["Emotet", "Sliver"]
@pytest.mark.anyio @pytest.mark.anyio
@@ -150,9 +148,9 @@ async def test_no_result_clears_taxonomy_columns():
provider = ThreatFoxProvider() provider = ThreatFoxProvider()
result = await provider.lookup("8.8.8.8") result = await provider.lookup("8.8.8.8")
cu = result.column_updates cu = result.column_updates
assert cu["threatfox_threat_types"] == "[]" assert cu["threatfox_threat_types"] == []
assert cu["threatfox_ioc_types"] == "[]" assert cu["threatfox_ioc_types"] == []
assert cu["threatfox_malware_families"] == "[]" assert cu["threatfox_malware_families"] == []
@pytest.mark.anyio @pytest.mark.anyio

View File

@@ -12,7 +12,6 @@ Covers — without any real provider impls — that the loop:
from __future__ import annotations from __future__ import annotations
import asyncio import asyncio
import json
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Optional from typing import Optional
@@ -128,7 +127,7 @@ async def test_fan_out_writes_aggregate_row(repo):
verdict="benign", verdict="benign",
column_updates={ column_updates={
"greynoise_classification": "benign", "greynoise_classification": "benign",
"greynoise_raw": json.dumps({"classification": "benign"}), "greynoise_raw": {"classification": "benign"},
"greynoise_queried_at": datetime.now(timezone.utc), "greynoise_queried_at": datetime.now(timezone.utc),
}, },
) )
@@ -137,7 +136,7 @@ async def test_fan_out_writes_aggregate_row(repo):
verdict="malicious", verdict="malicious",
column_updates={ column_updates={
"abuseipdb_score": 90, "abuseipdb_score": 90,
"abuseipdb_raw": json.dumps({"abuseConfidenceScore": 90}), "abuseipdb_raw": {"abuseConfidenceScore": 90},
"abuseipdb_queried_at": datetime.now(timezone.utc), "abuseipdb_queried_at": datetime.now(timezone.utc),
}, },
) )
@@ -178,7 +177,7 @@ async def test_provider_error_does_not_poison_row(repo):
verdict="benign", verdict="benign",
column_updates={ column_updates={
"greynoise_classification": "benign", "greynoise_classification": "benign",
"greynoise_raw": "{}", "greynoise_raw": {},
"greynoise_queried_at": datetime.now(timezone.utc), "greynoise_queried_at": datetime.now(timezone.utc),
}, },
) )
@@ -234,7 +233,7 @@ async def test_intel_enriched_event_published_to_bus(repo, monkeypatch):
verdict="malicious", verdict="malicious",
column_updates={ column_updates={
"greynoise_classification": "malicious", "greynoise_classification": "malicious",
"greynoise_raw": "{}", "greynoise_raw": {},
"greynoise_queried_at": datetime.now(timezone.utc), "greynoise_queried_at": datetime.now(timezone.utc),
}, },
) )

View File

@@ -98,26 +98,22 @@ async def test_intel_worker_publishes_intel_enriched(
def test_build_intel_event_payload_projects_taxonomy_fields() -> None: def test_build_intel_event_payload_projects_taxonomy_fields() -> None:
"""Post-2026-05-02 audit: the bus payload now carries the per- """The bus payload carries the per-provider taxonomy fields the
provider taxonomy fields the IntelLifter needs (categories, tags, IntelLifter needs (categories, tags, threat_types) as native lists.
threat_types). JSON-string columns are decoded back to native
lists so the consumer does not have to know about storage shape.
""" """
import json as _json
row = { row = {
"aggregate_verdict": "malicious", "aggregate_verdict": "malicious",
"abuseipdb_score": 87, "abuseipdb_score": 87,
"abuseipdb_categories": _json.dumps([14, 18, 22]), "abuseipdb_categories": [14, 18, 22],
"greynoise_classification": "malicious", "greynoise_classification": "malicious",
"greynoise_name": "Mirai", "greynoise_name": "Mirai",
"greynoise_tags": _json.dumps(["ssh_bruteforcer"]), "greynoise_tags": ["ssh_bruteforcer"],
"feodo_listed": True, "feodo_listed": True,
"feodo_malware_family": "Emotet", "feodo_malware_family": "Emotet",
"threatfox_listed": True, "threatfox_listed": True,
"threatfox_threat_types": _json.dumps(["botnet_cc"]), "threatfox_threat_types": ["botnet_cc"],
"threatfox_ioc_types": _json.dumps(["ip:port"]), "threatfox_ioc_types": ["ip:port"],
"threatfox_malware_families": _json.dumps(["Sliver"]), "threatfox_malware_families": ["Sliver"],
} }
payload = _iw._build_intel_event_payload( payload = _iw._build_intel_event_payload(
"att-2", "203.0.113.7", row, [_FakeProvider()], "att-2", "203.0.113.7", row, [_FakeProvider()],