refactor(intel): re-key attacker_intel on attacker_uuid (closes DEBT-041)
The threat-intel surface was IP-keyed on day one as an expedient — the
worker is woken by IP-bearing bus events. ANTI's call: don't carry that
debt. NO IPs as primary keys anywhere on the attacker-intel surface.
Schema:
- attacker_uuid is now the canonical key — UNIQUE + FK to attackers.uuid.
- attacker_ip stays as a denormalised, indexed, NON-UNIQUE value column.
Updated on every upsert; useful for SIEM payloads and audit lookups,
but explicitly NOT a key. Model docstring says so.
- Pre-v1, no Alembic migration needed. SQLModel.metadata.create_all()
builds the new shape on fresh DBs.
Repo:
- upsert_attacker_intel now keys on attacker_uuid.
- get_attacker_intel_by_ip → get_attacker_intel_by_uuid.
- get_unenriched_attacker_ips → get_unenriched_attackers, returning
[{uuid, ip}] tuples so the worker writes by UUID and dispatches
provider calls by IP without a second round-trip.
Worker:
- _enrich_one(uuid, ip, ...) — UUID lands on the row, IP rides for
provider egress.
- attacker.intel.enriched bus payload gains attacker_uuid alongside
attacker_ip — webhook → SIEM consumers benefit; no removal.
API:
- GET /api/v1/attackers/{ip}/intel deleted outright (rip-and-replace,
never deployed beyond dev).
- GET /api/v1/attackers/{uuid}/intel is the only public route, matching
every other /attackers/* route.
Frontend:
- <IntelPanel uuid={id!} /> uses the URL param directly, fetches in
parallel with the rest of AttackerDetail rather than waiting on
attacker.ip.
Tests: re-keyed in place, 39 passed (same coverage as before the
refactor). Provider-impl tests untouched.
DEBT-041: closed in DEBT.md (entry preserved as historical rationale,
summary table flipped to ✅, remaining-open list shortened by one).
This commit is contained in:
@@ -2,11 +2,12 @@
|
||||
Round-trip tests for the ``attacker_intel`` table and its repo helpers.
|
||||
|
||||
Covers:
|
||||
* empty-write upsert path
|
||||
* per-provider partial update
|
||||
* empty-write upsert path (attacker_uuid as canonical key)
|
||||
* per-provider partial update preserves untouched columns
|
||||
* JSON-blob deserialization on read
|
||||
* TTL bookkeeping (cached_at + expires_at) round-trips intact
|
||||
* ``get_unenriched_attacker_ips`` selects fresh + stale, skips cached
|
||||
* ``get_unenriched_attackers`` returns ``{"uuid", "ip"}`` pairs and
|
||||
selects fresh + stale rows while skipping cached ones
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -24,9 +25,20 @@ async def repo(tmp_path):
|
||||
return r
|
||||
|
||||
|
||||
def _intel_payload(ip: str, *, ttl_hours: int = 24, **overrides) -> dict:
|
||||
async def _seed_attacker(repo, ip: str) -> str:
|
||||
"""Seed an attackers row and return its UUID (the FK target)."""
|
||||
now = datetime.now(timezone.utc)
|
||||
return await repo.upsert_attacker(
|
||||
{"ip": ip, "first_seen": now, "last_seen": now, "event_count": 1}
|
||||
)
|
||||
|
||||
|
||||
def _intel_payload(
|
||||
*, attacker_uuid: str, ip: str, ttl_hours: int = 24, **overrides
|
||||
) -> dict:
|
||||
now = datetime.now(timezone.utc)
|
||||
base = {
|
||||
"attacker_uuid": attacker_uuid,
|
||||
"attacker_ip": ip,
|
||||
"cached_at": now,
|
||||
"expires_at": now + timedelta(hours=ttl_hours),
|
||||
@@ -37,11 +49,15 @@ def _intel_payload(ip: str, *, ttl_hours: int = 24, **overrides) -> dict:
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_empty_upsert_writes_minimal_row(repo):
|
||||
row_uuid = await repo.upsert_attacker_intel(_intel_payload("1.2.3.4"))
|
||||
a_uuid = await _seed_attacker(repo, "1.2.3.4")
|
||||
row_uuid = await repo.upsert_attacker_intel(
|
||||
_intel_payload(attacker_uuid=a_uuid, ip="1.2.3.4")
|
||||
)
|
||||
assert row_uuid
|
||||
|
||||
row = await repo.get_attacker_intel_by_ip("1.2.3.4")
|
||||
row = await repo.get_attacker_intel_by_uuid(a_uuid)
|
||||
assert row is not None
|
||||
assert row["attacker_uuid"] == a_uuid
|
||||
assert row["attacker_ip"] == "1.2.3.4"
|
||||
assert row["uuid"] == row_uuid
|
||||
assert row["schema_version"] == 1
|
||||
@@ -55,10 +71,11 @@ async def test_empty_upsert_writes_minimal_row(repo):
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_partial_provider_update_preserves_others(repo):
|
||||
a_uuid = await _seed_attacker(repo, "9.9.9.9")
|
||||
# First pass: GreyNoise responds, others lag.
|
||||
first_uuid = await repo.upsert_attacker_intel(
|
||||
_intel_payload(
|
||||
"9.9.9.9",
|
||||
attacker_uuid=a_uuid, ip="9.9.9.9",
|
||||
greynoise_classification="malicious",
|
||||
greynoise_raw='{"classification":"malicious"}',
|
||||
greynoise_queried_at=datetime.now(timezone.utc),
|
||||
@@ -68,15 +85,15 @@ async def test_partial_provider_update_preserves_others(repo):
|
||||
# columns — the worker passes only the new fields.
|
||||
second_uuid = await repo.upsert_attacker_intel(
|
||||
_intel_payload(
|
||||
"9.9.9.9",
|
||||
attacker_uuid=a_uuid, ip="9.9.9.9",
|
||||
abuseipdb_score=85,
|
||||
abuseipdb_raw='{"abuseConfidenceScore":85}',
|
||||
abuseipdb_queried_at=datetime.now(timezone.utc),
|
||||
)
|
||||
)
|
||||
assert first_uuid == second_uuid # same row
|
||||
assert first_uuid == second_uuid # same row keyed on attacker_uuid
|
||||
|
||||
row = await repo.get_attacker_intel_by_ip("9.9.9.9")
|
||||
row = await repo.get_attacker_intel_by_uuid(a_uuid)
|
||||
assert row["greynoise_classification"] == "malicious"
|
||||
assert row["greynoise_raw"] == {"classification": "malicious"}
|
||||
assert row["abuseipdb_score"] == 85
|
||||
@@ -85,29 +102,28 @@ async def test_partial_provider_update_preserves_others(repo):
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_get_missing_returns_none(repo):
|
||||
assert await repo.get_attacker_intel_by_ip("0.0.0.0") is None
|
||||
assert await repo.get_attacker_intel_by_uuid("nonexistent-uuid") is None
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_unenriched_selects_fresh_and_stale_ips(repo):
|
||||
# Seed three attackers via upsert_attacker.
|
||||
now = datetime.now(timezone.utc)
|
||||
for ip in ("10.0.0.1", "10.0.0.2", "10.0.0.3"):
|
||||
await repo.upsert_attacker(
|
||||
{
|
||||
"ip": ip,
|
||||
"first_seen": now,
|
||||
"last_seen": now,
|
||||
"event_count": 1,
|
||||
}
|
||||
)
|
||||
async def test_unenriched_returns_uuid_ip_pairs(repo):
|
||||
fresh_uuid = await _seed_attacker(repo, "10.0.0.1")
|
||||
stale_uuid = await _seed_attacker(repo, "10.0.0.2")
|
||||
new_uuid = await _seed_attacker(repo, "10.0.0.3")
|
||||
|
||||
# 10.0.0.1 has fresh intel (not due for refresh).
|
||||
await repo.upsert_attacker_intel(_intel_payload("10.0.0.1", ttl_hours=24))
|
||||
await repo.upsert_attacker_intel(
|
||||
_intel_payload(attacker_uuid=fresh_uuid, ip="10.0.0.1", ttl_hours=24)
|
||||
)
|
||||
# 10.0.0.2 has stale intel (already expired).
|
||||
await repo.upsert_attacker_intel(_intel_payload("10.0.0.2", ttl_hours=-1))
|
||||
await repo.upsert_attacker_intel(
|
||||
_intel_payload(attacker_uuid=stale_uuid, ip="10.0.0.2", ttl_hours=-1)
|
||||
)
|
||||
# 10.0.0.3 has no intel row at all.
|
||||
|
||||
pending = await repo.get_unenriched_attacker_ips(limit=10)
|
||||
assert "10.0.0.1" not in pending # fresh, skipped
|
||||
assert "10.0.0.2" in pending # stale, queue it
|
||||
assert "10.0.0.3" in pending # never enriched
|
||||
pending = await repo.get_unenriched_attackers(limit=10)
|
||||
by_uuid = {entry["uuid"]: entry["ip"] for entry in pending}
|
||||
|
||||
assert fresh_uuid not in by_uuid # fresh, skipped
|
||||
assert by_uuid.get(stale_uuid) == "10.0.0.2" # stale, queue it
|
||||
assert by_uuid.get(new_uuid) == "10.0.0.3" # never enriched
|
||||
|
||||
@@ -7,7 +7,7 @@ Covers — without any real provider impls — that the loop:
|
||||
* fans out across fake providers and writes the aggregate row
|
||||
* aggregate_verdict picks the strongest provider verdict
|
||||
* a provider returning ``error`` is logged but does not poison the row
|
||||
* gates IPs through ``get_unenriched_attacker_ips`` (TTL respected)
|
||||
* gates attackers through ``get_unenriched_attackers`` (TTL respected)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -92,12 +92,17 @@ async def test_loop_exits_on_shutdown_signal(repo):
|
||||
await asyncio.wait_for(task, timeout=2.0)
|
||||
|
||||
|
||||
async def _seed_attacker(repo, ip: str) -> str:
|
||||
"""Seed an attackers row and return its UUID."""
|
||||
now = datetime.now(timezone.utc)
|
||||
return await repo.upsert_attacker(
|
||||
{"ip": ip, "first_seen": now, "last_seen": now, "event_count": 1}
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_no_providers_skips_enrichment(repo):
|
||||
now = datetime.now(timezone.utc)
|
||||
await repo.upsert_attacker(
|
||||
{"ip": "1.1.1.1", "first_seen": now, "last_seen": now, "event_count": 1}
|
||||
)
|
||||
a_uuid = await _seed_attacker(repo, "1.1.1.1")
|
||||
shutdown = asyncio.Event()
|
||||
task = asyncio.create_task(
|
||||
run_intel_loop(
|
||||
@@ -110,16 +115,13 @@ async def test_no_providers_skips_enrichment(repo):
|
||||
await asyncio.sleep(0.15)
|
||||
shutdown.set()
|
||||
await asyncio.wait_for(task, timeout=2.0)
|
||||
# No row written for 1.1.1.1.
|
||||
assert await repo.get_attacker_intel_by_ip("1.1.1.1") is None
|
||||
# No row written for the seeded attacker.
|
||||
assert await repo.get_attacker_intel_by_uuid(a_uuid) is None
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_fan_out_writes_aggregate_row(repo):
|
||||
now = datetime.now(timezone.utc)
|
||||
await repo.upsert_attacker(
|
||||
{"ip": "2.2.2.2", "first_seen": now, "last_seen": now, "event_count": 1}
|
||||
)
|
||||
a_uuid = await _seed_attacker(repo, "2.2.2.2")
|
||||
|
||||
gn = _FakeProvider(
|
||||
"greynoise",
|
||||
@@ -154,23 +156,22 @@ async def test_fan_out_writes_aggregate_row(repo):
|
||||
shutdown.set()
|
||||
await asyncio.wait_for(task, timeout=2.0)
|
||||
|
||||
row = await repo.get_attacker_intel_by_ip("2.2.2.2")
|
||||
row = await repo.get_attacker_intel_by_uuid(a_uuid)
|
||||
assert row is not None
|
||||
assert row["attacker_uuid"] == a_uuid
|
||||
assert row["attacker_ip"] == "2.2.2.2"
|
||||
assert row["greynoise_classification"] == "benign"
|
||||
assert row["abuseipdb_score"] == 90
|
||||
# Strongest verdict wins.
|
||||
assert row["aggregate_verdict"] == "malicious"
|
||||
# Both providers were queried.
|
||||
# Both providers were queried by IP.
|
||||
assert gn.calls == ["2.2.2.2"]
|
||||
assert aip.calls == ["2.2.2.2"]
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_provider_error_does_not_poison_row(repo):
|
||||
now = datetime.now(timezone.utc)
|
||||
await repo.upsert_attacker(
|
||||
{"ip": "3.3.3.3", "first_seen": now, "last_seen": now, "event_count": 1}
|
||||
)
|
||||
a_uuid = await _seed_attacker(repo, "3.3.3.3")
|
||||
|
||||
good = _FakeProvider(
|
||||
"greynoise",
|
||||
@@ -196,7 +197,7 @@ async def test_provider_error_does_not_poison_row(repo):
|
||||
shutdown.set()
|
||||
await asyncio.wait_for(task, timeout=2.0)
|
||||
|
||||
row = await repo.get_attacker_intel_by_ip("3.3.3.3")
|
||||
row = await repo.get_attacker_intel_by_uuid(a_uuid)
|
||||
assert row is not None
|
||||
assert row["greynoise_classification"] == "benign"
|
||||
# Broken provider's columns stay null; row is still written.
|
||||
@@ -226,10 +227,7 @@ async def test_intel_enriched_event_published_to_bus(repo, monkeypatch):
|
||||
sub = shared_bus.subscribe(attacker(ATTACKER_INTEL_ENRICHED))
|
||||
await sub.__aenter__()
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
await repo.upsert_attacker(
|
||||
{"ip": "4.4.4.4", "first_seen": now, "last_seen": now, "event_count": 1}
|
||||
)
|
||||
a_uuid = await _seed_attacker(repo, "4.4.4.4")
|
||||
|
||||
provider = _FakeProvider(
|
||||
"greynoise",
|
||||
@@ -258,6 +256,7 @@ async def test_intel_enriched_event_published_to_bus(repo, monkeypatch):
|
||||
await sub.__aexit__(None, None, None)
|
||||
|
||||
payload = event.payload
|
||||
assert payload["attacker_uuid"] == a_uuid
|
||||
assert payload["attacker_ip"] == "4.4.4.4"
|
||||
assert payload["aggregate_verdict"] == "malicious"
|
||||
assert payload["providers"] == ["greynoise"]
|
||||
|
||||
Reference in New Issue
Block a user