Files
DECNET/decnet/web/db/sqlmodel_repo/attacker_intel.py
anti 9a7b03700c refactor(intel): migrate AttackerIntel JSON-string columns to native SQLAlchemy JSON
Five list columns (greynoise_tags, abuseipdb_categories, threatfox_threat_types,
threatfox_ioc_types, threatfox_malware_families) and four dict columns
(*_raw) are now Column(JSON) with list/dict type annotations and
default_factory=list/dict. Providers return native Python objects; the
application-layer json.dumps/json.loads round-trip and _decode_json_list
helpers are gone. to_intel_event_payload() reads columns directly.

Also caps pytest xdist at -n 4 and excludes tests/api from norecursedirs
to prevent schemathesis workers from OOM-killing the dev loop.
2026-05-10 09:17:15 -04:00

102 lines
3.5 KiB
Python

"""Attacker-intel domain methods.
Owns reads/writes for ``AttackerIntel`` rows: per-attacker enrichment
data sourced from external providers (GreyNoise, AbuseIPDB, Feodo,
ThreatFox). Joined against ``Attacker`` for the unenriched-backlog
worker query.
"""
from __future__ import annotations
import uuid as _uuid
from datetime import datetime, timezone
from typing import Any, Optional
from sqlalchemy import desc, or_, select
from sqlmodel import col
from decnet.web.db.models import Attacker, AttackerIntel
from decnet.web.db.sqlmodel_repo._helpers import _MixinBase
class AttackerIntelMixin(_MixinBase):
"""Mixin: methods composed onto ``SQLModelRepository``.
Expects ``self._session()`` from the base.
"""
async def upsert_attacker_intel(self, data: dict[str, Any]) -> str:
attacker_uuid_value = data["attacker_uuid"]
async with self._session() as session:
result = await session.execute(
select(AttackerIntel).where(
AttackerIntel.attacker_uuid == attacker_uuid_value,
)
)
existing = result.scalar_one_or_none()
if existing:
for k, v in data.items():
setattr(existing, k, v)
session.add(existing)
row_uuid = existing.uuid
else:
row_uuid = _uuid.uuid4().hex
session.add(AttackerIntel(uuid=row_uuid, **data))
await session.commit()
return row_uuid
async def get_attacker_intel_row_by_uuid(
self,
uuid: str,
) -> Optional[AttackerIntel]:
async with self._session() as session:
result = await session.execute(
select(AttackerIntel).where(AttackerIntel.attacker_uuid == uuid)
)
return result.scalar_one_or_none()
async def get_attacker_intel_by_uuid(
self,
uuid: str,
) -> Optional[dict[str, Any]]:
async with self._session() as session:
result = await session.execute(
select(AttackerIntel).where(AttackerIntel.attacker_uuid == uuid)
)
row = result.scalar_one_or_none()
if not row:
return None
return row.model_dump(mode="json")
async def get_unenriched_attackers(
self, limit: int = 100,
) -> list[dict[str, Any]]:
"""``{"uuid", "ip"}`` pairs with no intel row OR a stale (expired) one.
Stale = ``expires_at < now``. Ordered by ``attackers.last_seen`` desc
so the worker prioritises recent activity on backfill. Both columns
are projected so the worker can write keyed on UUID and dispatch
provider calls keyed on IP without a second round-trip.
"""
now = datetime.now(timezone.utc)
async with self._session() as session:
stmt = (
select(col(Attacker.uuid), col(Attacker.ip))
.outerjoin(
AttackerIntel, AttackerIntel.attacker_uuid == Attacker.uuid,
)
.where(
or_(
col(AttackerIntel.uuid).is_(None),
AttackerIntel.expires_at < now,
)
)
.order_by(desc(Attacker.last_seen))
.limit(limit)
)
result = await session.execute(stmt)
return [
{"uuid": uuid_, "ip": ip}
for uuid_, ip in result.all()
]