"""Threat-intel enrichment row — one per attacker IP, TTL-cached.""" import json as _json from datetime import datetime, timezone from typing import Any, Optional from sqlalchemy import Column from sqlmodel import Field, SQLModel from ._base import _BIG_TEXT def _decode_json_list(value: Any) -> list[Any]: if isinstance(value, list): return value if isinstance(value, str) and value: try: decoded = _json.loads(value) except (_json.JSONDecodeError, TypeError): return [] return decoded if isinstance(decoded, list) else [] return [] class AttackerIntel(SQLModel, table=True): """Aggregated threat-intel verdict for a single attacker IP. Populated by the ``decnet enrich`` worker, which queries multiple free-tier intel providers (GreyNoise Community, AbuseIPDB, abuse.ch Feodo Tracker + ThreatFox) and writes one row per attacker IP. The row is TTL-cached via ``expires_at`` so re-firings inside the cache window short-circuit before any HTTP egress. Per-provider columns are nullable until each provider has answered; the enrichment pass writes whichever providers succeeded and leaves the rest unchanged on a partial failure. ``schema_version`` is committed to storage from day one — federation gossip in v2/v3 requires cross-operator compatibility, and retrofitting a version column after rows exist is painful. Mirrors the rationale on :class:`AttackerIdentity`'s ``schema_version``. """ __tablename__ = "attacker_intel" uuid: str = Field(primary_key=True) # uuid.uuid4().hex, generated by writer # Canonical key. One intel row per attacker UUID; FK guarantees no orphan # rows when an attacker is deleted, and UNIQUE keeps upserts honest. attacker_uuid: str = Field( foreign_key="attackers.uuid", unique=True, index=True, ) # DENORMALISED — NOT a key. The IP the worker queried providers with at # write time. Useful for SIEM payloads and audit lookups; updated on every # upsert if the attacker rotates IPs. Never use this column as a lookup # key; ``attacker_uuid`` is the only canonical identifier here. attacker_ip: str = Field(index=True) schema_version: int = Field(default=1) # ── GreyNoise Community ───────────────────────────────────────────── # classification ∈ {"benign", "malicious", "suspicious", "unknown"}. # The Community endpoint does not return tags; ``greynoise_tags`` stays # empty unless an operator wires a non-Community provider that does. greynoise_classification: Optional[str] = Field(default=None, max_length=32) greynoise_name: Optional[str] = Field(default=None, max_length=128) greynoise_tags: str = Field( default="[]", sa_column=Column("greynoise_tags", _BIG_TEXT, nullable=False, default="[]"), ) # JSON list[str] — behavioral / actor tags greynoise_raw: str = Field( default="{}", sa_column=Column("greynoise_raw", _BIG_TEXT, nullable=False, default="{}"), ) greynoise_queried_at: Optional[datetime] = Field(default=None) # ── AbuseIPDB ──────────────────────────────────────────────────────── # 0..100 abuse confidence score abuseipdb_score: Optional[int] = Field(default=None) abuseipdb_categories: str = Field( default="[]", sa_column=Column( "abuseipdb_categories", _BIG_TEXT, nullable=False, default="[]", ), ) # JSON list[int] — flattened set of categories across recent reports abuseipdb_raw: str = Field( default="{}", sa_column=Column("abuseipdb_raw", _BIG_TEXT, nullable=False, default="{}"), ) abuseipdb_queried_at: Optional[datetime] = Field(default=None) # ── abuse.ch Feodo Tracker ─────────────────────────────────────────── feodo_listed: Optional[bool] = Field(default=None) feodo_malware_family: Optional[str] = Field(default=None, max_length=64) feodo_raw: str = Field( default="{}", sa_column=Column("feodo_raw", _BIG_TEXT, nullable=False, default="{}"), ) feodo_queried_at: Optional[datetime] = Field(default=None) # ── abuse.ch ThreatFox ─────────────────────────────────────────────── # ThreatFox returns a list of matches for a queried IP. Each match has # a ``threat_type`` (botnet_cc / payload_delivery / payload / # cc_skimming) and an ``ioc_type`` (url / domain / ip:port / hash # variants). We flatten the unique sets across all matches; the # IntelLifter keys ATT&CK techniques on ``threat_type``, the canonical # taxonomy field per ThreatFox's API. threatfox_listed: Optional[bool] = Field(default=None) threatfox_threat_types: str = Field( default="[]", sa_column=Column( "threatfox_threat_types", _BIG_TEXT, nullable=False, default="[]", ), ) # JSON list[str] threatfox_ioc_types: str = Field( default="[]", sa_column=Column( "threatfox_ioc_types", _BIG_TEXT, nullable=False, default="[]", ), ) # JSON list[str] threatfox_malware_families: str = Field( default="[]", sa_column=Column( "threatfox_malware_families", _BIG_TEXT, nullable=False, default="[]", ), ) # JSON list[str] threatfox_raw: str = Field( default="{}", sa_column=Column("threatfox_raw", _BIG_TEXT, nullable=False, default="{}"), ) threatfox_queried_at: Optional[datetime] = Field(default=None) # ── Aggregate verdict ──────────────────────────────────────────────── # Synthesised from per-provider columns. ∈ {"malicious", "suspicious", # "benign", "unknown"}. Used by the dashboard and webhook consumers # that don't want to reason over four provider columns. aggregate_verdict: Optional[str] = Field( default=None, max_length=32, index=True ) # ── TTL bookkeeping ────────────────────────────────────────────────── cached_at: datetime = Field( default_factory=lambda: datetime.now(timezone.utc), index=True ) expires_at: datetime = Field(index=True) def to_intel_event_payload( self, *, providers: Optional[list[str]] = None, ) -> dict[str, Any]: """Project this row into the payload shape the IntelLifter consumes. Called by both the intel worker (on live publish of ``attacker.intel.enriched``) and the TTP worker (on ``attacker.session.ended`` catch-up). The two callers produce identical payloads for the same row, so IntelLifter tag UUIDs are deterministic regardless of which path delivered them. ``providers`` is included when the intel worker knows which providers contributed; the TTP catch-up path omits it (the IntelLifter does not predicate on ``providers``). """ d: dict[str, Any] = { "attacker_uuid": self.attacker_uuid, "attacker_ip": self.attacker_ip, "aggregate_verdict": self.aggregate_verdict, # AbuseIPDB "abuseipdb_score": self.abuseipdb_score, "abuseipdb_categories": _decode_json_list(self.abuseipdb_categories), # GreyNoise "greynoise_classification": self.greynoise_classification, "greynoise_name": self.greynoise_name, "greynoise_tags": _decode_json_list(self.greynoise_tags), # Feodo "feodo_listed": self.feodo_listed, "feodo_malware_family": self.feodo_malware_family, # ThreatFox "threatfox_listed": self.threatfox_listed, "threatfox_threat_types": _decode_json_list(self.threatfox_threat_types), "threatfox_ioc_types": _decode_json_list(self.threatfox_ioc_types), "threatfox_malware_families": _decode_json_list( self.threatfox_malware_families ), } if providers is not None: d["providers"] = providers return d