On every attacker.session.ended event, the TTP worker now reads the persisted AttackerIntel row (if any) and synthesizes an intel-source TaggerEvent so intel-derived tags are emitted even when attacker.intel.enriched was dropped or arrived before the worker started.

Key changes:
- AttackerIntel.to_intel_event_payload() — single source of truth for the intel-row → lifter payload projection; shared by future callers without importing decnet.intel.* (no-SPOF contract preserved).
- BaseRepository.get_attacker_intel_row_by_uuid() — returns the live SQLModel instance so the catch-up path can call to_intel_event_payload().
- _build_intel_catchup_event() in ttp/worker.py — looks up the intel row, builds the TaggerEvent, returns None on absent row (silence, not error).
- _process_event() extended: appends the catch-up event to tagger_events when topic contains "session.ended".

Deterministic source_id keeps compute_tag_uuid idempotent across replays; INSERT OR IGNORE deduplicates against any prior attacker.intel.enriched path.

DummyRepo stub + coverage call added per feedback_run_base_repo_test.md.
188 lines
8.5 KiB
Python
188 lines
8.5 KiB
Python
"""Threat-intel enrichment row — one per attacker (keyed by attacker UUID), TTL-cached."""
|
|
import json as _json
|
|
from datetime import datetime, timezone
|
|
from typing import Any, Optional
|
|
|
|
from sqlalchemy import Column
|
|
from sqlmodel import Field, SQLModel
|
|
|
|
from ._base import _BIG_TEXT
|
|
|
|
|
|
def _decode_json_list(value: Any) -> list[Any]:
    """Coerce a stored JSON-list value into a Python list.

    A ``list`` is handed back unchanged (the same object, not a copy).
    A non-empty ``str`` is parsed as JSON and returned only when the
    parsed document is itself a list. Every other case — ``None``, an
    empty string, any other type, malformed JSON, or JSON that is not
    a list — yields a new empty list.
    """
    if isinstance(value, list):
        return value

    # Only non-empty strings are worth handing to the JSON parser.
    if not isinstance(value, str) or not value:
        return []

    try:
        parsed = _json.loads(value)
    except (_json.JSONDecodeError, TypeError):
        return []

    if isinstance(parsed, list):
        return parsed
    return []
|
|
|
|
|
|
class AttackerIntel(SQLModel, table=True):
    """Aggregated threat-intel verdict for one attacker.

    Rows are written by the ``decnet enrich`` worker after it queries
    several free-tier intel providers (GreyNoise Community, AbuseIPDB,
    abuse.ch Feodo Tracker and ThreatFox). There is one row per
    attacker, keyed by ``attacker_uuid``; ``attacker_ip`` only records
    the address the providers were queried with. ``expires_at`` bounds
    the cache window, so a re-firing inside it can short-circuit before
    any HTTP egress.

    Provider columns stay at their defaults (``None`` or an empty JSON
    document) until that provider has answered. An enrichment pass
    writes the providers that succeeded and leaves the others untouched
    when some of them fail.

    ``schema_version`` is stored from day one: federation gossip in
    v2/v3 needs cross-operator compatibility, and adding a version
    column once rows exist is painful. Same rationale as
    ``schema_version`` on :class:`AttackerIdentity`.
    """

    __tablename__ = "attacker_intel"

    # Row primary key: uuid.uuid4().hex, generated by the writer.
    uuid: str = Field(primary_key=True)
    # Canonical lookup key. The FK prevents orphan rows once an attacker
    # is deleted; UNIQUE enforces one intel row per attacker on upsert.
    attacker_uuid: str = Field(
        foreign_key="attackers.uuid",
        unique=True,
        index=True,
    )
    # Denormalised copy of the IP the worker queried providers with at
    # write time — NOT a key. Handy for SIEM payloads and audit lookups,
    # and refreshed on every upsert when the attacker rotates IPs. Look
    # rows up by ``attacker_uuid`` only, never by this column.
    attacker_ip: str = Field(index=True)
    schema_version: int = Field(default=1)

    # ── GreyNoise Community ─────────────────────────────────────────────
    # classification is one of "benign", "malicious", "suspicious",
    # "unknown". The Community endpoint returns no tags, so
    # ``greynoise_tags`` stays empty unless an operator wires in a
    # non-Community provider that supplies them.
    greynoise_classification: Optional[str] = Field(default=None, max_length=32)
    greynoise_name: Optional[str] = Field(default=None, max_length=128)
    # JSON list[str] — behavioral / actor tags.
    greynoise_tags: str = Field(
        default="[]",
        sa_column=Column("greynoise_tags", _BIG_TEXT, nullable=False, default="[]"),
    )
    greynoise_raw: str = Field(
        default="{}",
        sa_column=Column("greynoise_raw", _BIG_TEXT, nullable=False, default="{}"),
    )
    greynoise_queried_at: Optional[datetime] = Field(default=None)

    # ── AbuseIPDB ────────────────────────────────────────────────────────
    # Abuse confidence score, 0..100.
    abuseipdb_score: Optional[int] = Field(default=None)
    # JSON list[int] — flattened set of categories across recent reports.
    abuseipdb_categories: str = Field(
        default="[]",
        sa_column=Column(
            "abuseipdb_categories", _BIG_TEXT, nullable=False, default="[]",
        ),
    )
    abuseipdb_raw: str = Field(
        default="{}",
        sa_column=Column("abuseipdb_raw", _BIG_TEXT, nullable=False, default="{}"),
    )
    abuseipdb_queried_at: Optional[datetime] = Field(default=None)

    # ── abuse.ch Feodo Tracker ───────────────────────────────────────────
    feodo_listed: Optional[bool] = Field(default=None)
    feodo_malware_family: Optional[str] = Field(default=None, max_length=64)
    feodo_raw: str = Field(
        default="{}",
        sa_column=Column("feodo_raw", _BIG_TEXT, nullable=False, default="{}"),
    )
    feodo_queried_at: Optional[datetime] = Field(default=None)

    # ── abuse.ch ThreatFox ───────────────────────────────────────────────
    # A ThreatFox query for an IP returns a list of matches, each with a
    # ``threat_type`` (botnet_cc / payload_delivery / payload /
    # cc_skimming) and an ``ioc_type`` (url / domain / ip:port / hash
    # variants). The unique values across all matches are flattened into
    # the columns below. The IntelLifter keys ATT&CK techniques on
    # ``threat_type``, the canonical taxonomy field per ThreatFox's API.
    threatfox_listed: Optional[bool] = Field(default=None)
    # JSON list[str]
    threatfox_threat_types: str = Field(
        default="[]",
        sa_column=Column(
            "threatfox_threat_types", _BIG_TEXT, nullable=False, default="[]",
        ),
    )
    # JSON list[str]
    threatfox_ioc_types: str = Field(
        default="[]",
        sa_column=Column(
            "threatfox_ioc_types", _BIG_TEXT, nullable=False, default="[]",
        ),
    )
    # JSON list[str]
    threatfox_malware_families: str = Field(
        default="[]",
        sa_column=Column(
            "threatfox_malware_families", _BIG_TEXT, nullable=False, default="[]",
        ),
    )
    threatfox_raw: str = Field(
        default="{}",
        sa_column=Column("threatfox_raw", _BIG_TEXT, nullable=False, default="{}"),
    )
    threatfox_queried_at: Optional[datetime] = Field(default=None)

    # ── Aggregate verdict ────────────────────────────────────────────────
    # Synthesised from the per-provider columns; one of "malicious",
    # "suspicious", "benign", "unknown". For the dashboard and webhook
    # consumers that don't want to reason over four provider columns.
    aggregate_verdict: Optional[str] = Field(
        default=None, max_length=32, index=True
    )

    # ── TTL bookkeeping ──────────────────────────────────────────────────
    # cached_at defaults to the current UTC time when the row object is
    # created; expires_at has no default and must be supplied.
    cached_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    expires_at: datetime = Field(index=True)

    def to_intel_event_payload(
        self,
        *,
        providers: Optional[list[str]] = None,
    ) -> dict[str, Any]:
        """Build the payload dict the IntelLifter consumes from this row.

        Used by the intel worker when it publishes
        ``attacker.intel.enriched`` live, and by the TTP worker when it
        catches up on ``attacker.session.ended``. Both paths get the
        same projection for the same row, so IntelLifter tag UUIDs stay
        deterministic whichever path delivered the event.

        Scalar columns are copied through as stored; the JSON-list text
        columns are decoded into Python lists.

        Args:
            providers: Names of the providers that contributed, when the
                caller knows them. If given, it is added under the
                ``"providers"`` key; if ``None`` the key is left out.
                The TTP catch-up path omits it (the IntelLifter does not
                predicate on ``providers``).

        Returns:
            A new dict holding the identity, aggregate-verdict and
            per-provider fields, plus ``"providers"`` when supplied.
        """
        payload: dict[str, Any] = {
            "attacker_uuid": self.attacker_uuid,
            "attacker_ip": self.attacker_ip,
            "aggregate_verdict": self.aggregate_verdict,
            # AbuseIPDB
            "abuseipdb_score": self.abuseipdb_score,
            "abuseipdb_categories": _decode_json_list(self.abuseipdb_categories),
            # GreyNoise
            "greynoise_classification": self.greynoise_classification,
            "greynoise_name": self.greynoise_name,
            "greynoise_tags": _decode_json_list(self.greynoise_tags),
            # Feodo
            "feodo_listed": self.feodo_listed,
            "feodo_malware_family": self.feodo_malware_family,
            # ThreatFox
            "threatfox_listed": self.threatfox_listed,
            "threatfox_threat_types": _decode_json_list(self.threatfox_threat_types),
            "threatfox_ioc_types": _decode_json_list(self.threatfox_ioc_types),
            "threatfox_malware_families": _decode_json_list(
                self.threatfox_malware_families
            ),
        }

        # The key is only present when the caller actually supplied it.
        if providers is None:
            return payload
        payload["providers"] = providers
        return payload
|