feat(intel): attacker_intel table + repo helpers

New TTL-cached threat-intel row keyed by attacker IP, with per-provider
verdict/raw/queried_at columns for GreyNoise, AbuseIPDB, abuse.ch Feodo
Tracker and ThreatFox. Carries schema_version from day one (federation
wire-format precedent set by SessionProfile). Repo gains
upsert_attacker_intel, get_attacker_intel_by_ip, and a
get_unenriched_attacker_ips backfill primitive that picks fresh + stale
rows for the forthcoming 'decnet enrich' worker.

Also documents the open-source intel-source backlog in DEVELOPMENT_V2.
This commit is contained in:
2026-04-26 04:56:47 -04:00
parent 9816cdbd53
commit 0dd3811436
7 changed files with 339 additions and 0 deletions

View File

@@ -35,6 +35,9 @@ from .attackers import (
SessionProfile,
SmtpTarget,
)
from .attacker_intel import (
AttackerIntel,
)
from .deploy import (
DeployIniRequest,
DeployResponse,
@@ -157,6 +160,7 @@ __all__ = [
# attackers
"Attacker",
"AttackerBehavior",
"AttackerIntel",
"AttackersResponse",
"SessionProfile",
"SmtpTarget",

View File

@@ -0,0 +1,83 @@
"""Threat-intel enrichment row — one per attacker IP, TTL-cached."""
from datetime import datetime, timezone
from typing import Optional
from sqlalchemy import Column
from sqlmodel import Field, SQLModel
from ._base import _BIG_TEXT
class AttackerIntel(SQLModel, table=True):
"""Aggregated threat-intel verdict for a single attacker IP.
Populated by the ``decnet enrich`` worker, which queries multiple
free-tier intel providers (GreyNoise Community, AbuseIPDB,
abuse.ch Feodo Tracker + ThreatFox) and writes one row per
attacker IP. The row is TTL-cached via ``expires_at`` so re-firings
inside the cache window short-circuit before any HTTP egress.
Per-provider columns are nullable until each provider has answered;
the enrichment pass writes whichever providers succeeded and leaves
the rest unchanged on a partial failure.
``schema_version`` is committed to storage from day one — federation
gossip in v2/v3 requires cross-operator compatibility, and
retrofitting a version column after rows exist is painful. Mirrors
the rationale on :class:`SessionProfile`.
"""
__tablename__ = "attacker_intel"
uuid: str = Field(primary_key=True) # uuid.uuid4().hex, generated by writer
attacker_uuid: Optional[str] = Field(default=None, index=True)
attacker_ip: str = Field(index=True, unique=True)
schema_version: int = Field(default=1)
# ── GreyNoise Community ─────────────────────────────────────────────
# classification ∈ {"benign", "malicious", "suspicious", "unknown"}
greynoise_classification: Optional[str] = Field(default=None, max_length=32)
greynoise_raw: str = Field(
default="{}",
sa_column=Column("greynoise_raw", _BIG_TEXT, nullable=False, default="{}"),
)
greynoise_queried_at: Optional[datetime] = Field(default=None)
# ── AbuseIPDB ────────────────────────────────────────────────────────
# 0..100 abuse confidence score
abuseipdb_score: Optional[int] = Field(default=None)
abuseipdb_raw: str = Field(
default="{}",
sa_column=Column("abuseipdb_raw", _BIG_TEXT, nullable=False, default="{}"),
)
abuseipdb_queried_at: Optional[datetime] = Field(default=None)
# ── abuse.ch Feodo Tracker ───────────────────────────────────────────
feodo_listed: Optional[bool] = Field(default=None)
feodo_raw: str = Field(
default="{}",
sa_column=Column("feodo_raw", _BIG_TEXT, nullable=False, default="{}"),
)
feodo_queried_at: Optional[datetime] = Field(default=None)
# ── abuse.ch ThreatFox ───────────────────────────────────────────────
threatfox_listed: Optional[bool] = Field(default=None)
threatfox_raw: str = Field(
default="{}",
sa_column=Column("threatfox_raw", _BIG_TEXT, nullable=False, default="{}"),
)
threatfox_queried_at: Optional[datetime] = Field(default=None)
# ── Aggregate verdict ────────────────────────────────────────────────
# Synthesised from per-provider columns. ∈ {"malicious", "suspicious",
# "benign", "unknown"}. Used by the dashboard and webhook consumers
# that don't want to reason over four provider columns.
aggregate_verdict: Optional[str] = Field(
default=None, max_length=32, index=True
)
# ── TTL bookkeeping ──────────────────────────────────────────────────
cached_at: datetime = Field(
default_factory=lambda: datetime.now(timezone.utc), index=True
)
expires_at: datetime = Field(index=True)

View File

@@ -278,6 +278,28 @@ class BaseRepository(ABC):
"""Retrieve the keystroke-dynamics profile row for a session."""
pass
@abstractmethod
async def upsert_attacker_intel(self, data: dict[str, Any]) -> str:
"""Insert or update the threat-intel row for an attacker IP.
``data`` MUST include ``attacker_ip`` and ``expires_at``. Returns
the row UUID. Used by the ``decnet enrich`` worker.
"""
pass
@abstractmethod
async def get_attacker_intel_by_ip(self, ip: str) -> Optional[dict[str, Any]]:
"""Return the threat-intel row for ``ip`` or ``None`` if missing."""
pass
@abstractmethod
async def get_unenriched_attacker_ips(self, limit: int = 100) -> list[str]:
"""List attacker IPs with no intel row OR whose row is past expires_at.
Used by the enrich worker to backfill on startup and on each wake.
"""
pass
@abstractmethod
async def increment_smtp_target(self, attacker_uuid: str, domain: str) -> None:
"""

View File

@@ -36,6 +36,7 @@ from decnet.web.db.models import (
State,
Attacker,
AttackerBehavior,
AttackerIntel,
SessionProfile,
SmtpTarget,
SwarmHost,
@@ -1195,6 +1196,73 @@ class SQLModelRepository(BaseRepository):
return None
return row.model_dump(mode="json")
async def upsert_attacker_intel(self, data: dict[str, Any]) -> str:
ip = data["attacker_ip"]
async with self._session() as session:
result = await session.execute(
select(AttackerIntel).where(AttackerIntel.attacker_ip == ip)
)
existing = result.scalar_one_or_none()
if existing:
for k, v in data.items():
setattr(existing, k, v)
session.add(existing)
row_uuid = existing.uuid
else:
row_uuid = uuid.uuid4().hex
session.add(AttackerIntel(uuid=row_uuid, **data))
await session.commit()
return row_uuid
async def get_attacker_intel_by_ip(
self,
ip: str,
) -> Optional[dict[str, Any]]:
async with self._session() as session:
result = await session.execute(
select(AttackerIntel).where(AttackerIntel.attacker_ip == ip)
)
row = result.scalar_one_or_none()
if not row:
return None
d = row.model_dump(mode="json")
for key in (
"greynoise_raw",
"abuseipdb_raw",
"feodo_raw",
"threatfox_raw",
):
raw = d.get(key)
if isinstance(raw, str):
try:
d[key] = json.loads(raw)
except (json.JSONDecodeError, TypeError):
pass
return d
async def get_unenriched_attacker_ips(self, limit: int = 100) -> list[str]:
"""IPs in ``attackers`` with no intel row OR a stale (expired) one.
Stale = ``expires_at < now``. Ordered by ``attackers.last_seen`` desc
so the worker prioritises recent activity on backfill.
"""
now = datetime.now(timezone.utc)
async with self._session() as session:
stmt = (
select(Attacker.ip)
.outerjoin(AttackerIntel, AttackerIntel.attacker_ip == Attacker.ip)
.where(
or_(
AttackerIntel.uuid.is_(None),
AttackerIntel.expires_at < now,
)
)
.order_by(desc(Attacker.last_seen))
.limit(limit)
)
result = await session.execute(stmt)
return [row for row in result.scalars().all()]
async def increment_smtp_target(self, attacker_uuid: str, domain: str) -> None:
"""Upsert an (attacker_uuid, domain) pair and bump count + last_seen.