feat(profiler): track SMTP victim domains per attacker

New SmtpTarget table records each (attacker, domain) pair observed via the SMTP honeypots. Only the domain is stored — local-parts are dropped at ingestion, so this table holds no user-identifying data beyond the target organisation's identity.

The profiler worker extracts domains from rcpt_to / rcpt_denied / message_accepted events, normalizes them (lowercase, strip local-part, drop blocked TLDs), and upserts one row per pair with a running count + first_seen / last_seen.

Three repo methods shipped:
* increment_smtp_target(attacker, domain) — upsert + bump
* list_smtp_targets(attacker) — per-attacker view
* smtp_target_seen(domain) — cross-attacker aggregate, shaped as the federation-gossip RPC that V2 will expose.

The gossip-query shape is load-bearing: each operator can answer "have any of your attackers targeted corp1.com?" without leaking which attackers or when — the aggregate returns a bool + total count + first/last seen, nothing else.
2026-04-22 22:23:27 -04:00
parent c50448995b
commit d43303251d
7 changed files with 360 additions and 1 deletions
--- a/decnet/web/db/models/__init__.py
+++ b/decnet/web/db/models/__init__.py
@@ -30,6 +30,7 @@ from .attackers import (
    AttackerBehavior,
    AttackersResponse,
    SessionProfile,
+    SmtpTarget,
 )
 from .deploy import (
    DeployIniRequest,
@@ -137,6 +138,7 @@ __all__ = [
    "AttackerBehavior",
    "AttackersResponse",
    "SessionProfile",
+    "SmtpTarget",
    # deploy
    "DeployIniRequest",
    "MutateIntervalRequest",
--- a/decnet/web/db/models/attackers.py
+++ b/decnet/web/db/models/attackers.py
@@ -3,7 +3,7 @@ from datetime import datetime, timezone
 from typing import Any, List, Optional

 from pydantic import BaseModel
-from sqlalchemy import Column, Text
+from sqlalchemy import Column, Text, UniqueConstraint
 from sqlmodel import Field, SQLModel

 from ._base import _BIG_TEXT
@@ -143,6 +143,36 @@ class SessionProfile(SQLModel, table=True):
    )


+class SmtpTarget(SQLModel, table=True):
+    """
+    Per-attacker list of victim domains observed via the SMTP honeypots.
+
+    Each row is one (attacker_uuid, domain) pair — an attacker who relays
+    mail to 500 addresses at acme.com collapses into a single row with
+    count=500. Only the *domain* is stored; local-parts (the bit before
+    `@`) are dropped at ingestion, so this table contains no PII beyond
+    the target organisation's identity.
+
+    Shape is designed for future V2 federation gossip: the
+    `smtp_target_seen(domain)` query returns aggregate counts with zero
+    cross-org attacker leakage — each operator can answer "have you seen
+    this domain being targeted?" without exposing *which* attackers did.
+    """
+    __tablename__ = "smtp_targets"
+    # Surrogate integer primary key; None on instances that have no id yet.
+    id: Optional[int] = Field(default=None, primary_key=True)
+    # Owning attacker; foreign key to attackers.uuid, indexed for the
+    # per-attacker listing query.
+    attacker_uuid: str = Field(foreign_key="attackers.uuid", index=True)
+    # Victim domain only (no local-part); indexed for the cross-attacker
+    # aggregate lookup by domain.
+    domain: str = Field(index=True)
+    # Defaults to the current UTC time when the instance is constructed.
+    first_seen: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
+    # Defaults to the current UTC time when the instance is constructed;
+    # indexed because listings are ordered by it.
+    last_seen: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc), index=True
+    )
+    # Aggregate counter — one rcpt_to / message_accepted recipient bumps this.
+    count: int = Field(default=1)
+    # At most one row per (attacker_uuid, domain) pair; the upsert in the
+    # repository layer relies on this uniqueness.
+    __table_args__ = (
+        UniqueConstraint("attacker_uuid", "domain", name="uq_smtp_targets_attacker_domain"),
+    )
+
+
 class AttackersResponse(BaseModel):
    total: int
    limit: int
--- a/decnet/web/db/repository.py
+++ b/decnet/web/db/repository.py
@@ -170,6 +170,35 @@ class BaseRepository(ABC):
        """Retrieve the keystroke-dynamics profile row for a session."""
        pass

+    @abstractmethod
+    async def increment_smtp_target(self, attacker_uuid: str, domain: str) -> None:
+        """
+        Record that ``attacker_uuid`` targeted ``domain`` via SMTP.
+
+        Upserts the (attacker_uuid, domain) row: inserts with count=1 +
+        first_seen=now on first sight, bumps count + last_seen on every
+        subsequent hit. Callers must pre-normalize ``domain`` (lowercase,
+        local-part stripped).
+
+        Args:
+            attacker_uuid: UUID of the attacker the observation belongs to.
+            domain: Pre-normalized victim domain (no local-part).
+
+        Returns:
+            None. The effect is the inserted or updated row.
+        """
+        pass
+
+    @abstractmethod
+    async def list_smtp_targets(self, attacker_uuid: str) -> list[dict[str, Any]]:
+        """
+        Return SmtpTarget rows for an attacker, ordered by most-recent first.
+
+        Args:
+            attacker_uuid: UUID of the attacker whose targets are listed.
+
+        Returns:
+            One dict per (attacker, domain) row, most recent ``last_seen``
+            first; an empty list when the attacker has no recorded targets.
+        """
+        pass
+
+    @abstractmethod
+    async def smtp_target_seen(self, domain: str) -> dict[str, Any]:
+        """
+        Cross-attacker aggregate for a victim domain.
+
+        Returns ``{seen: bool, count: int, first_seen: datetime|None,
+        last_seen: datetime|None}``. Shaped as the federation-gossip RPC
+        that V2 will expose — each operator can answer "have any of your
+        attackers targeted this domain?" without leaking attacker identity.
+
+        Args:
+            domain: Victim domain to look up, in the same normalized form
+                used when the rows were recorded.
+
+        Returns:
+            The aggregate dict described above. ``first_seen`` and
+            ``last_seen`` are ``None`` when the domain was never seen.
+        """
+        pass
+
    @abstractmethod
    async def get_attacker_by_uuid(self, uuid: str) -> Optional[dict[str, Any]]:
        """Retrieve a single attacker profile by UUID."""
--- a/decnet/web/db/sqlmodel_repo.py
+++ b/decnet/web/db/sqlmodel_repo.py
@@ -35,6 +35,7 @@ from decnet.web.db.models import (
    Attacker,
    AttackerBehavior,
    SessionProfile,
+    SmtpTarget,
    SwarmHost,
    DeckyShard,
    Topology,
@@ -734,6 +735,63 @@ class SQLModelRepository(BaseRepository):
                return None
            return row.model_dump(mode="json")

+    async def increment_smtp_target(self, attacker_uuid: str, domain: str) -> None:
+        """Upsert an (attacker_uuid, domain) pair and bump count + last_seen.
+
+        Read-then-write under a single session — the UNIQUE constraint on
+        (attacker_uuid, domain) guards against duplicate rows if the race
+        ever materialises; we accept the ~1ms extra round-trip in exchange
+        for a single dialect-portable implementation.
+        """
+        async with self._session() as session:
+            result = await session.execute(
+                select(SmtpTarget)
+                .where(SmtpTarget.attacker_uuid == attacker_uuid)
+                .where(SmtpTarget.domain == domain)
+            )
+            existing = result.scalar_one_or_none()
+            now = datetime.now(timezone.utc)
+            if existing:
+                existing.count += 1
+                existing.last_seen = now
+                session.add(existing)
+            else:
+                session.add(SmtpTarget(
+                    attacker_uuid=attacker_uuid,
+                    domain=domain,
+                    first_seen=now,
+                    last_seen=now,
+                    count=1,
+                ))
+            await session.commit()
+
+    async def list_smtp_targets(self, attacker_uuid: str) -> list[dict[str, Any]]:
+        async with self._session() as session:
+            result = await session.execute(
+                select(SmtpTarget)
+                .where(SmtpTarget.attacker_uuid == attacker_uuid)
+                .order_by(desc(SmtpTarget.last_seen))
+            )
+            return [r.model_dump(mode="json") for r in result.scalars().all()]
+
+    async def smtp_target_seen(self, domain: str) -> dict[str, Any]:
+        """Aggregate rows for this domain across every attacker in the DB."""
+        async with self._session() as session:
+            result = await session.execute(
+                select(
+                    func.coalesce(func.sum(SmtpTarget.count), 0),
+                    func.min(SmtpTarget.first_seen),
+                    func.max(SmtpTarget.last_seen),
+                ).where(SmtpTarget.domain == domain)
+            )
+            total, first_seen, last_seen = result.one()
+            return {
+                "seen": int(total) > 0,
+                "count": int(total),
+                "first_seen": first_seen,
+                "last_seen": last_seen,
+            }
+
    @staticmethod
    def _deserialize_attacker(d: dict[str, Any]) -> dict[str, Any]:
        for key in ("services", "deckies", "fingerprints", "commands"):