feat(profiler): track SMTP victim domains per attacker

New SmtpTarget table records each (attacker, domain) pair observed via
the SMTP honeypots. Only the domain is stored — local-parts are dropped
at ingestion, so this table holds no user-identifying data beyond the
target organisation's identity.

The profiler worker extracts domains from rcpt_to / rcpt_denied /
message_accepted events, normalizes them (lowercase, strip local-part,
drop blocked TLDs), and upserts one row per pair with a running count +
first_seen / last_seen.

Three repo methods shipped:
  * increment_smtp_target(attacker, domain) — upsert + bump
  * list_smtp_targets(attacker) — per-attacker view
  * smtp_target_seen(domain) — cross-attacker aggregate, shaped as the
    federation-gossip RPC that V2 will expose.

The gossip-query shape is load-bearing: each operator can answer
"have any of your attackers targeted corp1.com?" without leaking
which attackers did so or any per-attacker timing — the aggregate
returns a bool + total count + the overall first/last seen
timestamps, nothing else.
This commit is contained in:
2026-04-22 22:23:27 -04:00
parent c50448995b
commit d43303251d
7 changed files with 360 additions and 1 deletions

View File

@@ -30,6 +30,7 @@ from .attackers import (
AttackerBehavior,
AttackersResponse,
SessionProfile,
SmtpTarget,
)
from .deploy import (
DeployIniRequest,
@@ -137,6 +138,7 @@ __all__ = [
"AttackerBehavior",
"AttackersResponse",
"SessionProfile",
"SmtpTarget",
# deploy
"DeployIniRequest",
"MutateIntervalRequest",

View File

@@ -3,7 +3,7 @@ from datetime import datetime, timezone
from typing import Any, List, Optional
from pydantic import BaseModel
from sqlalchemy import Column, Text
from sqlalchemy import Column, Text, UniqueConstraint
from sqlmodel import Field, SQLModel
from ._base import _BIG_TEXT
@@ -143,6 +143,36 @@ class SessionProfile(SQLModel, table=True):
)
class SmtpTarget(SQLModel, table=True):
    """
    Victim domains seen on the SMTP honeypots, tracked per attacker.

    A row represents exactly one (attacker_uuid, domain) pair. Repeated
    hits do not add rows: an attacker relaying mail to 500 addresses at
    acme.com ends up as a single row whose ``count`` is 500.

    Only the *domain* is persisted. The local-part (everything before
    the `@`) is discarded at ingestion, so the table carries no PII
    beyond the identity of the targeted organisation.

    The layout anticipates V2 federation gossip: the
    `smtp_target_seen(domain)` query answers "have you seen this domain
    being targeted?" with aggregate counts only, never revealing *which*
    attackers were involved.
    """

    __tablename__ = "smtp_targets"
    # At most one row per (attacker, domain) pair.
    __table_args__ = (
        UniqueConstraint(
            "attacker_uuid",
            "domain",
            name="uq_smtp_targets_attacker_domain",
        ),
    )

    id: Optional[int] = Field(default=None, primary_key=True)
    attacker_uuid: str = Field(foreign_key="attackers.uuid", index=True)
    domain: str = Field(index=True)
    # Both timestamps default to the current timezone-aware UTC time.
    first_seen: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
    )
    last_seen: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        index=True,
    )
    # Running total: incremented once per observed recipient at this domain.
    count: int = Field(default=1)
class AttackersResponse(BaseModel):
total: int
limit: int

View File

@@ -170,6 +170,35 @@ class BaseRepository(ABC):
"""Retrieve the keystroke-dynamics profile row for a session."""
pass
@abstractmethod
async def increment_smtp_target(self, attacker_uuid: str, domain: str) -> None:
    """
    Register one SMTP hit by ``attacker_uuid`` against ``domain``.

    Implementations upsert the (attacker_uuid, domain) row. The first
    sighting creates it with count=1 and first_seen=now; each later hit
    increments count and refreshes last_seen.

    ``domain`` must already be normalized by the caller (lowercased,
    local-part removed).
    """
@abstractmethod
async def list_smtp_targets(self, attacker_uuid: str) -> list[dict[str, Any]]:
    """List the SmtpTarget rows of one attacker, most recent first."""
@abstractmethod
async def smtp_target_seen(self, domain: str) -> dict[str, Any]:
    """
    Summarize how a victim domain was targeted across all attackers.

    The result has the form ``{seen: bool, count: int,
    first_seen: datetime|None, last_seen: datetime|None}``.

    This mirrors the federation-gossip RPC planned for V2: an operator
    can answer "have any of your attackers targeted this domain?"
    without disclosing who the attackers were.
    """
@abstractmethod
async def get_attacker_by_uuid(self, uuid: str) -> Optional[dict[str, Any]]:
"""Retrieve a single attacker profile by UUID."""

View File

@@ -35,6 +35,7 @@ from decnet.web.db.models import (
Attacker,
AttackerBehavior,
SessionProfile,
SmtpTarget,
SwarmHost,
DeckyShard,
Topology,
@@ -734,6 +735,63 @@ class SQLModelRepository(BaseRepository):
return None
return row.model_dump(mode="json")
async def increment_smtp_target(self, attacker_uuid: str, domain: str) -> None:
    """Upsert an (attacker_uuid, domain) pair and bump count + last_seen.

    Dialect-portable upsert in two steps:

    1. Issue an atomic ``UPDATE ... SET count = count + 1``. Letting the
       database do the increment means concurrent bumps of the same row
       cannot overwrite each other.
    2. If no row matched, INSERT a fresh one with count=1.

    The UNIQUE constraint on (attacker_uuid, domain) makes a concurrent
    first-sight insert fail with ``IntegrityError`` for the losing
    writer. That writer rolls back and retries once, at which point the
    UPDATE finds the row the winner created, so the hit is not lost.

    Args:
        attacker_uuid: UUID of the attacker the hit is attributed to.
        domain: Victim domain, already normalized by the caller.

    Raises:
        sqlalchemy.exc.IntegrityError: If the insert still fails on the
            second attempt, i.e. the failure was not the duplicate-row
            race (for example a foreign-key violation).
    """
    # Imported locally so the method does not depend on the module's
    # top-level import list already providing these names.
    from sqlalchemy import update
    from sqlalchemy.exc import IntegrityError

    async with self._session() as session:
        for attempt in (1, 2):
            now = datetime.now(timezone.utc)
            bumped = await session.execute(
                update(SmtpTarget)
                .where(SmtpTarget.attacker_uuid == attacker_uuid)
                .where(SmtpTarget.domain == domain)
                .values(count=SmtpTarget.count + 1, last_seen=now)
            )
            if bumped.rowcount:
                await session.commit()
                return

            # First sight of this pair (as far as this transaction can tell).
            session.add(SmtpTarget(
                attacker_uuid=attacker_uuid,
                domain=domain,
                first_seen=now,
                last_seen=now,
                count=1,
            ))
            try:
                await session.commit()
            except IntegrityError:
                await session.rollback()
                if attempt == 2:
                    # Not the duplicate-row race; surface the real error.
                    raise
                # Another writer inserted the row first; loop to bump it.
                continue
            return
async def list_smtp_targets(self, attacker_uuid: str) -> list[dict[str, Any]]:
    """Return one attacker's SmtpTarget rows as JSON-mode dicts.

    Rows are ordered by ``last_seen`` descending, so the most recently
    targeted domain comes first.
    """
    newest_first = (
        select(SmtpTarget)
        .where(SmtpTarget.attacker_uuid == attacker_uuid)
        .order_by(desc(SmtpTarget.last_seen))
    )
    async with self._session() as session:
        rows = (await session.execute(newest_first)).scalars().all()
        return [row.model_dump(mode="json") for row in rows]
async def smtp_target_seen(self, domain: str) -> dict[str, Any]:
    """Aggregate every attacker's row for ``domain`` into one summary.

    Returns a dict with ``seen`` (True when the summed count is above
    zero), ``count`` (the summed count, 0 when no row matches),
    ``first_seen`` (earliest first_seen) and ``last_seen`` (latest
    last_seen). No per-attacker field is included in the result.
    """
    aggregate = select(
        # COALESCE turns the NULL sum of an empty match into 0.
        func.coalesce(func.sum(SmtpTarget.count), 0),
        func.min(SmtpTarget.first_seen),
        func.max(SmtpTarget.last_seen),
    ).where(SmtpTarget.domain == domain)

    async with self._session() as session:
        outcome = await session.execute(aggregate)
        hits, earliest, latest = outcome.one()
        hit_count = int(hits)
        return {
            "seen": hit_count > 0,
            "count": hit_count,
            "first_seen": earliest,
            "last_seen": latest,
        }
@staticmethod
def _deserialize_attacker(d: dict[str, Any]) -> dict[str, Any]:
for key in ("services", "deckies", "fingerprints", "commands"):