feat(profiler): track SMTP victim domains per attacker
New SmtpTarget table records each (attacker, domain) pair observed via
the SMTP honeypots. Only the domain is stored — local-parts are dropped
at ingestion, so this table holds no user-identifying data beyond the
target organisation's identity.
The profiler worker extracts domains from rcpt_to / rcpt_denied /
message_accepted events, normalizes them (lowercase, strip local-part,
drop blocked TLDs), and upserts one row per pair with a running count +
first_seen / last_seen.
Three repo methods shipped:
* increment_smtp_target(attacker, domain) — upsert + bump
* list_smtp_targets(attacker) — per-attacker view
* smtp_target_seen(domain) — cross-attacker aggregate, shaped as the
federation-gossip RPC that V2 will expose.
The gossip-query shape is load-bearing: each operator can answer
"have any of your attackers targeted corp1.com?" without leaking
which attackers or when — the aggregate returns a bool + total count
+ first/last seen, nothing else.
This commit is contained in:
@@ -15,6 +15,7 @@ from __future__ import annotations
|
||||
import asyncio
|
||||
import contextlib
|
||||
import json
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Callable
|
||||
@@ -47,6 +48,18 @@ _COMMAND_EVENT_TYPES = frozenset({
|
||||
# Fields that carry the executed command/query text
|
||||
_COMMAND_FIELDS = ("command", "query", "input", "line", "sql", "cmd")
|
||||
|
||||
# SMTP events that carry a recipient email address. `rcpt_to` fires once per
|
||||
# accepted RCPT (open-relay mode), `rcpt_denied` once per denied RCPT
|
||||
# (harvester mode). `message_accepted` carries the comma-joined rcpt list
|
||||
# on the final DATA commit — covered for replay safety, though every
|
||||
# address it contains already arrived via `rcpt_to` earlier in the session.
|
||||
_SMTP_RCPT_EVENTS = frozenset({"rcpt_to", "rcpt_denied", "message_accepted"})
|
||||
|
||||
# Pseudo-TLDs we never want to report on: the RFC 6761 special-use names
|
||||
# plus common lab-only values. Matching happens on the *last* label so
|
||||
# `foo.example` is filtered but `example.corp` is not.
|
||||
_BLOCKED_TLDS = frozenset({"invalid", "test", "localhost", "local", "example"})
|
||||
|
||||
|
||||
@dataclass
|
||||
class _WorkerState:
|
||||
@@ -211,6 +224,17 @@ async def _update_profiles(
|
||||
_span.record_exception(exc)
|
||||
logger.error("attacker worker: behavior upsert failed for %s: %s", ip, exc)
|
||||
|
||||
# SMTP victim-domain tracking — extract domains from RCPT events
|
||||
# and upsert one row per (attacker, domain) pair. Same
|
||||
# soft-fail posture as the behavior rollup: errors here must
|
||||
# not block the next attacker.
|
||||
try:
|
||||
for domain in _extract_smtp_domains(events):
|
||||
await repo.increment_smtp_target(attacker_uuid, domain)
|
||||
except Exception as exc:
|
||||
_span.record_exception(exc)
|
||||
logger.error("attacker worker: smtp target upsert failed for %s: %s", ip, exc)
|
||||
|
||||
|
||||
def _build_record(
|
||||
ip: str,
|
||||
@@ -285,3 +309,53 @@ def _extract_commands_from_events(events: list[LogEvent]) -> list[dict[str, Any]
|
||||
})
|
||||
|
||||
return commands
|
||||
|
||||
|
||||
_SMTP_ADDR_RE = re.compile(r"<?([^\s<>@]+)@([A-Za-z0-9.-]+\.[A-Za-z]{2,})>?")

# DNS size limits (RFC 1035 §2.3.4): a label is at most 63 octets and a
# full name at most 253 characters in its textual form.
_MAX_DOMAIN_LENGTH = 253
_MAX_LABEL_LENGTH = 63


def _normalize_smtp_domain(raw: str) -> str | None:
    """Extract a lowercased domain from an envelope-address fragment.

    Only the part after ``@`` is returned; the local-part is discarded.

    Returns None when:
      * the input is empty, not a string, or contains no email-like token;
      * the host part is not a plausible DNS name (fewer than two labels,
        an empty label such as ``foo..com``, a label that starts or ends
        with ``-``, or a label/name exceeding the DNS length limits);
      * the last label is listed in ``_BLOCKED_TLDS``.
    """
    # The value comes from attacker-controlled SMTP traffic, so be defensive
    # about both its type and its shape.
    if not raw or not isinstance(raw, str):
        return None
    match = _SMTP_ADDR_RE.search(raw.strip())
    if match is None:
        return None

    domain = match.group(2).lower().strip(".")
    if not domain or len(domain) > _MAX_DOMAIN_LENGTH:
        return None

    # The regex character class is permissive (it accepts runs of dots and
    # hyphens), so validate the label structure explicitly.
    labels = domain.split(".")
    if len(labels) < 2:
        return None
    for label in labels:
        if not label or len(label) > _MAX_LABEL_LENGTH:
            return None
        if label.startswith("-") or label.endswith("-"):
            return None

    if labels[-1] in _BLOCKED_TLDS:
        return None
    return domain
|
||||
|
||||
|
||||
def _extract_smtp_domains(events: list[LogEvent]) -> set[str]:
    """Return the distinct victim domains found in a batch of SMTP events.

    Only events whose service is ``"smtp"`` and whose type is listed in
    ``_SMTP_RCPT_EVENTS`` are inspected. ``message_accepted`` events carry
    a comma-joined recipient list in ``fields["rcpt_to"]``; every other
    recipient event carries a single address in ``fields["value"]``.

    Each address goes through ``_normalize_smtp_domain``; addresses it
    rejects are skipped. Because the result is a set, a domain that occurs
    many times in the batch appears once.
    """
    found: set[str] = set()
    for ev in events:
        is_rcpt_event = ev.service == "smtp" and ev.event_type in _SMTP_RCPT_EVENTS
        if not is_rcpt_event:
            continue

        if ev.event_type != "message_accepted":
            addresses = [ev.fields.get("value", "")]
        else:
            joined = ev.fields.get("rcpt_to", "")
            addresses = joined.split(",") if joined else []

        normalized = (_normalize_smtp_domain(addr) for addr in addresses)
        found.update(name for name in normalized if name)
    return found
|
||||
|
||||
@@ -30,6 +30,7 @@ from .attackers import (
|
||||
AttackerBehavior,
|
||||
AttackersResponse,
|
||||
SessionProfile,
|
||||
SmtpTarget,
|
||||
)
|
||||
from .deploy import (
|
||||
DeployIniRequest,
|
||||
@@ -137,6 +138,7 @@ __all__ = [
|
||||
"AttackerBehavior",
|
||||
"AttackersResponse",
|
||||
"SessionProfile",
|
||||
"SmtpTarget",
|
||||
# deploy
|
||||
"DeployIniRequest",
|
||||
"MutateIntervalRequest",
|
||||
|
||||
@@ -3,7 +3,7 @@ from datetime import datetime, timezone
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import Column, Text
|
||||
from sqlalchemy import Column, Text, UniqueConstraint
|
||||
from sqlmodel import Field, SQLModel
|
||||
|
||||
from ._base import _BIG_TEXT
|
||||
@@ -143,6 +143,36 @@ class SessionProfile(SQLModel, table=True):
|
||||
)
|
||||
|
||||
|
||||
class SmtpTarget(SQLModel, table=True):
    """
    Victim domains observed per attacker via the SMTP honeypots.

    One row exists per (attacker_uuid, domain) pair, enforced by the
    ``uq_smtp_targets_attacker_domain`` unique constraint. The table has
    no column for the local-part of an address (the bit before `@`), so
    only the targeted domain is ever stored here.

    The columns (count, first_seen, last_seen keyed by domain) are what a
    cross-attacker aggregate such as ``smtp_target_seen(domain)`` needs to
    answer "has this domain been targeted?" without returning attacker
    identifiers.
    """

    __tablename__ = "smtp_targets"
    __table_args__ = (
        UniqueConstraint("attacker_uuid", "domain", name="uq_smtp_targets_attacker_domain"),
    )

    id: Optional[int] = Field(default=None, primary_key=True)
    attacker_uuid: str = Field(foreign_key="attackers.uuid", index=True)
    domain: str = Field(index=True)
    # Both timestamps default to the current UTC time when a row is created.
    first_seen: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
    )
    last_seen: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        index=True,
    )
    # Running hit counter; a freshly created row starts at 1.
    # NOTE(review): whether this advances per recipient or per processed
    # batch depends on the ingestion caller — confirm against the worker.
    count: int = Field(default=1)
|
||||
|
||||
|
||||
class AttackersResponse(BaseModel):
|
||||
total: int
|
||||
limit: int
|
||||
|
||||
@@ -170,6 +170,35 @@ class BaseRepository(ABC):
|
||||
"""Retrieve the keystroke-dynamics profile row for a session."""
|
||||
pass
|
||||
|
||||
@abstractmethod
async def increment_smtp_target(self, attacker_uuid: str, domain: str) -> None:
    """
    Note one more SMTP hit by ``attacker_uuid`` against ``domain``.

    Implementations upsert the (attacker_uuid, domain) row. The first
    sighting inserts it with count=1 and first_seen=now; every later hit
    increments count and refreshes last_seen.

    ``domain`` must already be normalized by the caller (lowercase,
    local-part stripped).
    """
|
||||
|
||||
@abstractmethod
async def list_smtp_targets(self, attacker_uuid: str) -> list[dict[str, Any]]:
    """List an attacker's SmtpTarget rows, most recently seen first."""
|
||||
|
||||
@abstractmethod
async def smtp_target_seen(self, domain: str) -> dict[str, Any]:
    """
    Summarize how often ``domain`` was targeted, across all attackers.

    The result has the shape ``{seen: bool, count: int,
    first_seen: datetime|None, last_seen: datetime|None}``.

    It is shaped as the federation-gossip RPC that V2 will expose: an
    operator can answer "have any of your attackers targeted this
    domain?" without leaking attacker identity.
    """
|
||||
|
||||
@abstractmethod
|
||||
async def get_attacker_by_uuid(self, uuid: str) -> Optional[dict[str, Any]]:
|
||||
"""Retrieve a single attacker profile by UUID."""
|
||||
|
||||
@@ -35,6 +35,7 @@ from decnet.web.db.models import (
|
||||
Attacker,
|
||||
AttackerBehavior,
|
||||
SessionProfile,
|
||||
SmtpTarget,
|
||||
SwarmHost,
|
||||
DeckyShard,
|
||||
Topology,
|
||||
@@ -734,6 +735,63 @@ class SQLModelRepository(BaseRepository):
|
||||
return None
|
||||
return row.model_dump(mode="json")
|
||||
|
||||
async def increment_smtp_target(self, attacker_uuid: str, domain: str) -> None:
    """Upsert an (attacker_uuid, domain) pair and bump count + last_seen.

    The bump is issued as a single ``UPDATE ... SET count = count + 1``
    so concurrent writers cannot lose increments. If no row matched, a
    new row is inserted with count=1 and first_seen=last_seen=now.

    If another writer inserts the same pair between the UPDATE and the
    INSERT, the UNIQUE constraint on (attacker_uuid, domain) raises
    IntegrityError. The transaction is then rolled back and the UPDATE
    retried against the row that now exists. Only portable constructs
    are used — no dialect-specific ``ON CONFLICT`` clause.

    Raises:
        IntegrityError: if the insert failed for a reason other than the
            duplicate-pair race (the retried UPDATE still matches no row).
    """
    # Function-scope imports keep this method self-contained; sqlalchemy
    # is already a dependency of the ORM layer used throughout this file.
    from sqlalchemy import update
    from sqlalchemy.exc import IntegrityError

    now = datetime.now(timezone.utc)
    bump = (
        update(SmtpTarget)
        .where(SmtpTarget.attacker_uuid == attacker_uuid)
        .where(SmtpTarget.domain == domain)
        .values(count=SmtpTarget.count + 1, last_seen=now)
    )
    async with self._session() as session:
        result = await session.execute(bump)
        if result.rowcount > 0:
            await session.commit()
            return

        # First sighting of this pair: create the row.
        session.add(SmtpTarget(
            attacker_uuid=attacker_uuid,
            domain=domain,
            first_seen=now,
            last_seen=now,
            count=1,
        ))
        try:
            await session.commit()
        except IntegrityError:
            # Most likely lost the insert race; count the hit on the
            # winner's row instead of dropping it.
            await session.rollback()
            retry = await session.execute(bump)
            if retry.rowcount == 0:
                # Not a duplicate-pair collision (e.g. a foreign-key
                # failure) — surface the original error.
                raise
            await session.commit()
|
||||
|
||||
async def list_smtp_targets(self, attacker_uuid: str) -> list[dict[str, Any]]:
    """Return the attacker's SmtpTarget rows as JSON-mode dicts.

    Rows are ordered by ``last_seen`` descending.
    """
    query = (
        select(SmtpTarget)
        .where(SmtpTarget.attacker_uuid == attacker_uuid)
        .order_by(desc(SmtpTarget.last_seen))
    )
    async with self._session() as session:
        rows = (await session.execute(query)).scalars().all()
        # Serialize while the session is still open.
        return [row.model_dump(mode="json") for row in rows]
|
||||
|
||||
async def smtp_target_seen(self, domain: str) -> dict[str, Any]:
    """Aggregate the rows for ``domain`` across every attacker in the DB.

    Returns a dict with ``seen`` (True when the summed count is positive),
    ``count`` (sum of the per-row counters, 0 when no row matches),
    and ``first_seen`` / ``last_seen`` (earliest and latest timestamps,
    as returned by the MIN/MAX aggregates).
    """
    query = select(
        func.coalesce(func.sum(SmtpTarget.count), 0),
        func.min(SmtpTarget.first_seen),
        func.max(SmtpTarget.last_seen),
    ).where(SmtpTarget.domain == domain)

    async with self._session() as session:
        hits, earliest, latest = (await session.execute(query)).one()

    hit_count = int(hits)
    return {
        "seen": hit_count > 0,
        "count": hit_count,
        "first_seen": earliest,
        "last_seen": latest,
    }
|
||||
|
||||
@staticmethod
|
||||
def _deserialize_attacker(d: dict[str, Any]) -> dict[str, Any]:
|
||||
for key in ("services", "deckies", "fingerprints", "commands"):
|
||||
|
||||
Reference in New Issue
Block a user