Files
DECNET/tests/test_smtp_targets.py
anti d43303251d feat(profiler): track SMTP victim domains per attacker
New SmtpTarget table records each (attacker, domain) pair observed via
the SMTP honeypots. Only the domain is stored — local-parts are dropped
at ingestion, so this table holds no user-identifying data beyond the
target organisation's identity.

The profiler worker extracts domains from rcpt_to / rcpt_denied /
message_accepted events, normalizes them (lowercase, strip local-part,
drop blocked TLDs), and upserts one row per pair with a running count +
first_seen / last_seen.

Three repo methods shipped:
  * increment_smtp_target(attacker, domain) — upsert + bump
  * list_smtp_targets(attacker) — per-attacker view
  * smtp_target_seen(domain) — cross-attacker aggregate, shaped as the
    federation-gossip RPC that V2 will expose.

The gossip-query shape is load-bearing: each operator can answer
"have any of your attackers targeted corp1.com?" without leaking
which attackers or when — the aggregate returns a bool + total count
+ first/last seen, nothing else.
2026-04-22 22:23:27 -04:00

161 lines
5.6 KiB
Python

"""
Tests for SMTP victim-domain tracking (SmtpTarget table + profiler ingestion).
Two surfaces under test:
* Repo upsert / list / aggregate-seen helpers.
* The profiler's `_extract_smtp_domains` + `_normalize_smtp_domain`
parsers — pure functions exercised directly without running the
full worker loop.
"""
from datetime import datetime, timezone
import pytest
from decnet.web.db.factory import get_repository
from decnet.correlation.parser import LogEvent
from decnet.profiler.worker import _extract_smtp_domains, _normalize_smtp_domain
@pytest.fixture
async def repo(tmp_path):
r = get_repository(db_path=str(tmp_path / "smtp_targets.db"))
await r.initialize()
return r
def _smtp_event(event_type: str, **fields) -> LogEvent:
return LogEvent(
timestamp=datetime.now(timezone.utc),
decky="decky-01",
service="smtp",
event_type=event_type,
attacker_ip="1.2.3.4",
fields=fields,
raw="",
)
# ── Domain normalization ─────────────────────────────────────────────────────
@pytest.mark.parametrize("raw, expected", [
("<john@corp1.com>", "corp1.com"),
("JOHN@CORP1.COM", "corp1.com"),
("<alice@mail.corp.io>", "mail.corp.io"),
# Empty / malformed → None
("", None),
("notanemail", None),
("@nouser.com", None),
("user@", None),
# Blocked TLDs
("admin@foo.invalid", None),
("test@bar.test", None),
("x@local.example", None),
# Punctuation / angle-bracket forms the RCPT parser already validated
("RCPT TO:<c@d.com>", "d.com"),
])
def test_normalize_smtp_domain(raw, expected):
assert _normalize_smtp_domain(raw) == expected
# ── Event → domain extraction ────────────────────────────────────────────────
def test_extract_from_rcpt_to():
events = [
_smtp_event("rcpt_to", value="<bob@target.com>"),
_smtp_event("rcpt_to", value="<alice@other.com>"),
]
assert _extract_smtp_domains(events) == {"target.com", "other.com"}
def test_extract_from_rcpt_denied():
events = [_smtp_event("rcpt_denied", value="<carol@corp.net>")]
assert _extract_smtp_domains(events) == {"corp.net"}
def test_extract_from_message_accepted_splits_recipients():
"""`message_accepted.rcpt_to` is a comma-joined list, not a single addr."""
events = [_smtp_event(
"message_accepted",
rcpt_to="<a@one.com>,<b@two.com>,<c@one.com>",
mail_from="<spam@evil.com>",
)]
assert _extract_smtp_domains(events) == {"one.com", "two.com"}
def test_extract_ignores_non_smtp_events():
"""Identical `value` fields on non-smtp services must not leak in."""
events = [
LogEvent(
timestamp=datetime.now(timezone.utc),
decky="decky-01", service="ssh", event_type="rcpt_to",
attacker_ip="1.2.3.4",
fields={"value": "<x@wrong.com>"}, raw="",
),
]
assert _extract_smtp_domains(events) == set()
def test_extract_dedupes_within_batch():
events = [
_smtp_event("rcpt_to", value="<a@corp.com>"),
_smtp_event("rcpt_to", value="<b@corp.com>"),
_smtp_event("rcpt_to", value="<c@corp.com>"),
]
assert _extract_smtp_domains(events) == {"corp.com"}
# ── Repo: increment + list + seen ────────────────────────────────────────────
@pytest.mark.anyio
async def test_increment_creates_then_bumps(repo):
await repo.increment_smtp_target("uuid-1", "corp.com")
rows = await repo.list_smtp_targets("uuid-1")
assert len(rows) == 1
assert rows[0]["domain"] == "corp.com"
assert rows[0]["count"] == 1
first_seen_1 = rows[0]["first_seen"]
# Second hit bumps count + last_seen, preserves first_seen.
await repo.increment_smtp_target("uuid-1", "corp.com")
rows = await repo.list_smtp_targets("uuid-1")
assert rows[0]["count"] == 2
assert rows[0]["first_seen"] == first_seen_1
@pytest.mark.anyio
async def test_increment_isolates_per_attacker(repo):
await repo.increment_smtp_target("uuid-a", "corp.com")
await repo.increment_smtp_target("uuid-b", "corp.com")
assert len(await repo.list_smtp_targets("uuid-a")) == 1
assert len(await repo.list_smtp_targets("uuid-b")) == 1
@pytest.mark.anyio
async def test_list_orders_by_last_seen_desc(repo):
await repo.increment_smtp_target("uuid-1", "older.com")
await repo.increment_smtp_target("uuid-1", "newer.com")
rows = await repo.list_smtp_targets("uuid-1")
# Second call (newer.com) has a later last_seen → first row.
assert [r["domain"] for r in rows] == ["newer.com", "older.com"]
@pytest.mark.anyio
async def test_smtp_target_seen_aggregates_across_attackers(repo):
await repo.increment_smtp_target("uuid-a", "corp.com")
await repo.increment_smtp_target("uuid-a", "corp.com")
await repo.increment_smtp_target("uuid-b", "corp.com")
agg = await repo.smtp_target_seen("corp.com")
assert agg["seen"] is True
assert agg["count"] == 3 # 2 + 1
assert agg["first_seen"] is not None
assert agg["last_seen"] is not None
@pytest.mark.anyio
async def test_smtp_target_seen_unknown_domain(repo):
agg = await repo.smtp_target_seen("never-targeted.org")
assert agg["seen"] is False
assert agg["count"] == 0
assert agg["first_seen"] is None
assert agg["last_seen"] is None