feat(ttp): E.3.12 EmailLifter (R0041-R0048)
SMTP message-level technique tagger per Appendix A.6: open relay abuse (rcpt_count + foreign From), mass phishing (rcpt_count + body simhash), phishing-kit X-Mailer, IDN/punycode URL, sender masquerade composite (From/Return-Path/DKIM/SPF), malicious attachment (macro/.lnk/.iso/.img/hash match), BEC subject+body composite, encoded payload in body. PII discipline (TTP_TAGGING.md §'Hard parts §6') is enforced at the lifter layer via _filter_evidence(): emitted TTPTag.evidence is restricted to the EmailEvidence allowlist (body_sha256, matched_headers — names only, rcpt_domain_set — domains only, attachment_sha256s, rcpt_count) plus PII-safe match discriminators (matched_kit, matched_trigger, matched_url_host, etc.). Raw addresses, raw body bytes, full URLs, and decoded base64 previews NEVER appear in evidence — defense-in-depth over the YAML evidence_fields hint. Tests: tests/ttp/test_email_lifter.py per-rule positive + negative + PII allowlist guard + state modulation. tests/ttp/rule_precision/test_email_rules.py xfail flipped to real precision (R0041-R0048 H-band ≥95%). Corpus rows updated to acknowledge that R0045 (masquerade) co-fires with R0041 / R0047 when the sender-masquerade signals are present alongside open-relay or BEC patterns — overlap is by design, not a precision bug.
This commit is contained in:
@@ -1,3 +1,9 @@
|
||||
{"source_kind": "email", "payload": {"subject": "Urgent wire transfer needed", "from": "ceo@victim.example", "return_path": "evil@bad.example", "rcpt_count": 1, "body": "Please send $50k to the attached account immediately."}, "expected_rule_ids": ["R0047"], "label": "bec_wire"}
|
||||
{"source_kind": "email", "payload": {"subject": "Newsletter", "from": "marketing@legit.example", "rcpt_count": 1, "body": "Hello world."}, "expected_rule_ids": [], "label": "negative_newsletter"}
|
||||
{"source_kind": "email", "payload": {"subject": "Win a prize", "from": "promo@evil.example", "rcpt_count": 250, "body": "Click here http://evil.example/win"}, "expected_rule_ids": ["R0042"], "label": "mass_phish"}
|
||||
{"source_kind": "email", "payload": {"subject": "Urgent wire transfer needed", "from_domain": "ceo@victim.example", "return_path_domain": "evil.example", "rcpt_count": 1, "body_text": "Please send 50k transfer to the attached account immediately confidential."}, "expected_rule_ids": ["R0045", "R0047"], "label": "bec_wire"}
|
||||
{"source_kind": "email", "payload": {"subject": "Newsletter", "from_domain": "marketing@legit.example", "rcpt_count": 1, "body_text": "Hello world."}, "expected_rule_ids": [], "label": "negative_newsletter"}
|
||||
{"source_kind": "email", "payload": {"subject": "Win a prize", "from_domain": "promo@evil.example", "rcpt_count": 250, "body_text": "Click here", "body_simhash": "abc123", "urls": ["http://evil.example/win"]}, "expected_rule_ids": ["R0042"], "label": "mass_phish"}
|
||||
{"source_kind": "email", "payload": {"rcpt_count": 50, "from_domain": "victim.example", "mail_from_domain": "evil.example"}, "expected_rule_ids": ["R0041", "R0045"], "label": "open_relay"}
|
||||
{"source_kind": "email", "payload": {"x_mailer": "PHPMailer 6.0 (kit-X)", "matched_kit": "kit-X"}, "expected_rule_ids": ["R0043"], "label": "xmailer_kit"}
|
||||
{"source_kind": "email", "payload": {"urls": ["https://xn--80ak6aa92e.com/login"]}, "expected_rule_ids": ["R0044"], "label": "idn_url"}
|
||||
{"source_kind": "email", "payload": {"from_domain": "ceo@victim.example", "return_path_domain": "evil.example"}, "expected_rule_ids": ["R0045"], "label": "from_returnpath_mismatch"}
|
||||
{"source_kind": "email", "payload": {"attachment_macros": true, "attachment_sha256s": ["d"]}, "expected_rule_ids": ["R0046"], "label": "macro_attach"}
|
||||
{"source_kind": "email", "payload": {"body_text": "see body", "body_base64_bytes": 8192}, "expected_rule_ids": ["R0048"], "label": "encoded_body"}
|
||||
|
||||
@@ -48,7 +48,41 @@ async def test_lifter_bound_inert_in_v0(
|
||||
)
|
||||
|
||||
|
||||
def _build_lifter() -> "EmailLifter":
    """Build an ``EmailLifter`` loaded with every rule listed in ``_RULE_IDS``.

    Each rule is compiled from ``rules/ttp/<rule_id>.yaml`` with a default
    ``RuleState``. The compiled rules are handed to the stub store and also
    installed directly into the lifter's index.

    The rules directory is anchored on this file's location rather than the
    current working directory, so the test behaves the same regardless of
    where pytest is invoked from.
    """
    from decnet.ttp.impl.email_lifter import EmailLifter
    from tests.ttp._stub_store import StubRuleStore

    # tests/ttp/rule_precision/<this file> -> parents[3] is the repo root.
    rules_dir = Path(__file__).resolve().parents[3] / "rules" / "ttp"
    rules = [
        _parse_and_compile(rules_dir / f"{rid}.yaml", RuleState())
        for rid in _RULE_IDS
    ]
    lifter = EmailLifter(StubRuleStore(compiled=rules))
    for rule in rules:
        lifter._index.install(rule)
    return lifter
|
||||
|
||||
|
||||
@pytest.mark.parametrize("rule_id", _RULE_IDS)
def test_email_rule_precision(
    rule_id: str,
    corpus_loader: CohortLoader,
) -> None:
    """E.3.12 — drive EmailLifter over the labelled corpus and assert
    per-rule precision. R0041–R0048 are all H-band (≥0.85) → ≥95%.

    The test is skipped when the ``email`` corpus is empty. Every corpus row
    is converted with ``make_event`` and tagged; the fired rule ids are
    collected per row label and passed to ``precision_for`` together with
    the rows.
    """
    import asyncio

    from tests.ttp.rule_precision.conftest import precision_for

    rows = corpus_loader("email")
    if not rows:
        pytest.skip("no email corpus available")

    lifter = _build_lifter()
    # NOTE(review): results are keyed by row label, so rows sharing a label
    # overwrite each other — presumably labels are unique per corpus; confirm.
    fired: dict[str, list[str]] = {}
    for row in rows:
        tags = asyncio.run(lifter.tag(make_event(row)))
        fired[row.label] = [tag.rule_id for tag in tags]

    precision, _tp, _fp = precision_for(rule_id, rows, fired)
    assert precision >= 0.95, (
        f"{rule_id} precision {precision:.2f} < 0.95 on email corpus"
    )
|
||||
|
||||
305
tests/ttp/test_email_lifter.py
Normal file
305
tests/ttp/test_email_lifter.py
Normal file
@@ -0,0 +1,305 @@
|
||||
"""Per-rule unit tests for :class:`EmailLifter` (E.3.12).
|
||||
|
||||
Pins R0041–R0048 predicates and the EmailEvidence PII discipline:
|
||||
emitted ``TTPTag.evidence`` MUST NOT contain raw addresses, raw body
|
||||
bytes, or full URLs (only hashed / domain / matched-discriminator
|
||||
forms are permitted).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.ttp.base import TaggerEvent
|
||||
from decnet.ttp.impl.email_lifter import (
|
||||
_EMAIL_EVIDENCE_ALLOWED_KEYS,
|
||||
EmailLifter,
|
||||
)
|
||||
from decnet.ttp.impl.rule_engine import CompiledRule
|
||||
from decnet.ttp.store.base import RuleState
|
||||
from decnet.ttp.store.impl.filesystem import _parse_and_compile
|
||||
from tests.ttp._stub_store import StubRuleStore
|
||||
|
||||
|
||||
# Directory holding the rule YAML files: "rules/ttp" under the third ancestor
# (parents[2]) of this file's resolved path. Anchoring on __file__ keeps the
# lookup independent of the current working directory.
_RULES_DIR = Path(__file__).resolve().parents[2] / "rules" / "ttp"
|
||||
|
||||
|
||||
def _compile(rule_id: str, state: RuleState | None = None) -> CompiledRule:
    """Compile ``<rule_id>.yaml`` from ``_RULES_DIR`` into a ``CompiledRule``.

    When ``state`` is omitted (or falsy) a default-constructed ``RuleState``
    is used instead.
    """
    rule_path = _RULES_DIR / f"{rule_id}.yaml"
    effective_state = state or RuleState()
    return _parse_and_compile(rule_path, effective_state)
|
||||
|
||||
|
||||
def _ev(payload: dict[str, Any]) -> TaggerEvent:
    """Wrap ``payload`` in an email-sourced ``TaggerEvent``.

    All envelope fields are fixed test values; only the payload varies
    between tests.
    """
    envelope: dict[str, Any] = {
        "source_kind": "email",
        "source_id": "src-email",
        "attacker_uuid": "att1",
        "identity_uuid": None,
        "session_id": None,
        "decky_id": None,
    }
    return TaggerEvent(payload=payload, **envelope)
|
||||
|
||||
|
||||
def _make_lifter(rule_ids: list[str]) -> EmailLifter:
    """Return an ``EmailLifter`` carrying the compiled rules for ``rule_ids``.

    The compiled rules are given to the stub store and additionally installed
    straight into the lifter's index.
    """
    compiled = list(map(_compile, rule_ids))
    email_lifter = EmailLifter(StubRuleStore(compiled=compiled))
    for compiled_rule in compiled:
        email_lifter._index.install(compiled_rule)
    return email_lifter
|
||||
|
||||
|
||||
# ── Per-rule positives ─────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_open_relay_fires_on_high_rcpt_foreign_from() -> None:
    """R0041 tags a 50-recipient message whose From and MAIL FROM domains
    differ with both T1496 and T1586.
    """
    payload = {
        "rcpt_count": 50,
        "from_domain": "victim.example",
        "mail_from_domain": "evil.example",
        "rcpt_domains": ["target1.example", "target2.example"],
        "body_sha256": "a" * 64,
    }
    tags = asyncio.run(_make_lifter(["R0041"]).tag(_ev(payload)))
    techniques = {tag.technique_id for tag in tags}
    assert {"T1496", "T1586"}.issubset(techniques)
|
||||
|
||||
|
||||
def test_open_relay_no_fire_on_matching_from() -> None:
    """R0041 emits nothing when From and MAIL FROM share the same domain,
    even at 50 recipients.
    """
    payload = {
        "rcpt_count": 50,
        "from_domain": "same.example",
        "mail_from_domain": "same.example",
    }
    tags = asyncio.run(_make_lifter(["R0041"]).tag(_ev(payload)))
    assert tags == []
|
||||
|
||||
|
||||
def test_mass_phish_fires_on_threshold_with_simhash() -> None:
    """R0042 yields a T1566 tag for 100 recipients plus a body simhash."""
    payload = {
        "rcpt_count": 100,
        "body_simhash": "abc123",
    }
    tags = asyncio.run(_make_lifter(["R0042"]).tag(_ev(payload)))
    techniques = {tag.technique_id for tag in tags}
    assert "T1566" in techniques
|
||||
|
||||
|
||||
def test_mass_phish_no_simhash_no_fire() -> None:
    """High RCPT alone is open-relay territory; campaign needs simhash."""
    payload = {"rcpt_count": 100}
    tags = asyncio.run(_make_lifter(["R0042"]).tag(_ev(payload)))
    assert tags == []
|
||||
|
||||
|
||||
def test_xmailer_kit_fires_with_match() -> None:
    """R0043 tags T1566 and T1588 when the payload carries an X-Mailer
    value together with a ``matched_kit`` discriminator.
    """
    payload = {
        "x_mailer": "PHPMailer 6.0 (kit-X)",
        "matched_kit": "kit-X",
    }
    tags = asyncio.run(_make_lifter(["R0043"]).tag(_ev(payload)))
    techniques = {tag.technique_id for tag in tags}
    assert {"T1566", "T1588"}.issubset(techniques)
|
||||
|
||||
|
||||
def test_xmailer_kit_no_match_no_fire() -> None:
    """R0043 stays silent for an X-Mailer header without ``matched_kit``."""
    payload = {"x_mailer": "Outlook 16.0"}
    tags = asyncio.run(_make_lifter(["R0043"]).tag(_ev(payload)))
    assert tags == []
|
||||
|
||||
|
||||
def test_idn_url_fires_on_punycode() -> None:
    """R0044 tags T1036 and T1566 for a URL whose host is punycode
    (``xn--`` prefixed).
    """
    payload = {"urls": ["https://xn--80ak6aa92e.com/login"]}
    tags = asyncio.run(_make_lifter(["R0044"]).tag(_ev(payload)))
    techniques = {tag.technique_id for tag in tags}
    assert {"T1036", "T1566"}.issubset(techniques)
|
||||
|
||||
|
||||
def test_sender_masquerade_from_returnpath_mismatch() -> None:
    """R0045 yields T1036 when From and Return-Path point at different
    domains.
    """
    payload = {
        "from_domain": "ceo@victim.example",
        "return_path_domain": "evil.example",
    }
    tags = asyncio.run(_make_lifter(["R0045"]).tag(_ev(payload)))
    techniques = {tag.technique_id for tag in tags}
    assert "T1036" in techniques
|
||||
|
||||
|
||||
def test_sender_masquerade_dkim_fail() -> None:
    """R0045 yields T1036 on ``dkim_signed`` being False alone."""
    payload = {"dkim_signed": False}
    tags = asyncio.run(_make_lifter(["R0045"]).tag(_ev(payload)))
    techniques = {tag.technique_id for tag in tags}
    assert "T1036" in techniques
|
||||
|
||||
|
||||
def test_malicious_attachment_macro() -> None:
    """R0046 tags T1204 and T1566 for an attachment flagged as carrying
    macros.
    """
    payload = {
        "attachment_macros": True,
        "attachment_sha256s": ["b" * 64],
    }
    tags = asyncio.run(_make_lifter(["R0046"]).tag(_ev(payload)))
    techniques = {tag.technique_id for tag in tags}
    assert {"T1204", "T1566"}.issubset(techniques)
|
||||
|
||||
|
||||
def test_malicious_attachment_lnk_extension() -> None:
    """R0046 tags T1204 and T1566 for an attachment with a ``.lnk``
    extension.
    """
    payload = {
        "attachment_extensions": [".lnk"],
        "attachment_sha256s": ["c" * 64],
    }
    tags = asyncio.run(_make_lifter(["R0046"]).tag(_ev(payload)))
    techniques = {tag.technique_id for tag in tags}
    assert {"T1204", "T1566"}.issubset(techniques)
|
||||
|
||||
|
||||
def test_bec_subject_and_body_match() -> None:
    """R0047 yields T1566 when both the subject and the body text carry
    BEC-style wording.
    """
    payload = {
        "subject": "URGENT wire transfer needed",
        "body_text": "Please send $50k immediately, this is confidential.",
    }
    tags = asyncio.run(_make_lifter(["R0047"]).tag(_ev(payload)))
    techniques = {tag.technique_id for tag in tags}
    assert "T1566" in techniques
|
||||
|
||||
|
||||
def test_bec_no_body_action_no_fire() -> None:
    """R0047 emits nothing for an urgent subject paired with a body that
    only asks for a document review.
    """
    payload = {
        "subject": "URGENT review",
        "body_text": "Please review the attached doc.",
    }
    tags = asyncio.run(_make_lifter(["R0047"]).tag(_ev(payload)))
    assert tags == []
|
||||
|
||||
|
||||
def test_encoded_payload_fires_on_precomputed_count() -> None:
    """R0048 tags T1071 and T1027 when the payload supplies a
    ``body_base64_bytes`` count of 8192.
    """
    payload = {
        "body_text": "small body text",
        "body_base64_bytes": 8192,
    }
    tags = asyncio.run(_make_lifter(["R0048"]).tag(_ev(payload)))
    techniques = {tag.technique_id for tag in tags}
    assert {"T1071", "T1027"}.issubset(techniques)
|
||||
|
||||
|
||||
def test_encoded_payload_below_threshold_no_fire() -> None:
    """R0048 emits nothing for a ``body_base64_bytes`` count of 100."""
    payload = {
        "body_text": "small body",
        "body_base64_bytes": 100,
    }
    tags = asyncio.run(_make_lifter(["R0048"]).tag(_ev(payload)))
    assert tags == []
|
||||
|
||||
|
||||
# ── PII discipline ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_evidence_keys_subset_of_email_evidence_allowlist() -> None:
    """No predicate may leak raw addresses, body bytes, or full URLs.

    All eight email rules (R0041–R0048) are loaded and fed a payload that
    carries every signal at once. Each emitted tag's evidence keys must be a
    subset of ``_EMAIL_EVIDENCE_ALLOWED_KEYS``.
    """
    lifter = _make_lifter([
        "R0041", "R0042", "R0043", "R0044",
        "R0045", "R0046", "R0047", "R0048",
    ])
    payloads = [
        {
            "rcpt_count": 50,
            "from_domain": "ceo@victim.example",
            "mail_from_domain": "evil.example",
            "return_path_domain": "evil.example",
            "rcpt_domains": ["a.example"],
            "x_mailer": "Outlook 16",
            "matched_kit": "kit-Y",
            "urls": ["https://xn--example.test/path?id=secret"],
            "dkim_signed": False,
            "spf_pass": False,
            "attachment_macros": True,
            "attachment_extensions": [".lnk"],
            "attachment_sha256s": ["d" * 64],
            "subject": "URGENT wire",
            "body_text": "please send transfer immediately",
            "body_base64_bytes": 8192,
        },
    ]
    for payload in payloads:
        out = asyncio.run(lifter.tag(_ev(payload)))
        # Without this guard the allowlist check below would pass vacuously
        # if the lifter emitted no tags at all for the payload.
        assert out, "expected at least one tag so the allowlist is exercised"
        for tag in out:
            disallowed = set(tag.evidence) - _EMAIL_EVIDENCE_ALLOWED_KEYS
            assert not disallowed, (
                f"PII leak in {tag.rule_id}: unexpected keys {disallowed}"
            )
|
||||
|
||||
|
||||
def test_evidence_carries_no_raw_addresses_or_body() -> None:
    """Evidence emitted by R0041/R0045/R0047 must not contain the local part
    of the sender address or fragments of the raw body text.
    """
    payload = {
        "rcpt_count": 50,
        "from_domain": "ceo-direct@victim.example",  # full address-shaped
        "mail_from_domain": "evil.example",
        "return_path_domain": "evil.example",
        "subject": "URGENT wire transfer needed",
        "body_text": "Send the wire to acct 12345 confidential right now",
        "rcpt_domains": ["target.example"],
    }
    tags = asyncio.run(
        _make_lifter(["R0041", "R0045", "R0047"]).tag(_ev(payload))
    )
    assert tags
    forbidden_fragments = ("ceo-direct@", "Send the wire", "12345")
    for tag in tags:
        rendered = repr(tag.evidence)
        for fragment in forbidden_fragments:
            assert fragment not in rendered
|
||||
|
||||
|
||||
def test_body_sha_set_when_upstream_omits() -> None:
    """When the payload has ``body_text`` but no ``body_sha256``, every
    emitted tag carries the SHA-256 hex digest of the body text as
    ``body_sha256`` evidence.
    """
    payload = {
        "rcpt_count": 100,
        "body_text": "some body",
        "body_simhash": "abc",
    }
    tags = asyncio.run(_make_lifter(["R0042"]).tag(_ev(payload)))
    assert tags
    digest = hashlib.sha256(b"some body").hexdigest()
    for tag in tags:
        assert tag.evidence["body_sha256"] == digest
|
||||
|
||||
|
||||
# ── State + tolerance ──────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_disabled_email_rule_no_emit() -> None:
    """A rule compiled with ``RuleState(state="disabled")`` produces no tags
    even for a payload that carries its signals.
    """
    disabled_rule = _compile("R0042", RuleState(state="disabled"))
    lifter = EmailLifter(StubRuleStore())
    lifter._index.install(disabled_rule)
    payload = {"rcpt_count": 200, "body_simhash": "abc"}
    tags = asyncio.run(lifter.tag(_ev(payload)))
    assert tags == []
|
||||
|
||||
|
||||
def test_empty_payload_no_errors(caplog: pytest.LogCaptureFixture) -> None:
    """An empty payload yields no tags and logs nothing at ERROR or above
    with all eight email rules loaded.
    """
    caplog.set_level(logging.DEBUG)
    all_email_rules = [
        "R0041", "R0042", "R0043", "R0044",
        "R0045", "R0046", "R0047", "R0048",
    ]
    tags = asyncio.run(_make_lifter(all_email_rules).tag(_ev({})))
    assert tags == []
    error_records = [
        record for record in caplog.records
        if record.levelno >= logging.ERROR
    ]
    assert not error_records
|
||||
|
||||
|
||||
def test_owns_only_email_prefix() -> None:
    """Hydrating the index through the lifter's ``_owns`` predicate keeps
    R0041 and leaves out R0031 (treated here as a behavioral rule).
    """
    store = StubRuleStore(compiled=[_compile("R0031"), _compile("R0041")])
    lifter = EmailLifter(store)
    hydration = lifter._index.hydrate_from(
        lifter._store, predicate=lifter._owns,  # type: ignore[arg-type]
    )
    asyncio.run(hydration)
    index = lifter._index
    assert index.get("R0041") is not None
    assert index.get("R0031") is None
|
||||
@@ -42,7 +42,9 @@ def _make_lifter(cls: type[TolerantTagger]) -> TolerantTagger:
|
||||
Implemented lifters (E.3.9–E.3.12) take a :class:`RuleStore`; the
|
||||
still-empty IdentityLifter / CredentialLifter (E.3.13) take no args.
|
||||
"""
|
||||
if cls in {BehavioralLifter, IntelLifter, CanaryFingerprintLifter}:
|
||||
if cls in {
|
||||
BehavioralLifter, IntelLifter, CanaryFingerprintLifter, EmailLifter,
|
||||
}:
|
||||
return cls(StubRuleStore()) # type: ignore[call-arg]
|
||||
return cls()
|
||||
|
||||
|
||||
@@ -24,7 +24,9 @@ from tests.ttp._stub_store import StubRuleStore
|
||||
|
||||
|
||||
def _instantiate(cls: type[TolerantTagger]) -> TolerantTagger:
    """Instantiate a lifter class for testing.

    The lifters listed below are constructed with a ``StubRuleStore``; any
    other class is constructed with no arguments.
    """
    if cls in {
        BehavioralLifter, IntelLifter, CanaryFingerprintLifter, EmailLifter,
    }:
        return cls(StubRuleStore())  # type: ignore[call-arg]
    return cls()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user