feat(ttp): E.3.12 EmailLifter (R0041-R0048)

SMTP message-level technique tagger per Appendix A.6: open relay abuse
(rcpt_count + foreign From), mass phishing (rcpt_count + body simhash),
phishing-kit X-Mailer, IDN/punycode URL, sender masquerade composite
(From/Return-Path/DKIM/SPF), malicious attachment (macro/.lnk/.iso/.img/
hash match), BEC subject+body composite, encoded payload in body.

PII discipline (TTP_TAGGING.md §'Hard parts §6') is enforced at the
lifter layer via _filter_evidence(): emitted TTPTag.evidence is
restricted to the EmailEvidence allowlist (body_sha256,
matched_headers — names only, rcpt_domain_set — domains only,
attachment_sha256s, rcpt_count) plus PII-safe match discriminators
(matched_kit, matched_trigger, matched_url_host, etc.). Raw addresses,
raw body bytes, full URLs, and decoded base64 previews NEVER appear in
evidence — defense-in-depth over the YAML evidence_fields hint.

Tests: tests/ttp/test_email_lifter.py per-rule positive + negative +
PII allowlist guard + state modulation. tests/ttp/rule_precision/
test_email_rules.py xfail flipped to real precision (R0041-R0048
H-band ≥95%). Corpus rows updated to acknowledge that R0045 (masquerade)
co-fires with R0041 / R0047 when the sender-masquerade signals are
present alongside open-relay or BEC patterns — overlap is by design,
not a precision bug.
This commit is contained in:
2026-05-01 20:31:03 -04:00
parent f211d394e6
commit 7a89fbb357
8 changed files with 745 additions and 17 deletions

View File

@@ -1,3 +1,9 @@
{"source_kind": "email", "payload": {"subject": "Urgent wire transfer needed", "from": "ceo@victim.example", "return_path": "evil@bad.example", "rcpt_count": 1, "body": "Please send $50k to the attached account immediately."}, "expected_rule_ids": ["R0047"], "label": "bec_wire"}
{"source_kind": "email", "payload": {"subject": "Newsletter", "from": "marketing@legit.example", "rcpt_count": 1, "body": "Hello world."}, "expected_rule_ids": [], "label": "negative_newsletter"}
{"source_kind": "email", "payload": {"subject": "Win a prize", "from": "promo@evil.example", "rcpt_count": 250, "body": "Click here http://evil.example/win"}, "expected_rule_ids": ["R0042"], "label": "mass_phish"}
{"source_kind": "email", "payload": {"subject": "Urgent wire transfer needed", "from_domain": "ceo@victim.example", "return_path_domain": "evil.example", "rcpt_count": 1, "body_text": "Please send 50k transfer to the attached account immediately confidential."}, "expected_rule_ids": ["R0045", "R0047"], "label": "bec_wire"}
{"source_kind": "email", "payload": {"subject": "Newsletter", "from_domain": "marketing@legit.example", "rcpt_count": 1, "body_text": "Hello world."}, "expected_rule_ids": [], "label": "negative_newsletter"}
{"source_kind": "email", "payload": {"subject": "Win a prize", "from_domain": "promo@evil.example", "rcpt_count": 250, "body_text": "Click here", "body_simhash": "abc123", "urls": ["http://evil.example/win"]}, "expected_rule_ids": ["R0042"], "label": "mass_phish"}
{"source_kind": "email", "payload": {"rcpt_count": 50, "from_domain": "victim.example", "mail_from_domain": "evil.example"}, "expected_rule_ids": ["R0041", "R0045"], "label": "open_relay"}
{"source_kind": "email", "payload": {"x_mailer": "PHPMailer 6.0 (kit-X)", "matched_kit": "kit-X"}, "expected_rule_ids": ["R0043"], "label": "xmailer_kit"}
{"source_kind": "email", "payload": {"urls": ["https://xn--80ak6aa92e.com/login"]}, "expected_rule_ids": ["R0044"], "label": "idn_url"}
{"source_kind": "email", "payload": {"from_domain": "ceo@victim.example", "return_path_domain": "evil.example"}, "expected_rule_ids": ["R0045"], "label": "from_returnpath_mismatch"}
{"source_kind": "email", "payload": {"attachment_macros": true, "attachment_sha256s": ["d"]}, "expected_rule_ids": ["R0046"], "label": "macro_attach"}
{"source_kind": "email", "payload": {"body_text": "see body", "body_base64_bytes": 8192}, "expected_rule_ids": ["R0048"], "label": "encoded_body"}

View File

@@ -48,7 +48,41 @@ async def test_lifter_bound_inert_in_v0(
)
def _build_lifter() -> "EmailLifter":
    """Return an EmailLifter preloaded with every rule in ``_RULE_IDS``.

    Each rule id is resolved to ``rules/ttp/<id>.yaml`` and compiled with a
    fresh ``RuleState``. The compiled rules are handed to a ``StubRuleStore``
    backing the lifter and are also installed into the lifter's index.
    """
    from decnet.ttp.impl.email_lifter import EmailLifter
    from tests.ttp._stub_store import StubRuleStore

    rule_dir = Path("rules/ttp")
    compiled_rules = []
    for rule_id in _RULE_IDS:
        yaml_path = rule_dir / f"{rule_id}.yaml"
        compiled_rules.append(_parse_and_compile(yaml_path, RuleState()))

    email_lifter = EmailLifter(StubRuleStore(compiled=compiled_rules))
    # Rules go to the store above and are installed into the index here;
    # presumably the stub store alone does not populate it — confirm.
    for compiled in compiled_rules:
        email_lifter._index.install(compiled)
    return email_lifter
# Placeholder precision test, parametrized over every id in _RULE_IDS.
# It is marked strict-xfail, so the unconditional pytest.fail() below is the
# expected outcome and an unexpected pass would be reported as a failure.
# NOTE(review): a definition with the same name follows immediately after
# this one; this looks like the pre-E.3.12 stub — confirm which one is live.
@pytest.mark.parametrize("rule_id", _RULE_IDS)
@pytest.mark.xfail(strict=True, reason="impl phase E.3.12 (EmailLifter)")
def test_email_rule_precision(rule_id: str) -> None:
# Always fails, naming the rule id in the failure message.
pytest.fail(f"{rule_id}: EmailLifter not yet shipped (E.3.12)")
def test_email_rule_precision(
    rule_id: str,
    corpus_loader: CohortLoader,
) -> None:
    """E.3.12: run EmailLifter over the labelled email corpus and check the
    precision of ``rule_id``.

    R0041-R0048 are all H-band (≥0.85), so each must reach ≥95% precision.
    The test is skipped when the loader returns no email rows.
    """
    import asyncio

    from tests.ttp.rule_precision.conftest import precision_for

    corpus_rows = corpus_loader("email")
    if not corpus_rows:
        pytest.skip("no email corpus available")

    email_lifter = _build_lifter()

    def _fired_rule_ids(row) -> list[str]:
        # lifter.tag() is a coroutine; drive it to completion for this row.
        tags = asyncio.run(email_lifter.tag(make_event(row)))
        return [tag.rule_id for tag in tags]

    # Map each row's label to the rule ids the lifter emitted for that row.
    fired: dict[str, list[str]] = {}
    for row in corpus_rows:
        rule_ids = _fired_rule_ids(row)
        fired[row.label] = rule_ids

    precision, _true_pos, _false_pos = precision_for(rule_id, corpus_rows, fired)
    assert precision >= 0.95, (
        f"{rule_id} precision {precision:.2f} < 0.95 on email corpus"
    )