feat(bus): publish email.received from ingester after SMTP artifact persist
Wires the EmailLifter (R0041–R0048) producer that DEBT.md item #3 deferred. After the existing add_bounty() call in _extract_bounty (line 615), call _publish_email_received() which: * resolves the attacker_uuid via repo.get_attacker_uuid_by_ip; drops the publish if unresolved (the TTP worker can't anchor orphan events) * projects the message_stored fields onto the EmailLifter wire contract: from_domain / mail_from_domain / return_path_domain parsed via _domain_of, rcpt_count + rcpt_domains via _rcpt_projection, attachment_sha256s + attachment_extensions derived from the existing attachments_json manifest, urls from urls_json, dkim_signed/spf_pass coerced from 0/1 ints to bool * mirrors _publish_probe_pending's bus-per-call pattern and swallows all exceptions (the bus is the notification layer, not the source of truth) Fires for both relay and non-relay SMTP services. R0041 / R0043 / R0044 / R0045 are now live end-to-end; R0046 partial (extension lane). Heavyweight predicates (R0042 simhash, R0046-deep, R0047 / R0048 body_text) stay deferred per the EmailLifter heavyweight DEBT entry.
This commit is contained in:
@@ -154,6 +154,160 @@ class TestExtractBounty:
|
||||
assert bounty["payload"]["subject"] == "URGENT: invoice"
|
||||
assert bounty["payload"]["mail_from"] == "spammer@spammer.example"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_message_stored_publishes_email_received(self):
|
||||
"""SMTP message_stored persists the artifact AND publishes
|
||||
``email.received`` with the EmailLifter wire contract: domains,
|
||||
rcpt_count + rcpt_domains, attachment shas + extensions, urls,
|
||||
dkim/spf bools, x_mailer."""
|
||||
from decnet.web import ingester as _ing
|
||||
from decnet.web.ingester import _extract_bounty
|
||||
mock_repo = MagicMock()
|
||||
mock_repo.add_bounty = AsyncMock()
|
||||
mock_repo.upsert_credential = AsyncMock()
|
||||
mock_repo.get_attacker_uuid_by_ip = AsyncMock(return_value="att-7")
|
||||
|
||||
published: list = []
|
||||
|
||||
async def fake_publish(_bus, topic, payload, event_type=""):
|
||||
published.append((topic, payload, event_type))
|
||||
|
||||
fake_bus = MagicMock()
|
||||
fake_bus.connect = AsyncMock()
|
||||
fake_bus.close = AsyncMock()
|
||||
|
||||
with patch.object(_ing, "get_bus", return_value=fake_bus), \
|
||||
patch.object(_ing, "publish_safely", side_effect=fake_publish):
|
||||
await _extract_bounty(mock_repo, {
|
||||
"decky": "mail-decky",
|
||||
"service": "smtp",
|
||||
"attacker_ip": "203.0.113.7",
|
||||
"event_type": "message_stored",
|
||||
"fields": {
|
||||
"msg_id": "ABCD1234",
|
||||
"stored_as": "2026-04-28T12:00:00Z_abc_msg.eml",
|
||||
"sha256": "cafebabe" * 8,
|
||||
"size": "8192",
|
||||
"subject": "URGENT: invoice",
|
||||
"from_hdr": '"CEO" <ceo@bigcorp.com>',
|
||||
"to_hdr": "victim@target.tld",
|
||||
"mail_from": "<spammer@evil.example>",
|
||||
"rcpt_to": (
|
||||
"victim1@target.tld, victim2@target.tld, "
|
||||
"victim3@other.tld"
|
||||
),
|
||||
"return_path": "<bounce@kit.evil>",
|
||||
"x_mailer": "PHPMailer 6.0.7",
|
||||
"dkim_signed": "1",
|
||||
"spf_pass": "0",
|
||||
"attachment_count": "2",
|
||||
"attachments_json": (
|
||||
'[{"filename":"payload.exe","sha256":"deadbeef",'
|
||||
'"size":12,"content_type":"application/octet-stream"},'
|
||||
'{"filename":"resume.docx","sha256":"feedface",'
|
||||
'"size":34,"content_type":"application/msword"}]'
|
||||
),
|
||||
"urls_json": (
|
||||
'["https://xn--80ak6aa92e.example/login",'
|
||||
'"http://kit.evil/payload.bin"]'
|
||||
),
|
||||
"content_type": "multipart/mixed",
|
||||
},
|
||||
})
|
||||
|
||||
# Bounty still lands.
|
||||
mock_repo.add_bounty.assert_awaited_once()
|
||||
# And exactly one email.received publish.
|
||||
email_publishes = [
|
||||
p for p in published
|
||||
if p[0].endswith("email.received")
|
||||
]
|
||||
assert len(email_publishes) == 1
|
||||
topic, payload, event_type = email_publishes[0]
|
||||
assert event_type == "received"
|
||||
assert topic == "email.received"
|
||||
assert payload["attacker_uuid"] == "att-7"
|
||||
assert payload["from_domain"] == "bigcorp.com"
|
||||
assert payload["mail_from_domain"] == "evil.example"
|
||||
assert payload["return_path_domain"] == "kit.evil"
|
||||
assert payload["rcpt_count"] == 3
|
||||
assert payload["rcpt_domains"] == ["target.tld", "other.tld"]
|
||||
assert payload["x_mailer"] == "PHPMailer 6.0.7"
|
||||
assert payload["dkim_signed"] is True
|
||||
assert payload["spf_pass"] is False
|
||||
assert payload["urls"] == [
|
||||
"https://xn--80ak6aa92e.example/login",
|
||||
"http://kit.evil/payload.bin",
|
||||
]
|
||||
assert payload["attachment_sha256s"] == ["deadbeef", "feedface"]
|
||||
assert payload["attachment_extensions"] == [".exe", ".docx"]
|
||||
assert payload["source_id"] == "ABCD1234"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_message_stored_skips_publish_when_attacker_unresolved(self):
|
||||
"""If get_attacker_uuid_by_ip returns None, no orphan
|
||||
email.received event lands."""
|
||||
from decnet.web import ingester as _ing
|
||||
from decnet.web.ingester import _extract_bounty
|
||||
mock_repo = MagicMock()
|
||||
mock_repo.add_bounty = AsyncMock()
|
||||
mock_repo.upsert_credential = AsyncMock()
|
||||
mock_repo.get_attacker_uuid_by_ip = AsyncMock(return_value=None)
|
||||
|
||||
with patch.object(_ing, "get_bus") as p_bus, \
|
||||
patch.object(_ing, "publish_safely", new=AsyncMock()) as p_pub:
|
||||
await _extract_bounty(mock_repo, {
|
||||
"decky": "d",
|
||||
"service": "smtp",
|
||||
"attacker_ip": "10.0.0.1",
|
||||
"event_type": "message_stored",
|
||||
"fields": {
|
||||
"stored_as": "x.eml",
|
||||
"sha256": "h",
|
||||
"size": "1",
|
||||
"subject": "s",
|
||||
"from_hdr": "a@b.c",
|
||||
"to_hdr": "v@t.t",
|
||||
"mail_from": "a@b.c",
|
||||
"rcpt_to": "v@t.t",
|
||||
"attachment_count": "0",
|
||||
"content_type": "text/plain",
|
||||
},
|
||||
})
|
||||
mock_repo.add_bounty.assert_awaited_once()
|
||||
p_bus.assert_not_called()
|
||||
p_pub.assert_not_called()
|
||||
|
||||
def test_domain_of_handles_common_shapes(self):
|
||||
from decnet.web.ingester import _domain_of
|
||||
assert _domain_of('"CEO" <ceo@bigcorp.com>') == "bigcorp.com"
|
||||
assert _domain_of("ceo@bigcorp.com") == "bigcorp.com"
|
||||
assert _domain_of("<a@b.com>") == "b.com"
|
||||
assert _domain_of("BIGCORP@EXAMPLE.COM") == "example.com"
|
||||
assert _domain_of("") is None
|
||||
assert _domain_of(None) is None
|
||||
assert _domain_of("no-at-sign-here") is None
|
||||
|
||||
def test_attachment_extensions_unique_first_seen(self):
|
||||
from decnet.web.ingester import _attachment_extensions
|
||||
manifest = [
|
||||
{"filename": "a.EXE"},
|
||||
{"filename": "b.exe"}, # dedup'd against ".EXE"->".exe"
|
||||
{"filename": "noext"},
|
||||
{"filename": "report.pdf"},
|
||||
{"filename": "trailing."}, # dotless tail → skip
|
||||
]
|
||||
assert _attachment_extensions(manifest) == [".exe", ".pdf"]
|
||||
|
||||
def test_rcpt_projection_dedups_domains(self):
|
||||
from decnet.web.ingester import _rcpt_projection
|
||||
count, domains = _rcpt_projection(
|
||||
"a@x.com, b@x.com, c@y.com d@y.com",
|
||||
)
|
||||
# Whitespace-and-comma split gives 4 raw rcpts; domain set is 2.
|
||||
assert count == 4
|
||||
assert domains == ["x.com", "y.com"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_secret_b64_no_credential(self):
|
||||
"""The native branch keys off `secret_b64`. Fields lacking it
|
||||
|
||||
Reference in New Issue
Block a user