Files
DECNET/decnet/orchestrator/emailgen/threads.py
anti 3ee55ec341 feat(emailgen): Ollama-driven fake email worker for IMAP/POP3 deckies
Second orchestrator worker (decnet emailgen) that drips persona-driven,
threaded, multi-language fake emails into running mail deckies.  Personas
live on Topology.email_personas; topology-wide language_default falls
through to any persona that doesn't pin its own.  Em-dashes are
suppressed at the prompt layer by default and only lifted for personas
explicitly marked uses_llms_heavily — em-dashes are an LLM tell and a
flat corpus of em-dashed mail is a giveaway.

EML delivery writes into /var/spool/decnet-emails/<thread>/<msg>.eml on
the mail decky via docker exec; wiring the IMAP/POP3 templates to read
from that spool (replacing the hardcoded _BAIT_EMAILS) is the next step.
2026-04-26 22:16:19 -04:00

76 lines
2.5 KiB
Python

"""RFC 2822 thread-chain bookkeeping.
A thread is a worker-side UUID that groups one or more emails between
the same two personas. ``In-Reply-To`` carries the immediate parent's
``Message-ID``; ``References`` carries the full ancestry chain.
The emailgen scheduler queries the repository for the most recent email
in any thread between (sender, recipient); if it finds one, it emits a
reply (continuing the chain). Otherwise it starts a new thread.
"""
from __future__ import annotations
import uuid
from dataclasses import dataclass
from typing import Optional
@dataclass(frozen=True)
class ThreadChain:
"""Immutable view of a thread's chain at a point in time.
``thread_id`` is opaque (UUID). ``parent_message_id`` is the most
recent message in the chain — the new reply's ``In-Reply-To`` field.
``references`` is the dot-separated history fed into the
``References:`` header (oldest-first per RFC 2822 §3.6.4).
``parent_subject`` carries the subject we're replying to, so the
reply can prepend ``Re:`` correctly.
"""
thread_id: str
parent_message_id: str
references: tuple[str, ...]
parent_subject: str
def new_thread_id() -> str:
return str(uuid.uuid4())
def reply_subject(parent_subject: str) -> str:
"""Prepend ``Re:`` to *parent_subject* if not already a reply.
Folds repeat ``Re: Re: Re:`` into a single ``Re:`` — Outlook /
Thunderbird both do this and an attacker reading the maildir would
notice the corpus's missing convention immediately.
"""
s = parent_subject.strip()
lowered = s.lower()
while lowered.startswith("re:"):
s = s[3:].lstrip()
lowered = s.lower()
return f"Re: {s}"
def references_for_reply(chain: Optional[ThreadChain]) -> str:
"""Build the ``References:`` header value for a reply.
Returns a space-separated list of message-ids, oldest-first, with
the parent appended. Empty string when *chain* is None (root).
"""
if chain is None:
return ""
refs = list(chain.references) + [chain.parent_message_id]
return " ".join(refs)
def new_message_id(domain: str) -> str:
"""Build an RFC 2822 ``Message-ID`` value (incl. angle brackets).
Worker side — the value is also stored in the DB so a future reply
can be threaded against it. Domain mirrors the sender's email
domain so an attacker grepping for tells doesn't find every
fake-corp email tagged with ``@example.com``.
"""
safe_domain = domain.strip() or "localhost"
return f"<{uuid.uuid4().hex}@{safe_domain}>"