Files
DECNET/decnet/orchestrator/emailgen/prompt.py
anti 3ee55ec341 feat(emailgen): Ollama-driven fake email worker for IMAP/POP3 deckies
Second orchestrator worker (decnet emailgen) that drips persona-driven,
threaded, multi-language fake emails into running mail deckies.  Personas
live on Topology.email_personas; topology-wide language_default falls
through to any persona that doesn't pin its own.  Em-dashes are
suppressed at the prompt layer by default and only lifted for personas
explicitly marked uses_llms_heavily — em-dashes are an LLM tell and a
flat corpus of em-dashed mail is a giveaway.

EML delivery writes into /var/spool/decnet-emails/<thread>/<msg>.eml on
the mail decky via docker exec; wiring the IMAP/POP3 templates to read
from that spool (replacing the hardcoded _BAIT_EMAILS) is the next step.
2026-04-26 22:16:19 -04:00

152 lines
4.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Ollama prompt builder for emailgen.
The LLM gets a tightly-scoped instruction and a small handful of
deterministic constraints. Persona mannerisms are *pre-selected* in
Python (12 of the persona's full list) and injected as hard rules —
small models otherwise treat the mannerism list as flavour text and
ignore it, and the corpus collapses into one voice.
**Em-dash suppression** is on by default; suppression is lifted only
for personas that opt in via ``uses_llms_heavily``. Em-dashes are a
strong stylometric tell for LLM-authored prose, and a honeypot mailbox
where every author uses them is a tell.
"""
from __future__ import annotations
import secrets
from dataclasses import dataclass
from typing import Optional
from decnet.orchestrator.emailgen.personas import EmailPersona
@dataclass(frozen=True)
class PromptInputs:
sender: EmailPersona
recipient: EmailPersona
context_hint: str
parent_subject: Optional[str] = None # set when replying
parent_excerpt: Optional[str] = None # short snippet of last msg
_LANGUAGE_NAMES = {
"en": "English",
"es": "Spanish",
"pt": "Portuguese",
"fr": "French",
"de": "German",
"it": "Italian",
"nl": "Dutch",
"ja": "Japanese",
"zh": "Chinese",
}
def _lang_label(code: str) -> str:
return _LANGUAGE_NAMES.get(code.lower(), code)
def select_mannerisms(
persona: EmailPersona,
*,
rng: Optional[secrets.SystemRandom] = None,
n: int = 2,
) -> list[str]:
"""Pick *n* mannerisms deterministically given *rng*.
Returns up to *n*; falls back to the full list when the persona
declares fewer. Determinism (under a seeded RNG) is what makes
tests practical — otherwise mannerism injection is unverifiable.
"""
rnd = rng or secrets.SystemRandom()
pool = list(persona.mannerisms)
if not pool:
return []
if len(pool) <= n:
return pool
rnd.shuffle(pool)
return pool[:n]
def build(
inputs: PromptInputs,
*,
rng: Optional[secrets.SystemRandom] = None,
) -> tuple[str, list[str]]:
"""Return ``(prompt, mannerisms_used)``.
``mannerisms_used`` flows back into the persisted ``payload`` JSON
so an analyst can see *why* a given email reads the way it does.
"""
sender = inputs.sender
recipient = inputs.recipient
language = _lang_label(sender.language or "en")
mannerisms = select_mannerisms(sender, rng=rng)
mannerism_block = (
"\n".join(f"- {m}" for m in mannerisms)
if mannerisms
else "- (no specific mannerisms; write in the persona's tone)"
)
if sender.uses_llms_heavily:
em_dash_rule = (
"Em-dashes are fine — this persona uses them naturally. "
"Write in your usual style."
)
else:
em_dash_rule = (
"Do NOT use em-dashes (—). Use commas, periods, or "
"parentheses instead. Em-dashes are a tell."
)
sig_block = (
f"Use this exact signature block:\n{sender.signature}"
if sender.signature
else "End with a short, plausible signature for the persona's role."
)
if inputs.parent_subject:
thread_block = (
f"This is a REPLY in an ongoing thread.\n"
f"- Parent subject: {inputs.parent_subject}\n"
f"- Parent excerpt: {inputs.parent_excerpt or '(no excerpt)'}\n"
f"- Begin the body assuming the recipient already read the parent.\n"
)
subject_rule = (
"Subject must be the parent subject prefixed with 'Re: ' "
"(no double 'Re: Re:')."
)
else:
thread_block = "This is a NEW thread (no prior context)."
subject_rule = (
"Generate a short, specific subject line (≤ 80 chars) "
"appropriate to the context."
)
prompt = f"""You are writing one corporate email, RFC 2822 plain-text body only.
Persona — sender:
- Name: {sender.name}
- Role: {sender.role}
- Tone: {sender.tone}
- Mannerisms (must show through):
{mannerism_block}
Persona — recipient:
- Name: {recipient.name}
- Role: {recipient.role}
Context hint: {inputs.context_hint}
Thread context:
{thread_block}
Hard rules:
1. Write the email body in {language}. Do not translate or code-switch.
2. {em_dash_rule}
3. {subject_rule}
4. {sig_block}
5. Output ONLY the email — first line is "Subject: <subject>", then a blank line, then the body. No commentary, no markdown fences, no preamble.
"""
return prompt.strip(), mannerisms