merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
154
decnet/realism/prompts/email.py
Normal file
154
decnet/realism/prompts/email.py
Normal file
@@ -0,0 +1,154 @@
|
||||
"""Prompt builder for the email content class.
|
||||
|
||||
The LLM gets a tightly-scoped instruction and a small handful of
|
||||
deterministic constraints. Persona mannerisms are *pre-selected* in
|
||||
Python (1–2 of the persona's full list) and injected as hard rules —
|
||||
small models otherwise treat the mannerism list as flavour text and
|
||||
ignore it, and the corpus collapses into one voice.
|
||||
|
||||
**Em-dash suppression** is on by default; suppression is lifted only
|
||||
for personas that opt in via ``uses_llms_heavily``. Em-dashes are a
|
||||
strong stylometric tell for LLM-authored prose, and a honeypot mailbox
|
||||
where every author uses them is a tell. Stage 6 of the realism
|
||||
migration extracts the suppression block into a shared
|
||||
``decnet.realism.prompts._style`` helper so file-class prompts pick
|
||||
it up too.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import secrets
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from decnet.realism.personas import EmailPersona
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PromptInputs:
|
||||
sender: EmailPersona
|
||||
recipient: EmailPersona
|
||||
context_hint: str
|
||||
parent_subject: Optional[str] = None # set when replying
|
||||
parent_excerpt: Optional[str] = None # short snippet of last msg
|
||||
|
||||
|
||||
_LANGUAGE_NAMES = {
|
||||
"en": "English",
|
||||
"es": "Spanish",
|
||||
"pt": "Portuguese",
|
||||
"fr": "French",
|
||||
"de": "German",
|
||||
"it": "Italian",
|
||||
"nl": "Dutch",
|
||||
"ja": "Japanese",
|
||||
"zh": "Chinese",
|
||||
}
|
||||
|
||||
|
||||
def _lang_label(code: str) -> str:
|
||||
return _LANGUAGE_NAMES.get(code.lower(), code)
|
||||
|
||||
|
||||
def select_mannerisms(
|
||||
persona: EmailPersona,
|
||||
*,
|
||||
rng: Optional[secrets.SystemRandom] = None,
|
||||
n: int = 2,
|
||||
) -> list[str]:
|
||||
"""Pick *n* mannerisms deterministically given *rng*.
|
||||
|
||||
Returns up to *n*; falls back to the full list when the persona
|
||||
declares fewer. Determinism (under a seeded RNG) is what makes
|
||||
tests practical — otherwise mannerism injection is unverifiable.
|
||||
"""
|
||||
rnd = rng or secrets.SystemRandom()
|
||||
pool = list(persona.mannerisms)
|
||||
if not pool:
|
||||
return []
|
||||
if len(pool) <= n:
|
||||
return pool
|
||||
rnd.shuffle(pool)
|
||||
return pool[:n]
|
||||
|
||||
|
||||
def build(
|
||||
inputs: PromptInputs,
|
||||
*,
|
||||
rng: Optional[secrets.SystemRandom] = None,
|
||||
) -> tuple[str, list[str]]:
|
||||
"""Return ``(prompt, mannerisms_used)``.
|
||||
|
||||
``mannerisms_used`` flows back into the persisted ``payload`` JSON
|
||||
so an analyst can see *why* a given email reads the way it does.
|
||||
"""
|
||||
sender = inputs.sender
|
||||
recipient = inputs.recipient
|
||||
language = _lang_label(sender.language or "en")
|
||||
mannerisms = select_mannerisms(sender, rng=rng)
|
||||
mannerism_block = (
|
||||
"\n".join(f"- {m}" for m in mannerisms)
|
||||
if mannerisms
|
||||
else "- (no specific mannerisms; write in the persona's tone)"
|
||||
)
|
||||
|
||||
if sender.uses_llms_heavily:
|
||||
em_dash_rule = (
|
||||
"Em-dashes are fine — this persona uses them naturally. "
|
||||
"Write in your usual style."
|
||||
)
|
||||
else:
|
||||
em_dash_rule = (
|
||||
"Do NOT use em-dashes (—). Use commas, periods, or "
|
||||
"parentheses instead. Em-dashes are a tell."
|
||||
)
|
||||
|
||||
sig_block = (
|
||||
f"Use this exact signature block:\n{sender.signature}"
|
||||
if sender.signature
|
||||
else "End with a short, plausible signature for the persona's role."
|
||||
)
|
||||
|
||||
if inputs.parent_subject:
|
||||
thread_block = (
|
||||
f"This is a REPLY in an ongoing thread.\n"
|
||||
f"- Parent subject: {inputs.parent_subject}\n"
|
||||
f"- Parent excerpt: {inputs.parent_excerpt or '(no excerpt)'}\n"
|
||||
f"- Begin the body assuming the recipient already read the parent.\n"
|
||||
)
|
||||
subject_rule = (
|
||||
"Subject must be the parent subject prefixed with 'Re: ' "
|
||||
"(no double 'Re: Re:')."
|
||||
)
|
||||
else:
|
||||
thread_block = "This is a NEW thread (no prior context)."
|
||||
subject_rule = (
|
||||
"Generate a short, specific subject line (≤ 80 chars) "
|
||||
"appropriate to the context."
|
||||
)
|
||||
|
||||
prompt = f"""You are writing one corporate email, RFC 2822 plain-text body only.
|
||||
|
||||
Persona — sender:
|
||||
- Name: {sender.name}
|
||||
- Role: {sender.role}
|
||||
- Tone: {sender.tone_custom if sender.tone == "custom" and sender.tone_custom else sender.tone}
|
||||
- Mannerisms (must show through):
|
||||
{mannerism_block}
|
||||
|
||||
Persona — recipient:
|
||||
- Name: {recipient.name}
|
||||
- Role: {recipient.role}
|
||||
|
||||
Context hint: {inputs.context_hint}
|
||||
|
||||
Thread context:
|
||||
{thread_block}
|
||||
|
||||
Hard rules:
|
||||
1. Write the email body in {language}. Do not translate or code-switch.
|
||||
2. {em_dash_rule}
|
||||
3. {subject_rule}
|
||||
4. {sig_block}
|
||||
5. Output ONLY the email — first line is "Subject: <subject>", then a blank line, then the body. No commentary, no markdown fences, no preamble.
|
||||
"""
|
||||
return prompt.strip(), mannerisms
|
||||
Reference in New Issue
Block a user