merge: testing → main (reconcile 2-week divergence)

This commit is contained in:
2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions

View File

@@ -0,0 +1,9 @@
"""Prompt builders for LLM-enriched content.
* :mod:`decnet.realism.prompts.email` — corporate-email body builder.
Stage 6 of the realism migration adds ``filebody.py``, ``filename.py``,
and a ``_style.py`` helper so em-dash suppression sits in one place
across email + file-class prompts.
"""
from __future__ import annotations

View File

@@ -0,0 +1,39 @@
"""Shared stylometric guards for LLM-bound prompts.
Lifted from the original ``orchestrator.emailgen.prompt`` em-dash
block so file-class prompts (note / todo / draft / script bodies)
pick up the same suppression. Per the
``feedback_em_dash_llm_tell.md`` memory: em-dashes (—) are a strong
LLM-authorship tell, suppress by default; allow only for personas
explicitly opted in via ``EmailPersona.uses_llms_heavily``.
"""
from __future__ import annotations
from decnet.realism.personas import EmailPersona
# Default instruction: forbid em-dashes outright.
_SUPPRESS_RULE = (
    "Do NOT use em-dashes (—). "
    "Use commas, periods, or parentheses instead. "
    "Em-dashes are a tell."
)

# Instruction used for personas flagged ``uses_llms_heavily``.
_ALLOW_RULE = (
    "Em-dashes are fine — this persona uses them naturally. "
    "Write in your usual style."
)


def em_dash_rule(persona: EmailPersona) -> str:
    """Pick the em-dash prompt line that applies to *persona*.

    Personas with ``uses_llms_heavily`` set get the permissive rule;
    every other persona gets the suppression rule.
    """
    return _ALLOW_RULE if persona.uses_llms_heavily else _SUPPRESS_RULE
def strip_em_dashes(text: str, persona: EmailPersona) -> str:
    """Replace em-dashes in *text* with a comma and a space.

    Belt-and-braces guard: even with the prompt rule in place, small
    models leak em-dashes occasionally. Each em-dash, together with a
    single space on either side of it, becomes ``", "`` so the output
    still reads naturally.

    *persona* decides whether the guard applies: when
    ``persona.uses_llms_heavily`` is true, *text* is returned unchanged.
    """
    if persona.uses_llms_heavily:
        return text

    # Written as an escape so the target survives any tooling that drops
    # or re-encodes non-ASCII characters in source files.
    # NOTE(review): the previous implementation replaced two targets that
    # both appear as empty strings; if a second dash variant was
    # intended, add it here.
    em_dash = "\u2014"

    # Spaced variants go first so "a \u2014 b" becomes "a, b", not "a , b".
    for variant in (f" {em_dash} ", f"{em_dash} ", f" {em_dash}", em_dash):
        text = text.replace(variant, ", ")
    return text

View File

@@ -0,0 +1,154 @@
"""Prompt builder for the email content class.
The LLM gets a tightly-scoped instruction and a small handful of
deterministic constraints. Persona mannerisms are *pre-selected* in
Python (1-2 of the persona's full list) and injected as hard rules —
small models otherwise treat the mannerism list as flavour text and
ignore it, and the corpus collapses into one voice.
**Em-dash suppression** is on by default; suppression is lifted only
for personas that opt in via ``uses_llms_heavily``. Em-dashes are a
strong stylometric tell for LLM-authored prose, and a honeypot mailbox
where every author uses them is a tell. Stage 6 of the realism
migration extracts the suppression block into a shared
``decnet.realism.prompts._style`` helper so file-class prompts pick
it up too.
"""
from __future__ import annotations
import secrets
from dataclasses import dataclass
from typing import Optional
from decnet.realism.personas import EmailPersona
@dataclass(frozen=True)
class PromptInputs:
    """Immutable bundle of the values the email prompt is built from."""

    # Persona writing the message.
    sender: EmailPersona
    # Persona the message is addressed to.
    recipient: EmailPersona
    # Free-text hint about what the email is about.
    context_hint: str
    # Subject of the message being answered; set when replying.
    parent_subject: Optional[str] = None
    # Short snippet of the last message in the thread.
    parent_excerpt: Optional[str] = None
# Two-letter language code -> language name spelled out in the prompt.
_LANGUAGE_NAMES = {
    "en": "English",
    "es": "Spanish",
    "pt": "Portuguese",
    "fr": "French",
    "de": "German",
    "it": "Italian",
    "nl": "Dutch",
    "ja": "Japanese",
    "zh": "Chinese",
}


def _lang_label(code: Optional[str]) -> str:
    """Return the language name to use in the prompt for *code*.

    The lookup is case-insensitive. A code that is not in the table is
    returned unchanged, so the prompt still names the language the
    persona declared. A missing or empty code falls back to English
    instead of raising, matching the file-body prompt builder.
    """
    return _LANGUAGE_NAMES.get((code or "en").lower(), code or "English")
def select_mannerisms(
    persona: EmailPersona,
    *,
    rng: Optional[secrets.SystemRandom] = None,
    n: int = 2,
) -> list[str]:
    """Pick at most *n* of *persona*'s mannerisms.

    When the persona declares *n* or fewer mannerisms, the full list is
    returned in declaration order and no random generator is used.
    Otherwise a copy of the list is shuffled with *rng* and its first
    *n* entries are returned; ``persona.mannerisms`` is never mutated.

    A non-positive *n* selects nothing and returns ``[]``.

    *rng* defaults to a fresh ``secrets.SystemRandom``, which cannot be
    seeded. For a reproducible selection (e.g. in tests) pass a seeded
    ``random.Random``; only its ``shuffle`` method is used.
    """
    # Guard first: a negative n would otherwise reach ``pool[:n]`` and
    # return all but the last |n| items instead of none.
    if n <= 0:
        return []

    pool = list(persona.mannerisms)
    if len(pool) <= n:
        return pool

    # ``is None`` rather than truthiness, so a caller-supplied generator
    # is always honoured.
    rnd = secrets.SystemRandom() if rng is None else rng
    rnd.shuffle(pool)
    return pool[:n]
def build(
    inputs: PromptInputs,
    *,
    rng: Optional[secrets.SystemRandom] = None,
) -> tuple[str, list[str]]:
    """Assemble the email prompt and report the mannerisms it pins.

    Returns ``(prompt, mannerisms_used)``. The second element flows back
    into the persisted ``payload`` JSON so an analyst can see *why* a
    given email reads the way it does. *rng* is forwarded to
    :func:`select_mannerisms`.
    """
    sender, recipient = inputs.sender, inputs.recipient
    language = _lang_label(sender.language or "en")

    # Mannerisms are chosen here and injected as explicit bullet lines.
    mannerisms = select_mannerisms(sender, rng=rng)
    if mannerisms:
        mannerism_block = "\n".join(f"- {item}" for item in mannerisms)
    else:
        mannerism_block = "- (no specific mannerisms; write in the persona's tone)"

    # Em-dashes are suppressed unless the persona opted in.
    if not sender.uses_llms_heavily:
        dash_rule = (
            "Do NOT use em-dashes (—). Use commas, periods, or "
            "parentheses instead. Em-dashes are a tell."
        )
    else:
        dash_rule = (
            "Em-dashes are fine — this persona uses them naturally. "
            "Write in your usual style."
        )

    if sender.signature:
        sig_block = f"Use this exact signature block:\n{sender.signature}"
    else:
        sig_block = "End with a short, plausible signature for the persona's role."

    # A parent subject marks this message as a reply in an existing thread.
    if not inputs.parent_subject:
        thread_block = "This is a NEW thread (no prior context)."
        subject_rule = (
            "Generate a short, specific subject line (≤ 80 chars) "
            "appropriate to the context."
        )
    else:
        excerpt = inputs.parent_excerpt or "(no excerpt)"
        thread_lines = [
            "This is a REPLY in an ongoing thread.",
            f"- Parent subject: {inputs.parent_subject}",
            f"- Parent excerpt: {excerpt}",
            "- Begin the body assuming the recipient already read the parent.",
        ]
        thread_block = "\n".join(thread_lines) + "\n"
        subject_rule = (
            "Subject must be the parent subject prefixed with 'Re: ' "
            "(no double 'Re: Re:')."
        )

    # The free-form tone only applies when the tone is "custom" and a
    # custom string is actually set.
    tone = sender.tone
    if tone == "custom" and sender.tone_custom:
        tone = sender.tone_custom

    prompt_lines = [
        "You are writing one corporate email, RFC 2822 plain-text body only.",
        "Persona — sender:",
        f"- Name: {sender.name}",
        f"- Role: {sender.role}",
        f"- Tone: {tone}",
        "- Mannerisms (must show through):",
        mannerism_block,
        "Persona — recipient:",
        f"- Name: {recipient.name}",
        f"- Role: {recipient.role}",
        f"Context hint: {inputs.context_hint}",
        "Thread context:",
        thread_block,
        "Hard rules:",
        f"1. Write the email body in {language}. Do not translate or code-switch.",
        f"2. {dash_rule}",
        f"3. {subject_rule}",
        f"4. {sig_block}",
        '5. Output ONLY the email — first line is "Subject: <subject>", then a blank line, then the body. No commentary, no markdown fences, no preamble.',
    ]
    return "\n".join(prompt_lines).strip(), mannerisms

View File

@@ -0,0 +1,91 @@
"""Class-conditioned prompt builder for user-class file bodies.
Stage 6 of the realism migration. Only user-classes (``note``,
``todo``, ``draft``, ``script``) get LLM enrichment — system-class
content (cron logs, daemon logs, /tmp caches) is *supposed* to look
formulaic, and an LLM-authored cron log is more suspicious than a
templated one.
The prompt asks for *short* output (LLM-authored ten-page essays in
``~/notes.txt`` are an instant tell) and pins the exit shape so the
worker doesn't need to scrape boilerplate. Em-dash suppression
flows through :mod:`decnet.realism.prompts._style`.
"""
from __future__ import annotations
from decnet.realism.personas import EmailPersona
from decnet.realism.prompts._style import em_dash_rule
from decnet.realism.taxonomy import ContentClass
# Two-letter language code -> language name spelled out in the prompt.
_LANGUAGE_NAMES = {
    "en": "English",
    "es": "Spanish",
    "pt": "Portuguese",
    "fr": "French",
    "de": "German",
    "it": "Italian",
    "nl": "Dutch",
    "ja": "Japanese",
    "zh": "Chinese",
}


def _lang_label(code: str) -> str:
    """Map a language *code* to the name used in the prompt.

    Matching ignores case. A missing or empty code is looked up as
    ``"en"``; a code that is not in the table is returned unchanged.
    """
    fallback = code or "English"
    key = code.lower() if code else "en"
    return _LANGUAGE_NAMES[key] if key in _LANGUAGE_NAMES else fallback
# Per-class guidance paragraph interpolated into the file-body prompt.
# The size ranges are spelled with ASCII hyphens so they survive tooling
# that drops non-ASCII dashes (which had collapsed e.g. "10-25" into
# "1025").
_CLASS_GUIDANCE: dict[ContentClass, str] = {
    ContentClass.NOTE: (
        "A personal note file the persona keeps on their dev box. "
        "2-6 short lines. Mix of TODOs, half-formed thoughts, "
        "shorthand reminders. NOT a polished document. No headers "
        "or markdown sections."
    ),
    ContentClass.TODO: (
        "A markdown TODO list the persona keeps on their dev box. "
        "3-8 items in `- [ ] item` / `- [x] item` form. Some checked, "
        "some not. Items are short, work-flavoured, lowercase, no "
        "prose paragraphs. No headers. No introductory sentence."
    ),
    ContentClass.DRAFT: (
        "A short draft email or memo the persona is working on. "
        "2-4 short paragraphs, conversational tone. No subject line, "
        "no headers — this is the body in a notes file, not a sent "
        "email. Sign off the way the persona would in their voice."
    ),
    ContentClass.SCRIPT: (
        "A short utility script the persona wrote. Pick a plausible "
        "interpreter (bash or python3) and start with the matching "
        "shebang. 10-25 lines. Real-feeling intent (a backup, a "
        "log rotation, a cleanup). Inline comments allowed but sparse."
    ),
}
def build(
    content_class: ContentClass,
    persona: EmailPersona,
) -> str:
    """Return a prompt for one body of *content_class* by *persona*.

    Output the LLM is expected to produce: *just the file body*, no
    commentary, no markdown fences. Caller substitutes em-dashes
    server-side via :func:`decnet.realism.prompts._style.strip_em_dashes`
    as a belt-and-braces guard.

    Raises :class:`KeyError` when no guidance is registered for
    *content_class*.
    """
    guidance = _CLASS_GUIDANCE.get(content_class)
    if guidance is None:
        raise KeyError(
            f"no filebody prompt registered for content_class={content_class!r}"
        )
    language = _lang_label(persona.language or "en")

    # The free-form tone only applies when the tone is "custom" and a
    # custom string is actually set.
    if persona.tone == "custom" and persona.tone_custom:
        tone = persona.tone_custom
    else:
        tone = persona.tone

    return (
        f"You are writing one short file the persona below would "
        f"plausibly keep on their dev box.\n\n"
        f"Persona:\n"
        f"- Name: {persona.name}\n"
        f"- Role: {persona.role}\n"
        f"- Tone: {tone}\n\n"
        f"File class: {content_class.value}\n"
        f"Guidance: {guidance}\n\n"
        f"Hard rules:\n"
        f"1. Write the file body in {language}. Do not translate or code-switch.\n"
        f"2. {em_dash_rule(persona)}\n"
        # The continuation carries no leading space: the previous
        # fragment already ends with one.
        f"3. Output ONLY the file body. No commentary, no markdown "
        f"fences, no preamble like 'Here is the file:'.\n"
    ).strip()