merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
9
decnet/realism/prompts/__init__.py
Normal file
9
decnet/realism/prompts/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""Prompt builders for LLM-enriched content.
|
||||
|
||||
* :mod:`decnet.realism.prompts.email` — corporate-email body builder.
|
||||
|
||||
Stage 6 of the realism migration adds ``filebody.py``, ``filename.py``,
|
||||
and a ``_style.py`` helper so em-dash suppression sits in one place
|
||||
across email + file-class prompts.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
39
decnet/realism/prompts/_style.py
Normal file
39
decnet/realism/prompts/_style.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""Shared stylometric guards for LLM-bound prompts.
|
||||
|
||||
Lifted from the original ``orchestrator.emailgen.prompt`` em-dash
|
||||
block so file-class prompts (note / todo / draft / script bodies)
|
||||
pick up the same suppression. Per the
|
||||
``feedback_em_dash_llm_tell.md`` memory: em-dashes (—) are a strong
|
||||
LLM-authorship tell, suppress by default; allow only for personas
|
||||
explicitly opted in via ``EmailPersona.uses_llms_heavily``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.realism.personas import EmailPersona
|
||||
|
||||
|
||||
# Default instruction: forbid em-dashes and name the punctuation to use instead.
_SUPPRESS_RULE = (
    "Do NOT use em-dashes (—). "
    "Use commas, periods, or parentheses instead. "
    "Em-dashes are a tell."
)
# Instruction for personas that opted in via ``uses_llms_heavily``.
_ALLOW_RULE = (
    "Em-dashes are fine — this persona uses them naturally. "
    "Write in your usual style."
)


def em_dash_rule(persona: EmailPersona) -> str:
    """Choose the em-dash instruction to embed in *persona*'s prompt.

    Returns the permissive rule when ``persona.uses_llms_heavily`` is
    truthy and the suppression rule for everyone else.
    """
    return _ALLOW_RULE if persona.uses_llms_heavily else _SUPPRESS_RULE
|
||||
|
||||
|
||||
def strip_em_dashes(text: str, persona: EmailPersona) -> str:
    """Replace leaked em-dashes (and en-dashes) in *text* with ``", "``.

    Belt-and-braces: even with the prompt rule, small models leak
    em-dashes occasionally. Each dash, together with the spaces or
    tabs around it, becomes a single comma+space, so ``"a — b"`` and
    ``"a—b"`` both read ``"a, b"``. Replacing only the dash character
    would leave ``"a ,  b"``, which does not read naturally. A run of
    adjacent dashes collapses into one comma. Newlines are never
    consumed, so the line structure of *text* is preserved.

    Args:
        text: Model output to clean.
        persona: Author persona; only ``uses_llms_heavily`` is read.

    Returns:
        *text* unchanged for opt-in personas, otherwise *text* with
        every em-dash and en-dash substituted.
    """
    if persona.uses_llms_heavily:
        return text
    # Function-scope import so this helper does not depend on the
    # module's import block.
    import re

    # Optional leading blanks, one dash, then any mix of further dashes
    # and blanks: the whole run becomes one ", ".
    return re.sub(r"[ \t]*[—–][—– \t]*", ", ", text)
|
||||
154
decnet/realism/prompts/email.py
Normal file
154
decnet/realism/prompts/email.py
Normal file
@@ -0,0 +1,154 @@
|
||||
"""Prompt builder for the email content class.
|
||||
|
||||
The LLM gets a tightly-scoped instruction and a small handful of
|
||||
deterministic constraints. Persona mannerisms are *pre-selected* in
|
||||
Python (1–2 of the persona's full list) and injected as hard rules —
|
||||
small models otherwise treat the mannerism list as flavour text and
|
||||
ignore it, and the corpus collapses into one voice.
|
||||
|
||||
**Em-dash suppression** is on by default; suppression is lifted only
|
||||
for personas that opt in via ``uses_llms_heavily``. Em-dashes are a
|
||||
strong stylometric tell for LLM-authored prose, and a honeypot mailbox
|
||||
where every author uses them is a tell. Stage 6 of the realism
|
||||
migration extracts the suppression block into a shared
|
||||
``decnet.realism.prompts._style`` helper so file-class prompts pick
|
||||
it up too.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import secrets
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from decnet.realism.personas import EmailPersona
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PromptInputs:
    """Immutable bundle of everything :func:`build` needs for one email.

    The two ``parent_*`` fields describe the message being answered.
    :func:`build` treats the email as a reply only when
    ``parent_subject`` is set, and substitutes a placeholder when
    ``parent_excerpt`` is missing.
    """

    # Persona writing the email.
    sender: EmailPersona
    # Persona the email is addressed to.
    recipient: EmailPersona
    # Free-text hint interpolated into the prompt as the context line.
    context_hint: str
    # Subject of the message being replied to; set when replying.
    parent_subject: Optional[str] = None
    # Short snippet of the last message in the thread.
    parent_excerpt: Optional[str] = None
|
||||
|
||||
|
||||
# Two-letter language codes mapped to the language name used in the prompt.
_LANGUAGE_NAMES = {
    "en": "English",
    "es": "Spanish",
    "pt": "Portuguese",
    "fr": "French",
    "de": "German",
    "it": "Italian",
    "nl": "Dutch",
    "ja": "Japanese",
    "zh": "Chinese",
}


def _lang_label(code: Optional[str]) -> str:
    """Return the prompt-facing language name for *code*.

    Lookup is case-insensitive. A code missing from the table is
    returned unchanged. A missing or empty code (``None`` / ``""``)
    is treated as ``"en"`` instead of raising or yielding an empty
    label.
    """
    effective = code or "en"
    return _LANGUAGE_NAMES.get(effective.lower(), effective)
|
||||
|
||||
|
||||
def select_mannerisms(
    persona: EmailPersona,
    *,
    rng: Optional[secrets.SystemRandom] = None,
    n: int = 2,
) -> list[str]:
    """Pick up to *n* of *persona*'s mannerisms.

    When the persona declares *n* or fewer mannerisms the full list is
    returned in its declared order and *rng* is not touched. Otherwise
    a copy of the list is shuffled with *rng* and its first *n* entries
    are returned, so the result is reproducible whenever *rng* is.
    That is what makes tests practical; otherwise mannerism injection
    is unverifiable.

    Args:
        persona: Source of the ``mannerisms`` sequence (never mutated).
        rng: Random source; only its ``shuffle`` method is used. A
            fresh ``secrets.SystemRandom`` is created when omitted.
        n: Maximum number of mannerisms to return.

    Returns:
        A new list of at most *n* mannerisms; empty when the persona
        has none or when *n* is zero or negative.
    """
    if n <= 0:
        # Without this guard a negative *n* reaches ``pool[:n]``, which
        # slices from the end and returns all but the last |n| items.
        return []
    pool = list(persona.mannerisms)
    if len(pool) <= n:
        # Also covers the empty pool.
        return pool
    rnd = rng if rng is not None else secrets.SystemRandom()
    # Shuffle-then-slice is kept as-is so output under a seeded RNG is
    # identical to earlier versions of this function.
    rnd.shuffle(pool)
    return pool[:n]
|
||||
|
||||
|
||||
def build(
    inputs: PromptInputs,
    *,
    rng: Optional[secrets.SystemRandom] = None,
) -> tuple[str, list[str]]:
    """Assemble the email prompt and report which mannerisms it used.

    Returns ``(prompt, mannerisms_used)``. ``mannerisms_used`` flows
    back into the persisted ``payload`` JSON so an analyst can see
    *why* a given email reads the way it does.

    Args:
        inputs: Sender, recipient, context hint and optional parent
            message details.
        rng: Forwarded to :func:`select_mannerisms`.
    """
    author = inputs.sender
    addressee = inputs.recipient
    language = _lang_label(author.language or "en")
    chosen = select_mannerisms(author, rng=rng)

    if chosen:
        mannerism_block = "\n".join(f"- {item}" for item in chosen)
    else:
        mannerism_block = "- (no specific mannerisms; write in the persona's tone)"

    # A custom tone only applies when the free-text field is actually filled.
    if author.tone == "custom" and author.tone_custom:
        tone = author.tone_custom
    else:
        tone = author.tone

    # NOTE(review): these two strings mirror the rules in
    # ``decnet.realism.prompts._style``; keep them in sync.
    dash_rule = (
        "Em-dashes are fine — this persona uses them naturally. "
        "Write in your usual style."
        if author.uses_llms_heavily
        else "Do NOT use em-dashes (—). Use commas, periods, or "
        "parentheses instead. Em-dashes are a tell."
    )

    if author.signature:
        sig_block = f"Use this exact signature block:\n{author.signature}"
    else:
        sig_block = "End with a short, plausible signature for the persona's role."

    if inputs.parent_subject:
        # The trailing "" keeps the newline that ends the reply block.
        thread_block = "\n".join(
            [
                "This is a REPLY in an ongoing thread.",
                f"- Parent subject: {inputs.parent_subject}",
                f"- Parent excerpt: {inputs.parent_excerpt or '(no excerpt)'}",
                "- Begin the body assuming the recipient already read the parent.",
                "",
            ]
        )
        subject_rule = (
            "Subject must be the parent subject prefixed with 'Re: ' "
            "(no double 'Re: Re:')."
        )
    else:
        thread_block = "This is a NEW thread (no prior context)."
        subject_rule = (
            "Generate a short, specific subject line (≤ 80 chars) "
            "appropriate to the context."
        )

    # One entry per prompt line; "" entries are the blank separator lines.
    lines = [
        "You are writing one corporate email, RFC 2822 plain-text body only.",
        "",
        "Persona — sender:",
        f"- Name: {author.name}",
        f"- Role: {author.role}",
        f"- Tone: {tone}",
        "- Mannerisms (must show through):",
        mannerism_block,
        "",
        "Persona — recipient:",
        f"- Name: {addressee.name}",
        f"- Role: {addressee.role}",
        "",
        f"Context hint: {inputs.context_hint}",
        "",
        "Thread context:",
        thread_block,
        "",
        "Hard rules:",
        f"1. Write the email body in {language}. Do not translate or code-switch.",
        f"2. {dash_rule}",
        f"3. {subject_rule}",
        f"4. {sig_block}",
        '5. Output ONLY the email — first line is "Subject: <subject>", '
        "then a blank line, then the body. "
        "No commentary, no markdown fences, no preamble.",
    ]
    return "\n".join(lines).strip(), chosen
|
||||
91
decnet/realism/prompts/filebody.py
Normal file
91
decnet/realism/prompts/filebody.py
Normal file
@@ -0,0 +1,91 @@
|
||||
"""Class-conditioned prompt builder for user-class file bodies.
|
||||
|
||||
Stage 6 of the realism migration. Only user-classes (``note``,
|
||||
``todo``, ``draft``, ``script``) get LLM enrichment — system-class
|
||||
content (cron logs, daemon logs, /tmp caches) is *supposed* to look
|
||||
formulaic, and an LLM-authored cron log is more suspicious than a
|
||||
templated one.
|
||||
|
||||
The prompt asks for *short* output (LLM-authored ten-page essays in
|
||||
``~/notes.txt`` are an instant tell) and pins the exit shape so the
|
||||
worker doesn't need to scrape boilerplate. Em-dash suppression
|
||||
flows through :mod:`decnet.realism.prompts._style`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.realism.personas import EmailPersona
|
||||
from decnet.realism.prompts._style import em_dash_rule
|
||||
from decnet.realism.taxonomy import ContentClass
|
||||
|
||||
|
||||
# Two-letter language codes mapped to the language name used in the prompt.
_LANGUAGE_NAMES = {
    "en": "English",
    "es": "Spanish",
    "pt": "Portuguese",
    "fr": "French",
    "de": "German",
    "it": "Italian",
    "nl": "Dutch",
    "ja": "Japanese",
    "zh": "Chinese",
}


def _lang_label(code: str) -> str:
    """Translate a language *code* into the name shown in the prompt.

    Matching ignores case. An empty or missing code counts as ``"en"``;
    a code that is not in the table is handed back unchanged.
    """
    if not code:
        code = "en"
    return _LANGUAGE_NAMES.get(code.lower(), code)
|
||||
|
||||
|
||||
# Per-class guidance text interpolated into the file-body prompt. Only the
# classes listed here have a prompt; each literal is wrapped on sentence
# boundaries so a single instruction can be edited in isolation.
_CLASS_GUIDANCE: dict[ContentClass, str] = {
    # Free-form personal notes.
    ContentClass.NOTE: (
        "A personal note file the persona keeps on their dev box. "
        "2–6 short lines. "
        "Mix of TODOs, half-formed thoughts, shorthand reminders. "
        "NOT a polished document. "
        "No headers or markdown sections."
    ),
    # Markdown checkbox list.
    ContentClass.TODO: (
        "A markdown TODO list the persona keeps on their dev box. "
        "3–8 items in `- [ ] item` / `- [x] item` form. "
        "Some checked, some not. "
        "Items are short, work-flavoured, lowercase, no prose paragraphs. "
        "No headers. "
        "No introductory sentence."
    ),
    # Unsent email / memo body.
    # NOTE(review): this text contains an em-dash while the prompt's hard
    # rules may forbid them; confirm that is intentional.
    ContentClass.DRAFT: (
        "A short draft email or memo the persona is working on. "
        "2–4 short paragraphs, conversational tone. "
        "No subject line, no headers — this is the body in a notes file, "
        "not a sent email. "
        "Sign off the way the persona would in their voice."
    ),
    # Small bash / python3 utility script.
    ContentClass.SCRIPT: (
        "A short utility script the persona wrote. "
        "Pick a plausible interpreter (bash or python3) and start "
        "with the matching shebang. "
        "10–25 lines. "
        "Real-feeling intent (a backup, a log rotation, a cleanup). "
        "Inline comments allowed but sparse."
    ),
}
|
||||
|
||||
|
||||
def build(
    content_class: ContentClass,
    persona: EmailPersona,
) -> str:
    """Return a prompt for one body of *content_class* by *persona*.

    Output the LLM is expected to produce: *just the file body*, no
    commentary, no markdown fences. Caller substitutes em-dashes
    server-side via :func:`decnet.realism.prompts._style.strip_em_dashes`
    as a belt-and-braces guard.

    Args:
        content_class: File class to write; must have an entry in
            ``_CLASS_GUIDANCE``.
        persona: Author persona; supplies name, role, tone, language
            and the em-dash preference.

    Returns:
        The prompt text with leading/trailing whitespace stripped.

    Raises:
        KeyError: If no guidance is registered for *content_class*.
    """
    guidance = _CLASS_GUIDANCE.get(content_class)
    if guidance is None:
        raise KeyError(
            f"no filebody prompt registered for content_class={content_class!r}"
        )
    language = _lang_label(persona.language or "en")
    # A custom tone only applies when the free-text field is actually filled.
    if persona.tone == "custom" and persona.tone_custom:
        tone = persona.tone_custom
    else:
        tone = persona.tone
    return (
        "You are writing one short file the persona below would "
        "plausibly keep on their dev box.\n\n"
        "Persona:\n"
        f"- Name: {persona.name}\n"
        f"- Role: {persona.role}\n"
        f"- Tone: {tone}\n\n"
        f"File class: {content_class.value}\n"
        f"Guidance: {guidance}\n\n"
        "Hard rules:\n"
        f"1. Write the file body in {language}. Do not translate or code-switch.\n"
        f"2. {em_dash_rule(persona)}\n"
        # The continuation fragment must not start with whitespace: the
        # previous fragment already ends with a space, and adjacent
        # literals are concatenated verbatim ("markdown  fences").
        "3. Output ONLY the file body. No commentary, no markdown "
        "fences, no preamble like 'Here is the file:'.\n"
    ).strip()
|
||||
Reference in New Issue
Block a user