Replaces LICENSE (GPLv3 -> AGPLv3) and prepends `SPDX-License-Identifier: AGPL-3.0-or-later` to every source file across decnet/, decnet_web/, tests/, scripts/, and tools/.

Rationale: closes the GPLv3 ASP loophole so any party operating a modified DECNET as a network service must offer their modified source. Personal copyright (Samuel Paschuan) + inbound=outbound contributions make a future unilateral relicense infeasible.

- LICENSE: full AGPL-3.0 text (gnu.org/licenses/agpl-3.0.txt)
- COPYRIGHT: project copyright notice
- tools/add_spdx_headers.py: idempotent header injector (shebang- and PEP 263-aware)

Touches 1565 source files (.py, .ts, .tsx, .js, .jsx, .css, .sh). No behavior change; comments only.
156 lines
4.7 KiB
Python
156 lines
4.7 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||
"""Prompt builder for the email content class.
|
||
|
||
The LLM gets a tightly-scoped instruction and a small handful of
|
||
deterministic constraints. Persona mannerisms are *pre-selected* in
|
||
Python (1–2 of the persona's full list) and injected as hard rules —
|
||
small models otherwise treat the mannerism list as flavour text and
|
||
ignore it, and the corpus collapses into one voice.
|
||
|
||
**Em-dash suppression** is on by default; suppression is lifted only
|
||
for personas that opt in via ``uses_llms_heavily``. Em-dashes are a
|
||
strong stylometric tell for LLM-authored prose, and a honeypot mailbox
|
||
where every author uses them is a tell. Stage 6 of the realism
|
||
migration extracts the suppression block into a shared
|
||
``decnet.realism.prompts._style`` helper so file-class prompts pick
|
||
it up too.
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import secrets
|
||
from dataclasses import dataclass
|
||
from typing import Optional
|
||
|
||
from decnet.realism.personas import EmailPersona
|
||
|
||
|
||
@dataclass(frozen=True)
class PromptInputs:
    """Immutable bundle of the values ``build`` turns into one email prompt.

    The two ``parent_*`` fields default to ``None``. ``build`` switches to
    reply mode only when ``parent_subject`` is non-empty; in that mode a
    missing ``parent_excerpt`` is rendered as ``(no excerpt)``.
    """

    # Who writes the email and who receives it.
    sender: EmailPersona
    recipient: EmailPersona

    # Free-text hint describing what the email should be about.
    context_hint: str

    # Reply-only fields: subject of the message being answered, and a
    # short snippet of the last message in the thread.
    parent_subject: Optional[str] = None
    parent_excerpt: Optional[str] = None
|
||
|
||
|
||
# Lower-case language code -> English language name. ``_lang_label`` uses
# this so the prompt can name the language in words rather than by code.
_LANGUAGE_NAMES: dict[str, str] = {
    "en": "English",
    "es": "Spanish",
    "pt": "Portuguese",
    "fr": "French",
    "de": "German",
    "it": "Italian",
    "nl": "Dutch",
    "ja": "Japanese",
    "zh": "Chinese",
}
|
||
|
||
|
||
def _lang_label(code: str) -> str:
    """Return the English name for a language *code*.

    The lookup ignores case. A code with no entry in ``_LANGUAGE_NAMES``
    is handed back exactly as it was passed in (original casing kept).
    """
    key = code.lower()
    if key in _LANGUAGE_NAMES:
        return _LANGUAGE_NAMES[key]
    return code
|
||
|
||
|
||
def select_mannerisms(
    persona: EmailPersona,
    *,
    rng: Optional[secrets.SystemRandom] = None,
    n: int = 2,
) -> list[str]:
    """Pick at most *n* of the persona's mannerisms using *rng*.

    Args:
        persona: Object whose ``mannerisms`` iterable is the candidate pool.
        rng: Random source used to shuffle the pool. When omitted, a fresh
            ``secrets.SystemRandom`` is used. ``SystemRandom`` draws from OS
            entropy and cannot be seeded, so a reproducible pick requires
            passing a seeded ``random.Random``-compatible object here.
        n: Maximum number of mannerisms to return.

    Returns:
        A new list. It is empty when the persona has no mannerisms or when
        *n* is zero or negative. It is the full pool, in declared order,
        when the persona declares *n* or fewer. Otherwise it is the first
        *n* entries of the shuffled pool.
    """
    # Work on a copy so shuffling never reorders the persona's own data.
    pool = list(persona.mannerisms)

    # A negative n would otherwise reach ``pool[:n]`` and slice from the
    # end, returning the wrong number of items instead of none.
    if not pool or n <= 0:
        return []
    if len(pool) <= n:
        return pool

    # Only build the fallback RNG once a shuffle is actually required.
    rnd = rng if rng is not None else secrets.SystemRandom()
    rnd.shuffle(pool)
    return pool[:n]
|
||
|
||
|
||
def build(
    inputs: PromptInputs,
    *,
    rng: Optional[secrets.SystemRandom] = None,
) -> tuple[str, list[str]]:
    """Assemble the LLM prompt for one email; return ``(prompt, mannerisms_used)``.

    Args:
        inputs: Sender and recipient personas, the context hint, and (for
            replies) the parent subject and excerpt.
        rng: Passed straight through to ``select_mannerisms``.

    Returns:
        The prompt text with no leading or trailing whitespace, plus the
        list of mannerisms that were injected into it. ``mannerisms_used``
        flows back into the persisted ``payload`` JSON so an analyst can
        see *why* a given email reads the way it does.
    """
    author = inputs.sender
    addressee = inputs.recipient
    language_name = _lang_label(author.language or "en")

    # Mannerisms are rendered as a bullet list, with a placeholder bullet
    # when the persona has none.
    picked = select_mannerisms(author, rng=rng)
    if picked:
        mannerism_lines = "\n".join(f"- {item}" for item in picked)
    else:
        mannerism_lines = "- (no specific mannerisms; write in the persona's tone)"

    # The custom tone text replaces the tone label only when the persona
    # selects "custom" and actually provides that text.
    tone = author.tone
    if tone == "custom" and author.tone_custom:
        tone = author.tone_custom

    # Em-dash suppression is the default; heavy-LLM personas opt out.
    if author.uses_llms_heavily:
        dash_rule = (
            "Em-dashes are fine — this persona uses them naturally. "
            "Write in your usual style."
        )
    else:
        dash_rule = (
            "Do NOT use em-dashes (—). Use commas, periods, or "
            "parentheses instead. Em-dashes are a tell."
        )

    if author.signature:
        signature_rule = f"Use this exact signature block:\n{author.signature}"
    else:
        signature_rule = "End with a short, plausible signature for the persona's role."

    # A non-empty parent subject puts the prompt in reply mode.
    if inputs.parent_subject:
        excerpt = inputs.parent_excerpt or "(no excerpt)"
        # Each piece keeps its trailing newline, so the reply block ends
        # with "\n" just as the prompt text has always had it.
        thread_text = "".join(
            [
                "This is a REPLY in an ongoing thread.\n",
                f"- Parent subject: {inputs.parent_subject}\n",
                f"- Parent excerpt: {excerpt}\n",
                "- Begin the body assuming the recipient already read the parent.\n",
            ]
        )
        subject_rule = (
            "Subject must be the parent subject prefixed with 'Re: ' "
            "(no double 'Re: Re:')."
        )
    else:
        thread_text = "This is a NEW thread (no prior context)."
        subject_rule = (
            "Generate a short, specific subject line (≤ 80 chars) "
            "appropriate to the context."
        )

    # One entry per prompt line; "" entries are the blank separator lines.
    prompt_lines = [
        "You are writing one corporate email, RFC 2822 plain-text body only.",
        "",
        "Persona — sender:",
        f"- Name: {author.name}",
        f"- Role: {author.role}",
        f"- Tone: {tone}",
        "- Mannerisms (must show through):",
        mannerism_lines,
        "",
        "Persona — recipient:",
        f"- Name: {addressee.name}",
        f"- Role: {addressee.role}",
        "",
        f"Context hint: {inputs.context_hint}",
        "",
        "Thread context:",
        thread_text,
        "",
        "Hard rules:",
        f"1. Write the email body in {language_name}. Do not translate or code-switch.",
        f"2. {dash_rule}",
        f"3. {subject_rule}",
        f"4. {signature_rule}",
        '5. Output ONLY the email — first line is "Subject: <subject>", '
        "then a blank line, then the body. "
        "No commentary, no markdown fences, no preamble.",
    ]
    return "\n".join(prompt_lines).strip(), picked
|