refactor(realism): move emailgen LLM/personas/prompt into shared library

Lift the format-agnostic pieces from decnet/orchestrator/emailgen/ into the new decnet/realism/ library so file-class content generation (stage 3 of the realism migration) can reuse them. Email-specific delivery (RFC 2822 EML, IMAP/POP3 spool, thread chains) stays in orchestrator/. Renames (history-preserving git mv): emailgen/personas.py -> realism/personas.py emailgen/prompt.py -> realism/prompts/email.py emailgen/global_pool.py -> realism/personas_pool.py emailgen/llm/ -> realism/llm/ Env-var clean break (pre-v1, no aliases): DECNET_EMAILGEN_LLM -> DECNET_REALISM_LLM DECNET_EMAILGEN_MODEL -> DECNET_REALISM_MODEL DECNET_EMAILGEN_TIMEOUT -> DECNET_REALISM_TIMEOUT DECNET_EMAILGEN_PERSONAS -> DECNET_REALISM_PERSONAS DECNET_EMAILGEN_FAKE_OUTPUT -> DECNET_REALISM_FAKE_OUTPUT Importers rewritten in: orchestrator/emailgen/scheduler.py, orchestrator/drivers/email.py, web/router/{emailgen,topology}/ api_personas.py, cli/emailgen.py. Tests for moved modules relocated to tests/realism/; tests for stay-put modules updated in place. API URL `/api/v1/emailgen/personas` and CLI `decnet emailgen import-personas` keep their public names until the service-collapse commit (stage 5).
2026-04-27 16:05:43 -04:00
parent f57c621117
commit 0b9873982d
34 changed files with 455 additions and 298 deletions
--- a/decnet/orchestrator/drivers/email.py
+++ b/decnet/orchestrator/drivers/email.py
@@ -5,9 +5,9 @@ configured emailgen spool directory (``/var/spool/decnet-emails/`` by
 default).  The IMAP/POP3 service templates read that spool at request
 time so attackers see the generated mail in their MUA.

-The LLM call goes through :mod:`decnet.orchestrator.emailgen.llm` —
-backend-agnostic by construction so swapping Ollama for the Anthropic
-API, vLLM, or llama.cpp is a config change, not a driver rewrite.
+The LLM call goes through :mod:`decnet.realism.llm` — backend-agnostic
+by construction so swapping Ollama for the Anthropic API, vLLM, or
+llama.cpp is a config change, not a driver rewrite.
 Output is parsed-and-repaired into a valid EML using
 :mod:`email.mime.*`; the worker then ``docker exec``\\s a ``tee`` to
 drop the file inside the target container, followed by a
@@ -29,10 +29,10 @@ from typing import Any, Optional

 from decnet.logging import get_logger
 from decnet.orchestrator.drivers.base import ActivityResult
-from decnet.orchestrator.emailgen.llm import LLMBackend, LLMTimeout, get_llm
-from decnet.orchestrator.emailgen.prompt import PromptInputs, build as build_prompt
 from decnet.orchestrator.emailgen.scheduler import EmailAction
 from decnet.orchestrator.emailgen.threads import new_message_id
+from decnet.realism.llm import LLMBackend, LLMTimeout, get_llm
+from decnet.realism.prompts.email import PromptInputs, build as build_prompt

 log = get_logger("orchestrator.email")

--- a/decnet/orchestrator/emailgen/init.py
+++ b/decnet/orchestrator/emailgen/init.py
@@ -11,7 +11,7 @@ heartbeat / control-listener scaffolding via :mod:`decnet.bus.publish`.

 Lazy worker re-export: :func:`emailgen_worker` is loaded on first
 attribute access so that submodules can import package-level names
-(``decnet.orchestrator.emailgen.prompt``) without triggering an eager
+(``decnet.orchestrator.emailgen.events``) without triggering an eager
 load of the worker — and through it, the email driver, which imports
 back into this package.  Without lazy loading the package + driver +
 worker form a cycle.
--- a/decnet/orchestrator/emailgen/global_pool.py
+++ b/decnet/orchestrator/emailgen/global_pool.py
@@ -1,136 +0,0 @@
-"""Global persona pool — non-topology mail deckies.
-
-DECNET runs in three deployment shapes that emit running deckies:
-
-* **MazeNET topologies**       — each topology owns its own
-  :attr:`Topology.email_personas` JSON list; the scheduler walks back
-  from the mail decky to its parent topology row.
-* **Unihost fleet**            — MACVLAN/IPVLAN deckies that have no
-  parent topology row at all.  They share one host-wide pool.
-* **SWARM shards**             — DeckyShard rows on enrolled workers.
-  Same shape as fleet for emailgen purposes (no parent topology row),
-  so they read the same global pool.
-
-This module owns the global pool: a JSON file on disk that operators
-populate via ``decnet emailgen import-personas <file>`` (or by editing
-the file directly).  The file is loaded lazily on first read and
-re-loaded on mtime change so a CLI import takes effect for the running
-worker without a restart.
-
-Path resolution order:
-
-1. ``DECNET_EMAILGEN_PERSONAS`` environment variable — explicit override.
-2. ``/etc/decnet/email_personas.json`` — canonical master path; this is
-   what ``decnet init`` will eventually own.
-3. ``~/.decnet/email_personas.json`` — dev fallback so a developer can
-   exercise the worker without root or ``decnet init``.
-
-When the file is missing / empty / unparseable, the pool is empty and
-the scheduler skips fleet/shard mail deckies the same way it skips a
-topology with too few personas.  No silent fallback to dummy personas;
-silence is correct when there's no opinion to convey.
-"""
-from __future__ import annotations
-
-import os
-import threading
-from pathlib import Path
-from typing import Optional
-
-from decnet.logging import get_logger
-from decnet.orchestrator.emailgen.personas import EmailPersona, parse_personas
-
-logger = get_logger("orchestrator.emailgen")
-
-_ENV_VAR = "DECNET_EMAILGEN_PERSONAS"
-_SYSTEM_PATH = Path("/etc/decnet/email_personas.json")
-
-
-def _user_path() -> Path:
-    return Path(os.path.expanduser("~/.decnet/email_personas.json"))
-
-
-def resolve_path() -> Path:
-    """Return the path the global pool would load from right now.
-
-    The file may not exist; callers are expected to handle that.  The
-    function is pure (no I/O) so the ``decnet emailgen import-personas``
-    CLI can ask "where would I write to?" without touching the disk.
-    """
-    override = os.environ.get(_ENV_VAR, "").strip()
-    if override:
-        return Path(override)
-    if _SYSTEM_PATH.parent.exists() or _SYSTEM_PATH.exists():
-        return _SYSTEM_PATH
-    return _user_path()
-
-
-# ── Cache ────────────────────────────────────────────────────────────────────
-# Lock-protected because two scheduler ticks could race on the first load,
-# and the read path is hot enough (every tick, every fleet/shard mail
-# decky) that re-parsing on every call is wasteful.
-
-_lock = threading.Lock()
-_cache: list[EmailPersona] = []
-_cache_path: Optional[Path] = None
-_cache_mtime: float = 0.0
-
-
-def load(*, language_default: str = "en") -> list[EmailPersona]:
-    """Return the parsed global persona pool.
-
-    *language_default* fills in any persona missing a ``language`` field;
-    fleet/shard sources have no topology-level default, so callers
-    should pass the worker's best guess (typically ``"en"``).
-
-    Threadsafe and cheap on the steady state (mtime check + dict lookup);
-    expensive only when the file changed since the last call.
-    """
-    path = resolve_path()
-    try:
-        st = path.stat()
-    except OSError:
-        with _lock:
-            global _cache, _cache_path, _cache_mtime
-            _cache = []
-            _cache_path = path
-            _cache_mtime = 0.0
-        return []
-
-    with _lock:
-        if (
-            _cache_path == path
-            and _cache_mtime == st.st_mtime
-            and _cache  # non-empty cache; empty re-parses cheaply anyway
-        ):
-            return _cache
-
-    try:
-        raw = path.read_text(encoding="utf-8")
-    except OSError as exc:
-        logger.warning("emailgen global pool: read failed path=%s: %s", path, exc)
-        return []
-
-    parsed = parse_personas(raw, language_default=language_default)
-    with _lock:
-        _cache = parsed
-        _cache_path = path
-        _cache_mtime = st.st_mtime
-    if parsed:
-        logger.info(
-            "emailgen global pool: loaded %d personas from %s", len(parsed), path,
-        )
-    return parsed
-
-
-def reset_cache() -> None:
-    """Clear the in-process cache.
-
-    Test-only helper — avoids stale state when several tests in the
-    same process exercise different on-disk pools.
-    """
-    global _cache, _cache_path, _cache_mtime
-    with _lock:
-        _cache = []
-        _cache_path = None
-        _cache_mtime = 0.0
--- a/decnet/orchestrator/emailgen/llm/init.py
+++ b/decnet/orchestrator/emailgen/llm/init.py
@@ -1,22 +0,0 @@
-"""LLM backend for emailgen.
-
-Pluggable from day one (per the provider-subpackages convention used by
-:mod:`decnet.web.db` and :mod:`decnet.bus`): the worker only depends on
-:class:`LLMBackend` from :mod:`base`; concrete transports live under
-:mod:`impl` and are selected by :func:`get_llm`.
-
-This is the seam ANTI will pull on when swapping local Ollama for the
-Anthropic API, llama.cpp, vLLM, or any other inference server — change
-``DECNET_EMAILGEN_LLM`` (or pass ``llm=`` to the driver), no driver
-rewrite.
-"""
-from __future__ import annotations
-
-from decnet.orchestrator.emailgen.llm.base import (
-    LLMBackend,
-    LLMResult,
-    LLMTimeout,
-)
-from decnet.orchestrator.emailgen.llm.factory import get_llm
-
-__all__ = ["LLMBackend", "LLMResult", "LLMTimeout", "get_llm"]
--- a/decnet/orchestrator/emailgen/llm/base.py
+++ b/decnet/orchestrator/emailgen/llm/base.py
@@ -1,47 +0,0 @@
-"""Backend protocol shared by every LLM transport.
-
-Deliberately narrow: emailgen needs one async ``generate`` call that
-takes a prompt string and returns the model's output text plus enough
-metadata for the worker to populate the orchestrator-email payload
-(model name, latency, success bit).  Streaming, embeddings, multi-turn
-chat — all out of scope here; emailgen only ever does one-shot
-single-prompt generations.
-"""
-from __future__ import annotations
-
-from dataclasses import dataclass, field
-from typing import Any, Protocol
-
-
-class LLMTimeout(Exception):
-    """Raised when a generation exceeds the backend's wall-clock cap.
-
-    Backends MUST raise this rather than returning silently empty
-    output; the driver discriminates timeout from "model produced
-    nothing useful" so payloads carry the right ``stage`` value.
-    """
-
-
-@dataclass
-class LLMResult:
-    """Outcome of one ``generate`` call.
-
-    ``success`` is ``False`` when the backend ran cleanly but produced
-    no usable output (e.g. an empty stdout).  Hard failures (subprocess
-    crash, network error) raise; soft failures land here so the driver
-    can persist + log them as one event.
-    """
-    success: bool
-    text: str
-    model: str
-    latency_ms: int
-    extra: dict[str, Any] = field(default_factory=dict)
-
-
-class LLMBackend(Protocol):
-    """Minimal contract for an emailgen LLM provider."""
-
-    model: str
-    timeout: float
-
-    async def generate(self, prompt: str) -> LLMResult: ...
--- a/decnet/orchestrator/emailgen/llm/factory.py
+++ b/decnet/orchestrator/emailgen/llm/factory.py
@@ -1,46 +0,0 @@
-"""Backend dispatch.
-
-Reads ``DECNET_EMAILGEN_LLM`` to pick a concrete :class:`LLMBackend`.
-Defaults to ``ollama`` because that's what the prototype proved out and
-what most dev boxes have on hand.
-
-Supported keys:
-
-* ``ollama`` — :class:`decnet.orchestrator.emailgen.llm.impl.ollama.OllamaBackend`
-* ``fake``   — :class:`decnet.orchestrator.emailgen.llm.impl.fake.FakeBackend`
-  (canned output, used by tests so they don't shell out)
-
-Anthropic / vLLM / llama.cpp slots in here as a third branch when the
-need shows up.  Per the provider-subpackages memory, do NOT collapse
-factory dispatch into the impl modules — keeps the ``__init__`` import
-graph cycle-free and the env contract auditable in one place.
-"""
-from __future__ import annotations
-
-import os
-from typing import Any
-
-from decnet.orchestrator.emailgen.llm.base import LLMBackend
-
-
-def get_llm(*, model: str | None = None, **kwargs: Any) -> LLMBackend:
-    """Instantiate the LLM backend selected by environment.
-
-    *model* (when provided) overrides whatever the backend's own default
-    is — e.g. for OllamaBackend that's ``llama3.1`` unless
-    ``DECNET_EMAILGEN_MODEL`` says otherwise.  Lets the worker honour
-    ``decnet emailgen run --model gpt-oss`` without each backend having
-    to know about CLI flags.
-    """
-    backend_key = os.environ.get("DECNET_EMAILGEN_LLM", "ollama").lower()
-
-    if backend_key == "ollama":
-        from decnet.orchestrator.emailgen.llm.impl.ollama import OllamaBackend
-        return OllamaBackend(model=model, **kwargs)
-    if backend_key == "fake":
-        from decnet.orchestrator.emailgen.llm.impl.fake import FakeBackend
-        return FakeBackend(model=model or "fake-model", **kwargs)
-    raise ValueError(
-        f"Unsupported DECNET_EMAILGEN_LLM={backend_key!r}; "
-        "expected one of: ollama, fake"
-    )
--- a/decnet/orchestrator/emailgen/llm/impl/init.py
+++ b/decnet/orchestrator/emailgen/llm/impl/init.py
@@ -1,6 +0,0 @@
-"""Concrete LLM-backend implementations.
-
-Importers go through :func:`decnet.orchestrator.emailgen.llm.get_llm`,
-not these modules directly — same convention as
-:mod:`decnet.web.db.sqlite` and :mod:`decnet.bus.unix_client`.
-"""
--- a/decnet/orchestrator/emailgen/llm/impl/fake.py
+++ b/decnet/orchestrator/emailgen/llm/impl/fake.py
@@ -1,50 +0,0 @@
-"""In-process fake backend for tests.
-
-Returns a canned ``Subject:\\n\\nbody`` string so the driver path can be
-exercised without an Ollama install.  Configurable via ``DECNET_EMAILGEN_FAKE_OUTPUT``
-(env) or the ``output`` constructor arg — the env-var path lets
-integration tests run the worker end-to-end with deterministic output.
-"""
-from __future__ import annotations
-
-import os
-import time
-from typing import Optional
-
-from decnet.orchestrator.emailgen.llm.base import LLMBackend, LLMResult
-
-
-_DEFAULT_OUTPUT = (
-    "Subject: Quick update\n\n"
-    "Hi,\n\nFollowing up on the topic.\n\nBest regards,\nFake Persona\n"
-)
-
-
-class FakeBackend(LLMBackend):
-    def __init__(
-        self,
-        *,
-        model: str = "fake-model",
-        timeout: float = 1.0,
-        output: Optional[str] = None,
-        success: bool = True,
-    ) -> None:
-        self.model = model
-        self.timeout = timeout
-        self._output = (
-            output
-            if output is not None
-            else os.environ.get("DECNET_EMAILGEN_FAKE_OUTPUT", _DEFAULT_OUTPUT)
-        )
-        self._success = success
-
-    async def generate(self, prompt: str) -> LLMResult:    # noqa: ARG002
-        t0 = time.monotonic()
-        latency_ms = int((time.monotonic() - t0) * 1000)
-        return LLMResult(
-            success=self._success,
-            text=self._output if self._success else "",
-            model=self.model,
-            latency_ms=latency_ms,
-            extra={"rc": 0 if self._success else 1},
-        )
--- a/decnet/orchestrator/emailgen/llm/impl/ollama.py
+++ b/decnet/orchestrator/emailgen/llm/impl/ollama.py
@@ -1,107 +0,0 @@
-"""Ollama subprocess backend.
-
-Shells out to ``ollama run <model>`` with the prompt fed via stdin.
-Mirrors what the original prototype at ``DECNET-EMAILs/main.py`` did,
-but lifted out of the driver so the rest of emailgen never imports a
-specific transport.
-
-Why subprocess and not the Ollama HTTP API:
-* No new dependency (``ollama`` Python lib is optional).
-* Works on hosts where Ollama is bound to a unix socket, an unusual TCP
-  port, or behind a remote-mount layer — `ollama run` resolves all that.
-* Same path the operator uses by hand (``ollama run llama3.1``); easier
-  to debug discrepancies between worker output and a console session.
-
-Cost: per-call process spawn (~50ms on a warm box).  Acceptable for
-emailgen's tick rate (one email every 5 minutes by default).  When that
-cost matters, swap to an HTTP-API backend; the seam is in
-:mod:`decnet.orchestrator.emailgen.llm.factory`.
-"""
-from __future__ import annotations
-
-import asyncio
-import os
-import time
-from typing import Optional
-
-from decnet.logging import get_logger
-from decnet.orchestrator.emailgen.llm.base import (
-    LLMBackend,
-    LLMResult,
-    LLMTimeout,
-)
-
-log = get_logger("orchestrator.emailgen.llm")
-
-_OLLAMA = "ollama"
-_DEFAULT_MODEL = os.environ.get("DECNET_EMAILGEN_MODEL", "llama3.1")
-_DEFAULT_TIMEOUT = float(os.environ.get("DECNET_EMAILGEN_TIMEOUT", "60"))
-
-
-class OllamaBackend(LLMBackend):
-    """Concrete :class:`LLMBackend` that shells out to ``ollama run``."""
-
-    def __init__(
-        self,
-        *,
-        model: Optional[str] = None,
-        timeout: Optional[float] = None,
-    ) -> None:
-        self.model = model or _DEFAULT_MODEL
-        self.timeout = timeout if timeout is not None else _DEFAULT_TIMEOUT
-
-    async def generate(self, prompt: str) -> LLMResult:
-        t0 = time.monotonic()
-        try:
-            proc = await asyncio.create_subprocess_exec(
-                _OLLAMA, "run", self.model,
-                stdin=asyncio.subprocess.PIPE,
-                stdout=asyncio.subprocess.PIPE,
-                stderr=asyncio.subprocess.PIPE,
-            )
-        except FileNotFoundError as exc:
-            latency_ms = int((time.monotonic() - t0) * 1000)
-            return LLMResult(
-                success=False,
-                text="",
-                model=self.model,
-                latency_ms=latency_ms,
-                extra={"rc": 127, "stderr": f"argv[0] not found: {exc}"},
-            )
-        try:
-            stdout, stderr = await asyncio.wait_for(
-                proc.communicate(prompt.encode("utf-8")),
-                timeout=self.timeout,
-            )
-        except asyncio.TimeoutError as exc:
-            try:
-                proc.kill()
-            except ProcessLookupError:
-                pass
-            raise LLMTimeout(
-                f"ollama run {self.model} exceeded {self.timeout}s"
-            ) from exc
-
-        latency_ms = int((time.monotonic() - t0) * 1000)
-        rc = proc.returncode if proc.returncode is not None else -1
-        text = stdout.decode("utf-8", "replace")
-        stderr_s = stderr.decode("utf-8", "replace")
-        if rc != 0 or not text.strip():
-            log.warning(
-                "ollama backend non-zero / empty rc=%d model=%s stderr=%r",
-                rc, self.model, stderr_s[:200],
-            )
-            return LLMResult(
-                success=False,
-                text=text,
-                model=self.model,
-                latency_ms=latency_ms,
-                extra={"rc": rc, "stderr": stderr_s.strip()[:256]},
-            )
-        return LLMResult(
-            success=True,
-            text=text,
-            model=self.model,
-            latency_ms=latency_ms,
-            extra={"rc": rc},
-        )
--- a/decnet/orchestrator/emailgen/personas.py
+++ b/decnet/orchestrator/emailgen/personas.py
@@ -1,119 +0,0 @@
-"""Persona schema for the emailgen worker.
-
-Stored as a JSON list on :attr:`Topology.email_personas`.  Each persona
-describes one fictional employee whose mailbox lives on the topology's
-IMAP/POP3 decky.  The schema deliberately stays narrow: the LLM gets
-*enough* differentiation to write distinct voices, no more.
-
-Invalid entries are dropped with a warning (returned alongside the
-parsed list) rather than raising — a single typo in one persona must
-not stall the entire emailgen tick.
-"""
-from __future__ import annotations
-
-import json
-from typing import Literal, Optional
-
-from pydantic import BaseModel, Field, ValidationError, field_validator
-
-from decnet.logging import get_logger
-
-logger = get_logger("orchestrator.emailgen")
-
-Tone = Literal["formal", "direct", "casual", "technical"]
-ReplyLatency = Literal["fast", "normal", "slow"]
-
-
-class EmailPersona(BaseModel):
-    """One fake mailbox owner.
-
-    ``language`` is ISO 639-1 (``en``, ``es``, ``pt``…); when unset on the
-    persona it falls back to the topology's ``language_default``.
-    ``uses_llms_heavily`` lifts the prompt-layer em-dash suppression for
-    that persona — em-dashes are an LLM tell, but a persona explicitly
-    pegged as a heavy LLM user should *naturally* produce them.
-    """
-    name: str = Field(min_length=1, max_length=128)
-    email: str = Field(min_length=3, max_length=255)
-    role: str = Field(min_length=1, max_length=128)
-    tone: Tone = "formal"
-    mannerisms: list[str] = Field(default_factory=list, max_length=12)
-    language: Optional[str] = Field(default=None, max_length=8)
-    signature: Optional[str] = Field(default=None, max_length=512)
-    active_hours: str = Field(default="09:00-18:00", max_length=32)
-    reply_latency: ReplyLatency = "normal"
-    uses_llms_heavily: bool = False
-
-    @field_validator("email")
-    @classmethod
-    def _email_shape(cls, v: str) -> str:
-        # Cheap structural check — full RFC 5322 isn't worth the
-        # dependency.  We only need ``user@domain`` with non-empty parts
-        # for the prompt builder + Message-ID generator.
-        if "@" not in v:
-            raise ValueError("email must contain '@'")
-        local, _, domain = v.rpartition("@")
-        if not local or not domain or "." not in domain:
-            raise ValueError("email must look like user@domain.tld")
-        return v
-
-
-def parse_personas(
-    raw: str | list | None,
-    *,
-    language_default: str = "en",
-) -> list[EmailPersona]:
-    """Parse the JSON-or-list ``email_personas`` value into models.
-
-    Resolves ``language`` against *language_default* so downstream
-    consumers (prompt builder, scheduler) never need to know about
-    fallback semantics.
-    """
-    if not raw:
-        return []
-    if isinstance(raw, str):
-        try:
-            raw = json.loads(raw)
-        except json.JSONDecodeError as exc:
-            logger.warning("emailgen personas: invalid JSON, skipping: %s", exc)
-            return []
-    if not isinstance(raw, list):
-        logger.warning(
-            "emailgen personas: expected list, got %s", type(raw).__name__
-        )
-        return []
-    out: list[EmailPersona] = []
-    for i, entry in enumerate(raw):
-        try:
-            persona = EmailPersona.model_validate(entry)
-        except ValidationError as exc:
-            logger.warning(
-                "emailgen personas: dropping invalid entry index=%d: %s",
-                i, exc.errors(include_url=False),
-            )
-            continue
-        if persona.language is None:
-            persona = persona.model_copy(update={"language": language_default})
-        out.append(persona)
-    return out
-
-
-def in_active_hours(persona: EmailPersona, now_hour: int) -> bool:
-    """Return True if *now_hour* (0–23) falls in the persona's window.
-
-    Format: ``"HH:MM-HH:MM"``. Wrap-around windows (``"22:00-06:00"``)
-    are supported. Invalid windows treat the persona as always-on so a
-    config typo never silences the whole fleet.
-    """
-    try:
-        start_s, end_s = persona.active_hours.split("-")
-        start_h = int(start_s.split(":")[0])
-        end_h = int(end_s.split(":")[0])
-    except (ValueError, IndexError):
-        return True
-    if start_h == end_h:
-        return True
-    if start_h < end_h:
-        return start_h <= now_hour < end_h
-    # Wrap-around (e.g. 22:00-06:00).
-    return now_hour >= start_h or now_hour < end_h
--- a/decnet/orchestrator/emailgen/prompt.py
+++ b/decnet/orchestrator/emailgen/prompt.py
@@ -1,151 +0,0 @@
-"""Ollama prompt builder for emailgen.
-
-The LLM gets a tightly-scoped instruction and a small handful of
-deterministic constraints.  Persona mannerisms are *pre-selected* in
-Python (1–2 of the persona's full list) and injected as hard rules —
-small models otherwise treat the mannerism list as flavour text and
-ignore it, and the corpus collapses into one voice.
-
-**Em-dash suppression** is on by default; suppression is lifted only
-for personas that opt in via ``uses_llms_heavily``.  Em-dashes are a
-strong stylometric tell for LLM-authored prose, and a honeypot mailbox
-where every author uses them is a tell.
-"""
-from __future__ import annotations
-
-import secrets
-from dataclasses import dataclass
-from typing import Optional
-
-from decnet.orchestrator.emailgen.personas import EmailPersona
-
-
-@dataclass(frozen=True)
-class PromptInputs:
-    sender: EmailPersona
-    recipient: EmailPersona
-    context_hint: str
-    parent_subject: Optional[str] = None      # set when replying
-    parent_excerpt: Optional[str] = None      # short snippet of last msg
-
-
-_LANGUAGE_NAMES = {
-    "en": "English",
-    "es": "Spanish",
-    "pt": "Portuguese",
-    "fr": "French",
-    "de": "German",
-    "it": "Italian",
-    "nl": "Dutch",
-    "ja": "Japanese",
-    "zh": "Chinese",
-}
-
-
-def _lang_label(code: str) -> str:
-    return _LANGUAGE_NAMES.get(code.lower(), code)
-
-
-def select_mannerisms(
-    persona: EmailPersona,
-    *,
-    rng: Optional[secrets.SystemRandom] = None,
-    n: int = 2,
-) -> list[str]:
-    """Pick *n* mannerisms deterministically given *rng*.
-
-    Returns up to *n*; falls back to the full list when the persona
-    declares fewer.  Determinism (under a seeded RNG) is what makes
-    tests practical — otherwise mannerism injection is unverifiable.
-    """
-    rnd = rng or secrets.SystemRandom()
-    pool = list(persona.mannerisms)
-    if not pool:
-        return []
-    if len(pool) <= n:
-        return pool
-    rnd.shuffle(pool)
-    return pool[:n]
-
-
-def build(
-    inputs: PromptInputs,
-    *,
-    rng: Optional[secrets.SystemRandom] = None,
-) -> tuple[str, list[str]]:
-    """Return ``(prompt, mannerisms_used)``.
-
-    ``mannerisms_used`` flows back into the persisted ``payload`` JSON
-    so an analyst can see *why* a given email reads the way it does.
-    """
-    sender = inputs.sender
-    recipient = inputs.recipient
-    language = _lang_label(sender.language or "en")
-    mannerisms = select_mannerisms(sender, rng=rng)
-    mannerism_block = (
-        "\n".join(f"- {m}" for m in mannerisms)
-        if mannerisms
-        else "- (no specific mannerisms; write in the persona's tone)"
-    )
-
-    if sender.uses_llms_heavily:
-        em_dash_rule = (
-            "Em-dashes are fine — this persona uses them naturally. "
-            "Write in your usual style."
-        )
-    else:
-        em_dash_rule = (
-            "Do NOT use em-dashes (—). Use commas, periods, or "
-            "parentheses instead. Em-dashes are a tell."
-        )
-
-    sig_block = (
-        f"Use this exact signature block:\n{sender.signature}"
-        if sender.signature
-        else "End with a short, plausible signature for the persona's role."
-    )
-
-    if inputs.parent_subject:
-        thread_block = (
-            f"This is a REPLY in an ongoing thread.\n"
-            f"- Parent subject: {inputs.parent_subject}\n"
-            f"- Parent excerpt: {inputs.parent_excerpt or '(no excerpt)'}\n"
-            f"- Begin the body assuming the recipient already read the parent.\n"
-        )
-        subject_rule = (
-            "Subject must be the parent subject prefixed with 'Re: ' "
-            "(no double 'Re: Re:')."
-        )
-    else:
-        thread_block = "This is a NEW thread (no prior context)."
-        subject_rule = (
-            "Generate a short, specific subject line (≤ 80 chars) "
-            "appropriate to the context."
-        )
-
-    prompt = f"""You are writing one corporate email, RFC 2822 plain-text body only.
-
-Persona — sender:
- Name: {sender.name}
- Role: {sender.role}
- Tone: {sender.tone}
- Mannerisms (must show through):
-{mannerism_block}
-
-Persona — recipient:
- Name: {recipient.name}
- Role: {recipient.role}
-
-Context hint: {inputs.context_hint}
-
-Thread context:
-{thread_block}
-
-Hard rules:
-1. Write the email body in {language}. Do not translate or code-switch.
-2. {em_dash_rule}
-3. {subject_rule}
-4. {sig_block}
-5. Output ONLY the email — first line is "Subject: <subject>", then a blank line, then the body. No commentary, no markdown fences, no preamble.
-"""
-    return prompt.strip(), mannerisms
--- a/decnet/orchestrator/emailgen/scheduler.py
+++ b/decnet/orchestrator/emailgen/scheduler.py
@@ -25,18 +25,18 @@ from datetime import datetime
 from typing import Any, Optional

 from decnet.logging import get_logger
-from decnet.orchestrator.emailgen import global_pool
-from decnet.orchestrator.emailgen.personas import (
-    EmailPersona,
-    in_active_hours,
-    parse_personas,
-)
 from decnet.orchestrator.emailgen.threads import (
    ThreadChain,
    new_thread_id,
    references_for_reply,
    reply_subject,
 )
+from decnet.realism import personas_pool as global_pool
+from decnet.realism.personas import (
+    EmailPersona,
+    in_active_hours,
+    parse_personas,
+)

 logger = get_logger("orchestrator.emailgen")