merge: testing → main (reconcile 2-week divergence)

2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions
--- a/decnet/realism/init.py
+++ b/decnet/realism/init.py
@@ -0,0 +1,27 @@
+"""Realism library — synthetic content + scheduling primitives.
+
+A shared, importable library that produces *plausible* artifacts (file
+names, file bodies, email content) and the diurnal/persona machinery
+that decides *when* and *for whom* to produce them.
+
+Workers (orchestrator, canary cultivator, future-emailgen-equivalents)
+import from here.  This package owns:
+
+* :mod:`decnet.realism.taxonomy` — :class:`ContentClass` enum and the
+  :class:`Plan` dataclass that planners emit.
+* :mod:`decnet.realism.diurnal` — work-hours gating and a backdated
+  ``mtime`` sampler so planted files don't all stamp at wall-clock-now.
+* :mod:`decnet.realism.planner` — picks ``(decky, persona, class,
+  action, mtime)`` tuples for the orchestrator's tick loop.
+* :mod:`decnet.realism.personas` — persona schema (the
+  :class:`EmailPersona` record describing each fictional employee).
+* :mod:`decnet.realism.prompts` — prompt builders, one per content
+  class, sharing an em-dash-suppression style helper.
+* :mod:`decnet.realism.llm` — :class:`LLMBackend` ABC + factory + impl
+  subpackage; pluggable text-generation backend.
+
+The library has **no worker, no systemd unit, no CLI of its own** —
+it's plain Python that consumers import.  The CLI surface that does
+exist (``decnet realism import-personas``) is registered by
+:mod:`decnet.cli.realism` after stage 5 of the migration.
+"""
--- a/decnet/realism/bodies.py
+++ b/decnet/realism/bodies.py
@@ -0,0 +1,421 @@
+"""Per-content-class body generators (deterministic templates).
+
+Stage 3 of the realism migration ships deterministic per-class
+templates — varied enough that two notes on the same decky aren't
+identical, formulaic enough that system-class files (cron logs,
+journal entries) look like cron actually wrote them.
+
+Stage 6 wires LLM enrichment for user-classes; the templates here
+remain the fallback path so the orchestrator tick never blocks on
+Ollama.
+
+Determinism: every namer/body takes a :class:`SystemRandom` (from
+:mod:`secrets`).  Tests pin the RNG seed for reproducibility; the
+orchestrator passes a fresh RNG per tick so production picks are
+unpredictable.
+
+The factory mirrors :mod:`decnet.realism.naming`: caller passes a
+:class:`~decnet.realism.taxonomy.ContentClass`; we return the body
+generator registered for it.  Email + canary classes raise —
+those bodies come from the email driver and canary cultivator
+respectively, not from realism.bodies.
+"""
+from __future__ import annotations
+
+import asyncio
+import secrets
+from datetime import datetime, timezone
+from typing import Callable, Optional
+
+from decnet.logging import get_logger
+from decnet.realism.taxonomy import ContentClass
+
+log = get_logger("realism.bodies")
+
+
+# ── User-class body generators ─────────────────────────────────────────────
+
+
+_NOTE_TEMPLATES: tuple[str, ...] = (
+    "follow up with the team on this",
+    "remember to ping the on-call",
+    "ask about the staging migration timeline",
+    "double-check the runbook before next shift",
+    "todo: rotate keys; check on backup task",
+    "meeting notes from yesterday — copy onto wiki when free",
+    "this is broken in prod; talk to ops monday",
+    "draft response to the auditor — keep it short",
+)
+
+
+def _body_note(persona: str, rng: secrets.SystemRandom) -> str:
+    """Build a short note from 2-5 distinct template lines.
+
+    *persona* is accepted for signature parity with the other body
+    generators; this template does not read it.  The result always
+    ends with a newline.
+    """
+    wanted = rng.randint(2, 5)
+    # Never ask ``sample`` for more lines than the pool holds.
+    count = min(wanted, len(_NOTE_TEMPLATES))
+    picked = rng.sample(_NOTE_TEMPLATES, k=count)
+    return "\n".join(picked) + "\n"
+
+
+_TODO_VERBS: tuple[str, ...] = (
+    "rotate keys",
+    "review pr",
+    "clean up logs",
+    "update docs",
+    "follow up on ticket",
+    "test backup restore",
+    "deploy to staging",
+    "ack auditor email",
+    "patch CVE backlog",
+)
+
+
+def _body_todo(persona: str, rng: secrets.SystemRandom) -> str:
+    """Build a markdown checklist of 3-7 distinct tasks.
+
+    Each task is independently pre-checked with probability 0.33 so
+    the list looks like it has been touched at least once.  *persona*
+    is unused by this template.
+    """
+    wanted = rng.randint(3, 7)
+    chosen = rng.sample(_TODO_VERBS, k=min(wanted, len(_TODO_VERBS)))
+
+    def _checkbox() -> str:
+        # One RNG draw per task, taken in list order.
+        return "[x]" if rng.random() < 0.33 else "[ ]"
+
+    rows = [f"- {_checkbox()} {task}" for task in chosen]
+    return "\n".join(rows) + "\n"
+
+
+_DRAFT_PARAGRAPHS: tuple[str, ...] = (
+    "Hi team,\n\nQuick update on the project. We're tracking ahead of schedule "
+    "on the migration but the staging soak revealed a regression in the "
+    "auth path. I'll have a fix in by end of week.\n\nThanks,\n",
+    "Hi,\n\nFollowing up on yesterday's meeting. Action items below:\n\n"
+    "- Engineering owns the deployment plan\n"
+    "- Ops will draft the runbook update\n"
+    "- We sync again Friday\n\n",
+    "All,\n\nProposal attached. Key points:\n\n"
+    "1. We are not changing the data model in this release\n"
+    "2. The new endpoint is opt-in via feature flag\n"
+    "3. Rollback path is one config flip\n\n"
+    "Feedback by EOD?\n\n",
+)
+
+
+def _body_draft(persona: str, rng: secrets.SystemRandom) -> str:
+    """Return one canned draft message, chosen at random.
+
+    The text is returned verbatim; *persona* is not woven into it.
+    """
+    draft = rng.choice(_DRAFT_PARAGRAPHS)
+    return draft
+
+
+_SCRIPT_TEMPLATES: tuple[str, ...] = (
+    "#!/usr/bin/env bash\nset -euo pipefail\n\n"
+    "BACKUP_DIR=/var/backups\n"
+    "STAMP=$(date +%Y%m%d-%H%M)\n"
+    "echo \"backup start $STAMP\"\n"
+    "tar czf \"$BACKUP_DIR/db-$STAMP.tar.gz\" /var/lib/mysql\n"
+    "echo \"backup done\"\n",
+    "#!/usr/bin/env bash\nset -e\n\n"
+    "# clean up old logs\n"
+    "find /var/log -name '*.log.*.gz' -mtime +30 -delete\n",
+    "#!/usr/bin/env python3\n\"\"\"Quick fix for the reporting job.\"\"\"\n"
+    "import sys\n\n"
+    "def main():\n    print('todo: real fix here')\n\n"
+    "if __name__ == '__main__':\n    sys.exit(main())\n",
+)
+
+
+def _body_script(persona: str, rng: secrets.SystemRandom) -> str:
+    """Return one canned helper script (bash or python), chosen at random.
+
+    *persona* is unused; the script text is returned verbatim.
+    """
+    script = rng.choice(_SCRIPT_TEMPLATES)
+    return script
+
+
+# ── System-class body generators ───────────────────────────────────────────
+
+
+_CRON_COMMANDS: tuple[str, ...] = (
+    "(root) CMD (run-parts /etc/cron.daily)",
+    "(root) CMD (run-parts /etc/cron.hourly)",
+    "(www-data) CMD (cd /var/www && /usr/bin/php artisan schedule:run)",
+    "(backup) CMD (/usr/local/bin/backup.sh)",
+    "(root) CMD (test -x /usr/sbin/anacron || ( cd / && run-parts --report /etc/cron.daily ))",
+)
+
+
+def _body_log_cron(persona: str, rng: secrets.SystemRandom) -> str:
+    """Build 8-24 cron syslog lines, one per hour, oldest first.
+
+    The newest entry falls in the current UTC hour and each earlier
+    entry is one hour further back, with a random minute and second.
+    Lines are emitted in append order (oldest at the top) the way a
+    real log file grows, and entries that fall before midnight carry
+    the previous day's date.  *persona* is unused.
+    """
+    # Local import: the module header only brings in datetime/timezone.
+    from datetime import timedelta
+
+    n = rng.randint(8, 24)
+    base = datetime.now(timezone.utc)
+    lines = []
+    # Walk from the oldest hour to the newest so the file reads
+    # chronologically.
+    for hours_ago in range(n - 1, -1, -1):
+        minute = rng.randint(0, 59)
+        pid = rng.randint(1000, 99999)
+        cmd = rng.choice(_CRON_COMMANDS)
+        second = rng.randint(0, 59)
+        # Subtracting a timedelta (rather than wrapping the hour with
+        # ``% 24``) rolls the calendar date back across midnight too.
+        stamp = (base - timedelta(hours=hours_ago)).replace(
+            minute=minute, second=second,
+        )
+        # "Apr 27 09:13:44 host CRON[1234]: ..." cron syslog shape.
+        lines.append(
+            f"{stamp:%b %d %H:%M:%S} hostname CRON[{pid}]: {cmd}"
+        )
+    return "\n".join(lines) + "\n"
+
+
+_DAEMON_LINES: tuple[str, ...] = (
+    "systemd[1]: Started Daily apt download activities.",
+    "systemd[1]: apt-daily.service: Succeeded.",
+    "systemd[1]: Reached target Multi-User System.",
+    "kernel: [UFW BLOCK] IN=eth0 OUT= MAC=…",
+    "sshd[2103]: pam_unix(sshd:session): session opened for user admin by (uid=0)",
+    "sshd[2103]: Received disconnect from 10.0.0.4 port 47282:11: disconnected by user",
+    "CRON[1894]: pam_unix(cron:session): session closed for user root",
+)
+
+
+def _body_log_daemon(persona: str, rng: secrets.SystemRandom) -> str:
+    """Build 10-30 daemon syslog lines drawn from the canned messages.
+
+    Every line carries the same current-UTC timestamp; only the
+    message varies.  *persona* is unused.
+    """
+    count = rng.randint(10, 30)
+    # The timestamp is identical for every line, so format it once.
+    stamp = datetime.now(timezone.utc).strftime('%b %d %H:%M:%S')
+    prefix = f"{stamp} hostname "
+    entries = [prefix + rng.choice(_DAEMON_LINES) for _ in range(count)]
+    return "\n".join(entries) + "\n"
+
+
+def _body_cache_tmp(persona: str, rng: secrets.SystemRandom) -> str:
+    """Return a one-line ``session=<token>`` payload for a scratch file.
+
+    The token is 64-256 lowercase-alphanumeric characters.  ASCII is
+    used on purpose so docker exec write paths don't need
+    binary-safety acrobatics.  *persona* is unused.
+    """
+    alphabet = "abcdefghijklmnopqrstuvwxyz0123456789"
+    length = rng.randint(64, 256)
+    token = "".join([rng.choice(alphabet) for _ in range(length)])
+    return "session=" + token + "\n"
+
+
+def _body_email(persona: str, rng: secrets.SystemRandom) -> str:
+    """Placeholder: always raises, because email bodies are produced elsewhere.
+
+    Raises:
+        NotImplementedError: unconditionally.
+    """
+    reason = "email bodies come from the email driver, not realism.bodies"
+    raise NotImplementedError(reason)
+
+
+def _body_canary(persona: str, rng: secrets.SystemRandom) -> str:
+    """Placeholder: always raises, because canary bodies are produced elsewhere.
+
+    Raises:
+        NotImplementedError: unconditionally.
+    """
+    reason = (
+        "canary bodies come from the canary cultivator (stage 7), "
+        "not realism.bodies"
+    )
+    raise NotImplementedError(reason)
+
+
+# ── Dispatch ───────────────────────────────────────────────────────────────
+
+
+# Dispatch table: content class -> body generator taking (persona, rng).
+# The EMAIL and CANARY_* entries map to generators that raise
+# NotImplementedError instead of returning a body.
+_BODIES: dict[ContentClass, Callable[[str, secrets.SystemRandom], str]] = {
+    ContentClass.NOTE: _body_note,
+    ContentClass.TODO: _body_todo,
+    ContentClass.DRAFT: _body_draft,
+    ContentClass.SCRIPT: _body_script,
+    ContentClass.LOG_CRON: _body_log_cron,
+    ContentClass.LOG_DAEMON: _body_log_daemon,
+    ContentClass.CACHE_TMP: _body_cache_tmp,
+    ContentClass.EMAIL: _body_email,
+    ContentClass.CANARY_AWS_CREDS: _body_canary,
+    ContentClass.CANARY_ENV_FILE: _body_canary,
+    ContentClass.CANARY_GIT_CONFIG: _body_canary,
+    ContentClass.CANARY_SSH_KEY: _body_canary,
+    ContentClass.CANARY_HONEYDOC: _body_canary,
+    ContentClass.CANARY_HONEYDOC_DOCX: _body_canary,
+    ContentClass.CANARY_HONEYDOC_PDF: _body_canary,
+    ContentClass.CANARY_MYSQL_DUMP: _body_canary,
+}
+
+
+def make_body(
+    content_class: ContentClass,
+    persona: str,
+    *,
+    rand: Optional[secrets.SystemRandom] = None,
+) -> str:
+    """Return the template body (a str) for *content_class*.
+
+    This is the template-only path.  :func:`make_body_with_llm` is the
+    LLM-aware variant; it lives under a separate name so this path
+    stays trivially callable from tests and from the LLM fallback.
+
+    *rand* is the RNG handed to the generator; a fresh
+    ``secrets.SystemRandom`` is used when it is omitted.
+
+    Raises :class:`KeyError` when no generator is registered for
+    *content_class*.
+    """
+    rng = rand or secrets.SystemRandom()
+    if content_class not in _BODIES:
+        raise KeyError(
+            f"no body generator registered for content_class={content_class!r}"
+        )
+    generator = _BODIES[content_class]
+    return generator(persona, rng)
+
+
+async def make_body_with_llm(
+    content_class: ContentClass,
+    persona,  # EmailPersona — typed loosely to avoid an import cycle
+    *,
+    llm=None,  # LLMBackend | None
+    breaker=None,  # LLMCircuitBreaker | None
+    timeout: float = 60.0,
+    rand: Optional[secrets.SystemRandom] = None,
+) -> str:
+    """Return an LLM-written body for user-classes, else the template body.
+
+    The template from :func:`make_body` is returned whenever:
+
+    * the content class isn't a user-class (system-class content
+      should look formulaic, so the LLM is never invoked there),
+    * ``llm`` is None,
+    * ``breaker.allow_call()`` returns False,
+    * the LLM call times out, raises, reports ``success=False`` or
+      returns only whitespace.
+
+    Every LLM failure is reported to *breaker* (when given) via
+    ``record_failure``; a usable result is reported via
+    ``record_success``.  The LLM text is right-stripped, terminated
+    with a single newline and passed through ``strip_em_dashes``.
+    """
+    rng = rand or secrets.SystemRandom()
+
+    def _template(*, failed: bool = False) -> str:
+        # Shared fallback: optionally count the failure, then use the template.
+        if failed and breaker is not None:
+            breaker.record_failure()
+        return make_body(content_class, persona.name, rand=rng)
+
+    # System / canary / email classes never touch the LLM.
+    if not content_class.is_user_class():
+        return _template()
+    if llm is None:
+        return _template()
+    if breaker is not None and not breaker.allow_call():
+        return _template()
+
+    # Lazy imports keep the prompt + style modules out of the
+    # deterministic path's import graph.
+    from decnet.realism.llm.base import LLMTimeout
+    from decnet.realism.prompts import filebody as _filebody
+    from decnet.realism.prompts._style import strip_em_dashes
+
+    prompt = _filebody.build(content_class, persona)
+    try:
+        result = await asyncio.wait_for(llm.generate(prompt), timeout=timeout)
+    except (LLMTimeout, asyncio.TimeoutError):
+        log.debug("realism.bodies LLM timeout class=%s persona=%s",
+                  content_class.value, persona.name)
+        return _template(failed=True)
+    except Exception as exc:  # noqa: BLE001
+        log.warning("realism.bodies LLM error class=%s persona=%s: %s",
+                    content_class.value, persona.name, exc)
+        return _template(failed=True)
+
+    usable = result.success and result.text.strip()
+    if not usable:
+        return _template(failed=True)
+
+    if breaker is not None:
+        breaker.record_success()
+    cleaned = result.text.rstrip() + "\n"
+    return strip_em_dashes(cleaned, persona)
+
+
+# ── Edit-in-place mutators ─────────────────────────────────────────────────
+# Stage 3b: deterministic per-class mutations.  The contract: take the
+# previous body bytes, return a plausible *next* iteration (append a
+# line, flip a checkbox, fix a typo).  Append-only for logs; small
+# in-place edits for user content.  LLM enrichment in stage 6 wires
+# next_iteration to ask "what would <persona> write next" with the
+# previous body in the prompt; the deterministic path stays as the
+# fallback.
+
+
+def _edit_todo(
+    prev: str, persona: str, rng: secrets.SystemRandom,
+) -> str:
+    """Flip an unchecked box, append a new item, or both.
+
+    Real TODO files evolve: items get checked off as work happens and
+    new items get added.  One of those mutations (or both) is picked
+    per call; when *prev* has no unchecked box the only option is to
+    append.  An appended item is pre-checked with probability 0.15.
+
+    The result always ends with exactly one trailing newline, whether
+    or not *prev* did, so repeated edits keep the file
+    newline-terminated.  *persona* is unused.
+    """
+    lines = prev.splitlines()
+    unchecked_indices = [
+        i for i, ln in enumerate(lines) if ln.startswith("- [ ]")
+    ]
+    op = rng.choice(("flip", "append", "both") if unchecked_indices else ("append",))
+    if op in ("flip", "both") and unchecked_indices:
+        idx = rng.choice(unchecked_indices)
+        lines[idx] = lines[idx].replace("- [ ]", "- [x]", 1)
+    if op in ("append", "both"):
+        new_item = rng.choice(_TODO_VERBS)
+        marker = "[x]" if rng.random() < 0.15 else "[ ]"
+        lines.append(f"- {marker} {new_item}")
+    # ``splitlines`` drops the final newline and ``join`` does not put
+    # it back, so it is restored unconditionally here.
+    return "\n".join(lines) + "\n"
+
+
+def _edit_note(
+    prev: str, persona: str, rng: secrets.SystemRandom,
+) -> str:
+    """Append one randomly chosen note line to the end of *prev*.
+
+    A separating newline is inserted only when *prev* does not already
+    end with one; the result always ends with a newline.  *persona*
+    is unused.
+    """
+    addition = rng.choice(_NOTE_TEMPLATES)
+    separator = "" if prev.endswith("\n") else "\n"
+    return f"{prev}{separator}{addition}\n"
+
+
+def _edit_draft(
+    prev: str, persona: str, rng: secrets.SystemRandom,
+) -> str:
+    """Append one canned closing paragraph to the draft.
+
+    Trailing whitespace on *prev* is stripped first, so the new
+    paragraph is always separated from the body by exactly one blank
+    line.  *persona* is unused.
+    """
+    closers = (
+        "\nFollow-up: I'll send the deck once finance signs off on the numbers.\n",
+        "\nP.S.: Looping in ops on the rollout sequence — they have context I don't.\n",
+        "\nLet me know if any of this needs another pass.\n",
+    )
+    body = prev.rstrip()
+    extra = rng.choice(closers)
+    return f"{body}\n{extra}"
+
+
+def _edit_script(
+    prev: str, persona: str, rng: secrets.SystemRandom,
+) -> str:
+    """Append one canned comment line to the end of the script.
+
+    Trailing whitespace on *prev* is stripped first, so the comment
+    lands directly under the last line of code.  *persona* is unused.
+    """
+    remarks = (
+        "# TODO: handle the empty-input case\n",
+        "# 2026-04-27: hardened error path after the prod incident\n",
+        "# noqa: shellcheck disagrees but this is what the runbook says\n",
+    )
+    script = prev.rstrip()
+    remark = rng.choice(remarks)
+    return f"{script}\n{remark}"
+
+
+def _edit_log_cron(
+    prev: str, persona: str, rng: secrets.SystemRandom,
+) -> str:
+    """Append one cron syslog line to *prev* — logs only ever grow.
+
+    The new line is the last line of a freshly generated cron body.
+    """
+    fresh = _body_log_cron(persona, rng).splitlines()
+    return f"{prev.rstrip()}\n{fresh[-1]}\n"
+
+
+def _edit_log_daemon(
+    prev: str, persona: str, rng: secrets.SystemRandom,
+) -> str:
+    """Append one daemon syslog line to *prev*.
+
+    The new line is the last line of a freshly generated daemon body.
+    """
+    fresh = _body_log_daemon(persona, rng).splitlines()
+    return f"{prev.rstrip()}\n{fresh[-1]}\n"
+
+
+# Dispatch table: content class -> editor taking (previous_body, persona, rng).
+# Only the classes listed here can be edited in place; every other class
+# has no entry.
+_EDITORS: dict[ContentClass, Callable[[str, str, secrets.SystemRandom], str]] = {
+    ContentClass.NOTE: _edit_note,
+    ContentClass.TODO: _edit_todo,
+    ContentClass.DRAFT: _edit_draft,
+    ContentClass.SCRIPT: _edit_script,
+    ContentClass.LOG_CRON: _edit_log_cron,
+    ContentClass.LOG_DAEMON: _edit_log_daemon,
+}
+
+
+def next_iteration(
+    content_class: ContentClass,
+    persona: str,
+    previous_body: str,
+    *,
+    rand: Optional[secrets.SystemRandom] = None,
+) -> str:
+    """Return the edited successor of *previous_body*.
+
+    Dispatches to the editor registered for *content_class*, handing
+    it *rand* (or a fresh ``secrets.SystemRandom`` when omitted).
+
+    Raises :class:`KeyError` for content classes with no registered
+    editor (canary blobs, cache-tmp scratch files, email).  The
+    planner is expected to filter those out beforehand, so hitting
+    this error signals a bug worth surfacing loudly.
+    """
+    rng = rand or secrets.SystemRandom()
+    if content_class not in _EDITORS:
+        raise KeyError(
+            f"content_class={content_class!r} does not support edits"
+        )
+    apply_edit = _EDITORS[content_class]
+    return apply_edit(previous_body, persona, rng)
--- a/decnet/realism/diurnal.py
+++ b/decnet/realism/diurnal.py
@@ -0,0 +1,152 @@
+"""Work-hours gating and backdated mtime sampling.
+
+The current orchestrator stamps every planted file at wall-clock-now,
+which is one of the realism failures driving this migration: a `cron.log`
+that says it was last touched at 03:14:22 UTC on a workstation
+attributed to a 9-to-5 admin reads as fake on first glance.
+
+Two helpers:
+
+* :func:`in_work_hours` — gate planner ticks so a persona's files only
+  appear inside the persona's ``active_hours`` window.  Wrap-around
+  windows (``"22:00-06:00"``) are supported.
+* :func:`sample_mtime` — return a backdated datetime whose hour-of-day
+  falls inside the persona's window, biased toward "recent but not
+  now".  Drivers pass this to ``touch -d``.
+
+Clock and RNG are injectable so tests don't need to ``freeze_time`` or
+patch :mod:`secrets`.
+"""
+from __future__ import annotations
+
+import secrets
+from datetime import datetime, timedelta
+from typing import Protocol
+
+
+class _ClockLike(Protocol):
+    """Structural type for a clock: a zero-argument callable returning a ``datetime``."""
+
+    def __call__(self) -> datetime: ...
+
+
+class _RandLike(Protocol):
+    """Structural type for an RNG offering ``random()`` and ``randint(a, b)``."""
+
+    def random(self) -> float: ...
+    def randint(self, a: int, b: int) -> int: ...
+
+
+def _parse_window(window: str) -> tuple[int, int, int, int] | None:
+    """Parse ``"HH:MM-HH:MM"`` into ``(start_h, start_m, end_h, end_m)``.
+
+    Returns ``None`` for malformed input — callers treat that as
+    "always-on" so a single config typo never silences the whole fleet
+    (mirrors :func:`decnet.realism.personas.in_active_hours` semantics).
+    "Malformed" covers a wrong number of ``-`` / ``:`` separated
+    parts, non-integer fields, hours outside 0-23, minutes outside
+    0-59, and values that are not strings at all (e.g. ``None`` from
+    an unset config field).
+    """
+    try:
+        start_s, end_s = window.split("-")
+        start_h, start_m = (int(p) for p in start_s.split(":"))
+        end_h, end_m = (int(p) for p in end_s.split(":"))
+    except (AttributeError, TypeError, ValueError, IndexError):
+        # AttributeError / TypeError: *window* is not a str (None, int,
+        # bytes, ...).  ValueError: wrong part count or a non-integer
+        # field.
+        return None
+    if not (0 <= start_h < 24 and 0 <= end_h < 24):
+        return None
+    if not (0 <= start_m < 60 and 0 <= end_m < 60):
+        return None
+    return start_h, start_m, end_h, end_m
+
+
+def in_work_hours(window: str, now: datetime) -> bool:
+    """Tell whether *now*'s time-of-day lies inside *window*.
+
+    *window* is ``"HH:MM-HH:MM"``; the start is inclusive and the end
+    exclusive.  A start later than the end means the window spans
+    midnight.  Equal start and end means always-on.  A malformed
+    window also yields ``True`` — fail-open so a typo doesn't silence
+    the fleet.
+    """
+    parsed = _parse_window(window)
+    if parsed is None:
+        return True
+
+    start_h, start_m, end_h, end_m = parsed
+    opens = start_h * 60 + start_m
+    closes = end_h * 60 + end_m
+    if opens == closes:
+        return True
+
+    minute_of_day = now.hour * 60 + now.minute
+    after_open = minute_of_day >= opens
+    before_close = minute_of_day < closes
+    if opens < closes:
+        return after_open and before_close
+    # Wrap-around (e.g. 22:00-06:00): either side of midnight counts.
+    return after_open or before_close
+
+
+def sample_mtime(
+    window: str,
+    now: datetime,
+    *,
+    rand: _RandLike | None = None,
+    backdate_min_hours: float = 0.5,
+    backdate_max_days: float = 14.0,
+) -> datetime:
+    """Return a backdated ``datetime`` for ``touch -d`` after a write.
+
+    The raw backdate is ``-ln(u) * 12`` hours for a random ``u`` — an
+    exponential draw with a 12-hour mean (half-life roughly 8 hours),
+    so most results look "edited recently".  It is clipped to at
+    least *backdate_min_hours* and at most *backdate_max_days* days
+    before *now*.
+
+    When that candidate's time-of-day falls outside *window*, it is
+    snapped to a random in-window minute on the same calendar date so
+    an `admin` persona's `TODO.md` doesn't carry an mtime of 03:14:22.
+    A snapped value later than ``now - backdate_min_hours`` is moved
+    back in whole days until it is not.  Wrap-around windows are
+    honoured; the snap targets the longer of their two segments.
+    Because snapping keeps the date but changes the clock time, a
+    snapped result can fall slightly outside the *backdate_max_days*
+    cap.
+
+    If *window* is malformed, or its start equals its end, the clipped
+    candidate is returned as-is with no snapping.
+
+    *rand* must provide ``random()`` and ``randint(a, b)``; a fresh
+    ``secrets.SystemRandom`` is used when it is omitted.
+    """
+    rng = rand or secrets.SystemRandom()
+    parsed = _parse_window(window)
+
+    # Exponential-ish backdate via -ln(u): heavier mass near "recent".
+    # Cap by clipping; cheap and good enough for realism.
+    u = max(rng.random(), 1e-6)  # avoid log(0)
+    import math
+    span_hours = max(backdate_min_hours, min(backdate_max_days * 24, -math.log(u) * 12.0))
+    candidate = now - timedelta(hours=span_hours)
+
+    if parsed is None:
+        return candidate
+
+    start_h, start_m, end_h, end_m = parsed
+    if (start_h, start_m) == (end_h, end_m):
+        return candidate
+
+    # If the candidate's hour-of-day is outside the window, snap it into
+    # the window on the same calendar date — preserves the "this many
+    # days ago" feel while making the clock-face credible.
+    # NOTE: the snap minute below is drawn before the in-window check,
+    # so the RNG is consumed the same way on both paths.
+    cur = candidate.hour * 60 + candidate.minute
+    start = start_h * 60 + start_m
+    end = end_h * 60 + end_m
+    if start < end:
+        in_window = start <= cur < end
+        snap_minutes = rng.randint(start, max(start, end - 1))
+    else:
+        # Wrap-around: in-window if cur is in either segment.
+        in_window = cur >= start or cur < end
+        # Snap into the larger of the two segments by total length.
+        before_midnight = (24 * 60) - start
+        after_midnight = end
+        if before_midnight >= after_midnight:
+            snap_minutes = rng.randint(start, 24 * 60 - 1)
+        else:
+            snap_minutes = rng.randint(0, max(0, end - 1))
+
+    if in_window:
+        return candidate
+    snapped = candidate.replace(
+        hour=snap_minutes // 60,
+        minute=snap_minutes % 60,
+        second=rng.randint(0, 59),
+        microsecond=0,
+    )
+    # If the hour-snap pushed us too close to *now* (candidate was
+    # earlier today but the random in-window minute landed near or
+    # later than the current clock), shift back a full day so the
+    # result honours the min-backdate floor.
+    floor = now - timedelta(hours=backdate_min_hours)
+    while snapped > floor:
+        snapped -= timedelta(days=1)
+    return snapped
--- a/decnet/realism/llm/init.py
+++ b/decnet/realism/llm/init.py
@@ -0,0 +1,17 @@
+"""LLM backend for the realism library.
+
+Pluggable per the provider-subpackages convention (mirrors
+:mod:`decnet.web.db` and :mod:`decnet.bus`): consumers depend on
+:class:`LLMBackend` from :mod:`base`; concrete transports live under
+:mod:`impl` and are selected by :func:`get_llm`.
+
+This is the seam to pull on when swapping local Ollama for the
+Anthropic API, llama.cpp, vLLM, or any other inference server — change
+``DECNET_REALISM_LLM`` (or pass ``llm=`` directly), no caller rewrite.
+"""
+from __future__ import annotations
+
+from decnet.realism.llm.base import LLMBackend, LLMResult, LLMTimeout
+from decnet.realism.llm.factory import get_llm
+
+__all__ = ["LLMBackend", "LLMResult", "LLMTimeout", "get_llm"]
--- a/decnet/realism/llm/base.py
+++ b/decnet/realism/llm/base.py
@@ -0,0 +1,47 @@
+"""Backend protocol shared by every LLM transport.
+
+Deliberately narrow: realism consumers need one async ``generate``
+call that takes a prompt string and returns the model's output text
+plus enough metadata to populate per-event payloads (model name,
+latency, success bit).  Streaming, embeddings, multi-turn chat — all
+out of scope here; realism only ever does one-shot single-prompt
+generations.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Protocol
+
+
+class LLMTimeout(Exception):
+    """Raised when a generation exceeds the backend's wall-clock cap.
+
+    Backends MUST raise this rather than returning silently empty
+    output; the driver discriminates timeout from "model produced
+    nothing useful" so payloads carry the right ``stage`` value.
+
+    A plain ``Exception`` subclass: it defines no attributes or
+    methods of its own.
+    """
+
+
+@dataclass
+class LLMResult:
+    """Outcome of one ``generate`` call.
+
+    ``success`` is ``False`` when the backend ran cleanly but produced
+    no usable output (e.g. an empty stdout).  Hard failures (subprocess
+    crash, network error) raise; soft failures land here so the driver
+    can persist + log them as one event.
+    """
+    # False for a soft failure (ran cleanly, nothing usable came back).
+    success: bool
+    # The model's output text.
+    text: str
+    # Name of the model that produced the text.
+    model: str
+    # Latency of the call in milliseconds.
+    latency_ms: int
+    # Free-form backend-specific metadata; defaults to a fresh empty dict.
+    extra: dict[str, Any] = field(default_factory=dict)
+
+
+class LLMBackend(Protocol):
+    """Minimal contract for a realism LLM provider.
+
+    A provider exposes its ``model`` name, a ``timeout`` value, and a
+    single async ``generate`` call that maps one prompt string to one
+    :class:`LLMResult`.
+    """
+
+    # Name of the model this backend generates with.
+    model: str
+    # Timeout value for the backend; presumably seconds — TODO confirm.
+    timeout: float
+
+    async def generate(self, prompt: str) -> LLMResult: ...
--- a/decnet/realism/llm/circuit.py
+++ b/decnet/realism/llm/circuit.py
@@ -0,0 +1,99 @@
+"""Process-local circuit breaker for LLM calls.
+
+Per-call timeouts (``asyncio.wait_for(llm.generate, timeout=...)``)
+protect a single tick from a single hung Ollama.  They do NOT protect
+the worker from a *sustained* problem: 100 consecutive 60-second
+timeouts chew up an hour of orchestrator time on dead requests before
+anything notices.
+
+This breaker counts consecutive failures (any success resets the
+count) and flips ``open`` after ``failure_threshold`` of them.  Open
+breakers short-circuit ``allow_call`` to ``False`` so callers fall
+back to deterministic templates without the per-tick cost.  After
+``cooldown_seconds`` the breaker enters ``half_open`` and the next
+call is allowed; success closes the breaker, failure re-opens it
+with a fresh cooldown.
+
+Process-local on purpose — cross-process state would require shared
+memory and is overkill for a single orchestrator worker.
+"""
+from __future__ import annotations
+
+import threading
+import time
+from enum import Enum
+
+
+class _State(Enum):
+    """Breaker lifecycle states; the values are what ``state`` reports."""
+
+    CLOSED = "closed"
+    OPEN = "open"
+    HALF_OPEN = "half_open"
+
+
+class LLMCircuitBreaker:
+    """Lock-guarded consecutive-failure circuit breaker.
+
+    The breaker opens once ``failure_threshold`` failures (default 3)
+    have been recorded with no success in between.  While open,
+    ``allow_call`` returns ``False`` until ``cooldown_seconds``
+    (default 60) have passed on *clock*; it then moves to half-open
+    and lets calls through again.  A success in any state closes the
+    breaker and clears the failure count; a failure while half-open
+    re-opens it with a fresh cooldown.
+
+    All state is guarded by one ``threading.Lock``.  *clock* is
+    injectable (default ``time.monotonic``) so tests can drive the
+    cooldown without sleeping.
+    """
+
+    def __init__(
+        self,
+        *,
+        failure_threshold: int = 3,
+        cooldown_seconds: float = 60.0,
+        clock=time.monotonic,
+    ) -> None:
+        self._lock = threading.Lock()
+        self._clock = clock
+        self._failure_threshold = failure_threshold
+        self._cooldown = cooldown_seconds
+        self._state = _State.CLOSED
+        self._consecutive_failures = 0
+        self._opened_at: float = 0.0
+
+    @property
+    def state(self) -> str:
+        """Current state as a string: "closed", "open" or "half_open"."""
+        with self._lock:
+            current = self._state
+        return current.value
+
+    def allow_call(self) -> bool:
+        """Say whether the caller should attempt the LLM call.
+
+        Closed and half-open breakers always allow.  An open breaker
+        refuses until the cooldown has elapsed, at which point it is
+        promoted to half-open and the call is allowed as a probe.
+        """
+        with self._lock:
+            if self._state is not _State.OPEN:
+                return True
+            waited = self._clock() - self._opened_at
+            if waited < self._cooldown:
+                return False
+            self._state = _State.HALF_OPEN
+            return True
+
+    def record_success(self) -> None:
+        """Close the breaker and forget every recorded failure."""
+        with self._lock:
+            self._consecutive_failures = 0
+            self._opened_at = 0.0
+            self._state = _State.CLOSED
+
+    def record_failure(self) -> None:
+        """Count one failure and open the breaker when warranted.
+
+        A failure while half-open re-opens immediately and leaves the
+        failure count untouched (the probe failing implies the
+        underlying issue is unresolved).  Otherwise the count is
+        incremented and the breaker opens once it reaches the
+        threshold.
+        """
+        with self._lock:
+            probe_failed = self._state is _State.HALF_OPEN
+            if not probe_failed:
+                self._consecutive_failures += 1
+                if self._consecutive_failures < self._failure_threshold:
+                    return
+            # Either the probe failed or the threshold was reached:
+            # (re)open and restart the cooldown.
+            self._state = _State.OPEN
+            self._opened_at = self._clock()
--- a/decnet/realism/llm/factory.py
+++ b/decnet/realism/llm/factory.py
@@ -0,0 +1,46 @@
+"""Backend dispatch.
+
+Reads ``DECNET_REALISM_LLM`` to pick a concrete :class:`LLMBackend`.
+Defaults to ``ollama`` because that's what the prototype proved out and
+what most dev boxes have on hand.
+
+Supported keys:
+
+* ``ollama`` — :class:`decnet.realism.llm.impl.ollama.OllamaBackend`
+* ``fake``   — :class:`decnet.realism.llm.impl.fake.FakeBackend`
+  (canned output, used by tests so they don't shell out)
+
+Anthropic / vLLM / llama.cpp slots in here as a third branch when the
+need shows up.  Per the provider-subpackages convention, do NOT collapse
+factory dispatch into the impl modules — keeps the ``__init__`` import
+graph cycle-free and the env contract auditable in one place.
+"""
+from __future__ import annotations
+
+import os
+from typing import Any
+
+from decnet.realism.llm.base import LLMBackend
+
+
+def get_llm(*, model: str | None = None, **kwargs: Any) -> LLMBackend:
+    """Build the LLM backend named by ``DECNET_REALISM_LLM``.
+
+    The variable is read case-insensitively and defaults to
+    ``ollama``.  *model*, when given, overrides the backend's own
+    default model, so the worker can honour
+    ``decnet orchestrate --model gpt-oss`` without each backend having
+    to know about CLI flags; the fake backend falls back to
+    ``"fake-model"``.  Extra keyword arguments are forwarded to the
+    backend constructor.
+
+    Raises :class:`ValueError` for any other backend key.
+    """
+    selected = os.environ.get("DECNET_REALISM_LLM", "ollama").lower()
+
+    if selected not in ("ollama", "fake"):
+        raise ValueError(
+            f"Unsupported DECNET_REALISM_LLM={selected!r}; "
+            "expected one of: ollama, fake"
+        )
+
+    # Backends are imported lazily so only the selected one is loaded.
+    if selected == "fake":
+        from decnet.realism.llm.impl.fake import FakeBackend
+        return FakeBackend(model=model or "fake-model", **kwargs)
+
+    from decnet.realism.llm.impl.ollama import OllamaBackend
+    return OllamaBackend(model=model, **kwargs)
--- a/decnet/realism/llm/impl/init.py
+++ b/decnet/realism/llm/impl/init.py
@@ -0,0 +1,6 @@
+"""Concrete LLM-backend implementations.
+
+Importers go through :func:`decnet.realism.llm.get_llm`, not these
+modules directly — same convention as :mod:`decnet.web.db.sqlite` and
+:mod:`decnet.bus.unix_client`.
+"""
--- a/decnet/realism/llm/impl/fake.py
+++ b/decnet/realism/llm/impl/fake.py
@@ -0,0 +1,50 @@
+"""In-process fake backend for tests.
+
+Returns a canned string so the driver path can be exercised without an
+Ollama install.  Configurable via ``DECNET_REALISM_FAKE_OUTPUT`` (env)
+or the ``output`` constructor arg — the env-var path lets integration
+tests run the worker end-to-end with deterministic output.
+"""
+from __future__ import annotations
+
+import os
+import time
+from typing import Optional
+
+from decnet.realism.llm.base import LLMBackend, LLMResult
+
+
+_DEFAULT_OUTPUT = (
+    "Subject: Quick update\n\n"
+    "Hi,\n\nFollowing up on the topic.\n\nBest regards,\nFake Persona\n"
+)
+
+
+class FakeBackend(LLMBackend):
+    """Backend that answers every prompt with a fixed, canned string."""
+
+    def __init__(
+        self,
+        *,
+        model: str = "fake-model",
+        timeout: float = 1.0,
+        output: Optional[str] = None,
+        success: bool = True,
+    ) -> None:
+        """Store the canned reply and the success flag to report.
+
+        An explicit *output* wins; otherwise the reply comes from
+        ``DECNET_REALISM_FAKE_OUTPUT``, falling back to the built-in
+        default text.
+        """
+        if output is None:
+            output = os.environ.get(
+                "DECNET_REALISM_FAKE_OUTPUT", _DEFAULT_OUTPUT,
+            )
+        self.model = model
+        self.timeout = timeout
+        self._output = output
+        self._success = success
+
+    async def generate(self, prompt: str) -> LLMResult:    # noqa: ARG002
+        """Return the canned result; *prompt* is ignored.
+
+        On a configured failure the text is empty and ``extra["rc"]`` is 1.
+        """
+        started = time.monotonic()
+        ok = self._success
+        body = self._output if ok else ""
+        exit_code = 0 if ok else 1
+        elapsed_ms = int((time.monotonic() - started) * 1000)
+        return LLMResult(
+            success=ok,
+            text=body,
+            model=self.model,
+            latency_ms=elapsed_ms,
+            extra={"rc": exit_code},
+        )
--- a/decnet/realism/llm/impl/ollama.py
+++ b/decnet/realism/llm/impl/ollama.py
@@ -0,0 +1,100 @@
+"""Ollama subprocess backend.
+
+Shells out to ``ollama run <model>`` with the prompt fed via stdin.
+
+Why subprocess and not the Ollama HTTP API:
+* No new dependency (``ollama`` Python lib is optional).
+* Works on hosts where Ollama is bound to a unix socket, an unusual TCP
+  port, or behind a remote-mount layer — `ollama run` resolves all that.
+* Same path the operator uses by hand (``ollama run llama3.1``); easier
+  to debug discrepancies between worker output and a console session.
+
+Cost: per-call process spawn (~50ms on a warm box).  Acceptable for
+realism tick rates (one body per ~5 minutes per persona by default).
+When that cost matters, swap to an HTTP-API backend; the seam is in
+:mod:`decnet.realism.llm.factory`.
+"""
+from __future__ import annotations
+
+import asyncio
+import os
+import time
+from typing import Optional
+
+from decnet.logging import get_logger
+from decnet.realism.llm.base import LLMBackend, LLMResult, LLMTimeout
+
+log = get_logger("realism.llm")
+
+_OLLAMA = "ollama"
+_DEFAULT_MODEL = os.environ.get("DECNET_REALISM_MODEL", "llama3.1")
+_DEFAULT_TIMEOUT = float(os.environ.get("DECNET_REALISM_TIMEOUT", "60"))
+
+
+class OllamaBackend(LLMBackend):
+    """Concrete :class:`LLMBackend` that shells out to ``ollama run``."""
+
+    def __init__(
+        self,
+        *,
+        model: Optional[str] = None,
+        timeout: Optional[float] = None,
+    ) -> None:
+        """Pick the model and timeout, falling back to the module defaults.
+
+        A falsy *model* (``None`` or ``""``) selects ``_DEFAULT_MODEL``;
+        only ``timeout=None`` selects ``_DEFAULT_TIMEOUT``.
+        """
+        self.model = model or _DEFAULT_MODEL
+        self.timeout = timeout if timeout is not None else _DEFAULT_TIMEOUT
+
+    async def generate(self, prompt: str) -> LLMResult:
+        """Run ``ollama run <model>`` with *prompt* on stdin.
+
+        Returns a failed :class:`LLMResult` (never raises) when the
+        binary is missing (``rc`` 127), the process exits non-zero, or
+        stdout is blank; a successful one otherwise.
+
+        Raises:
+            LLMTimeout: the process did not finish within ``self.timeout``
+                seconds.  The child is killed and reaped before raising.
+        """
+        t0 = time.monotonic()
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                _OLLAMA, "run", self.model,
+                stdin=asyncio.subprocess.PIPE,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+            )
+        except FileNotFoundError as exc:
+            latency_ms = int((time.monotonic() - t0) * 1000)
+            return LLMResult(
+                success=False,
+                text="",
+                model=self.model,
+                latency_ms=latency_ms,
+                extra={"rc": 127, "stderr": f"argv[0] not found: {exc}"},
+            )
+        try:
+            stdout, stderr = await asyncio.wait_for(
+                proc.communicate(prompt.encode("utf-8")),
+                timeout=self.timeout,
+            )
+        except asyncio.TimeoutError as exc:
+            try:
+                proc.kill()
+            except ProcessLookupError:
+                # Already exited between the timeout and the kill.
+                pass
+            # kill() only sends the signal; wait() collects the exit
+            # status so a timed-out call does not leave a zombie child
+            # (and its pipe transports) behind on every tick.
+            await proc.wait()
+            raise LLMTimeout(
+                f"ollama run {self.model} exceeded {self.timeout}s"
+            ) from exc
+
+        latency_ms = int((time.monotonic() - t0) * 1000)
+        rc = proc.returncode if proc.returncode is not None else -1
+        text = stdout.decode("utf-8", "replace")
+        stderr_s = stderr.decode("utf-8", "replace")
+        # Blank stdout counts as failure even when the exit code is 0.
+        if rc != 0 or not text.strip():
+            log.warning(
+                "ollama backend non-zero / empty rc=%d model=%s stderr=%r",
+                rc, self.model, stderr_s[:200],
+            )
+            return LLMResult(
+                success=False,
+                text=text,
+                model=self.model,
+                latency_ms=latency_ms,
+                extra={"rc": rc, "stderr": stderr_s.strip()[:256]},
+            )
+        return LLMResult(
+            success=True,
+            text=text,
+            model=self.model,
+            latency_ms=latency_ms,
+            extra={"rc": rc},
+        )
--- a/decnet/realism/naming.py
+++ b/decnet/realism/naming.py
@@ -0,0 +1,184 @@
+"""Per-content-class filename generators.
+
+The pre-realism orchestrator emitted ``notes-1777315854.txt``
+(unix-epoch suffix) — a tell on first glance.  Real users name
+``notes.txt``, ``TODO.md``, ``backup-2025-04.sql.gz``.  Real systems
+write ``cron.log``, ``cron.log.1``, ``cron.log.2.gz`` (logrotate
+shape, no epoch).
+
+Stage 3 ships **deterministic templates only**, persona-conditioned.
+Stage 6 wires LLM enrichment for the user-classes (``note``, ``todo``,
+``draft``, ``script``); the deterministic templates remain the
+fallback when LLM is disabled or times out.
+
+The factory mirrors :func:`decnet.canary.factory.get_generator`:
+caller passes a :class:`~decnet.realism.taxonomy.ContentClass`; we
+return the namer registered for it.  Renaming a content_class is a
+schema change and would invalidate ``synthetic_files.path`` lookups,
+so the dispatch is exhaustive — no silent fallbacks for unknown
+classes.
+"""
+from __future__ import annotations
+
+import secrets
+import string
+from typing import Callable, Optional
+
+from decnet.realism.personas import login_for
+from decnet.realism.taxonomy import ContentClass
+
+
+# Persona → home-dir convention.  Most personas are linux-style; the
+# rare "windows" persona gets ``C:\\Users\\<persona>\\Documents`` style
+# paths (out of scope until per-OS personas land).  For now everything
+# is POSIX.
+def _home(persona: str) -> str:
+    """Build ``/home/<login>`` from the login derived for *persona*."""
+    login = login_for(persona)
+    return "/home/" + login
+
+
+def _random_token(rng: secrets.SystemRandom, length: int = 6) -> str:
+    """Draw *length* characters from ``[a-z0-9]`` using *rng* — ``mkstemp``-like."""
+    alphabet = string.ascii_lowercase + string.digits
+    picked = []
+    for _ in range(length):
+        picked.append(rng.choice(alphabet))
+    return "".join(picked)
+
+
+# ── User-class namers ──────────────────────────────────────────────────────
+
+
+_NOTE_NAMES: tuple[str, ...] = (
+    "notes.txt", "scratch.md", "ideas.txt", "Untitled-3.txt",
+    "draft.md", "keys.txt", "passwords.txt", "TODO.md",
+)
+
+_TODO_NAMES: tuple[str, ...] = (
+    "TODO.md", "todo.txt", "things.md", "tasks.txt", "punchlist.md",
+)
+
+_DRAFT_NAMES: tuple[str, ...] = (
+    "Q3-budget-DRAFT.md", "proposal.md", "letter.txt",
+    "rfc-internal.md", "memo.txt", "1on1-notes.md",
+)
+
+_SCRIPT_NAMES: tuple[str, ...] = (
+    "backup.sh", "deploy.sh", "cleanup.sh", "rotate.sh",
+    "fix.py", "tmp.py", "scratch.py",
+)
+
+
+def _name_user(
+    persona: str, names: tuple[str, ...], rng: secrets.SystemRandom,
+) -> str:
+    """Join *persona*'s home directory with one entry drawn from *names*."""
+    home_dir = _home(persona)
+    filename = rng.choice(names)
+    return home_dir + "/" + filename
+
+
+def _name_note(persona: str, rng: secrets.SystemRandom) -> str:
+    """Pick a note filename from ``_NOTE_NAMES`` under *persona*'s home."""
+    return f"{_home(persona)}/{rng.choice(_NOTE_NAMES)}"
+
+
+def _name_todo(persona: str, rng: secrets.SystemRandom) -> str:
+    """Pick a TODO filename from ``_TODO_NAMES`` under *persona*'s home."""
+    return f"{_home(persona)}/{rng.choice(_TODO_NAMES)}"
+
+
+def _name_draft(persona: str, rng: secrets.SystemRandom) -> str:
+    """Pick a draft filename from ``_DRAFT_NAMES`` under *persona*'s home."""
+    return f"{_home(persona)}/{rng.choice(_DRAFT_NAMES)}"
+
+
+def _name_script(persona: str, rng: secrets.SystemRandom) -> str:
+    """Pick a script filename from ``_SCRIPT_NAMES`` under *persona*'s home."""
+    return f"{_home(persona)}/{rng.choice(_SCRIPT_NAMES)}"
+
+
+# ── System-class namers ────────────────────────────────────────────────────
+
+
+# logrotate skeleton: cron.log, cron.log.1, cron.log.2.gz.  No epoch
+# suffix — the realism failure today is `cron-1777317867.log`.
+_CRON_LOGROTATE: tuple[str, ...] = (
+    "/var/log/cron.log", "/var/log/cron.log.1", "/var/log/cron.log.2.gz",
+)
+_DAEMON_LOGROTATE: tuple[str, ...] = (
+    "/var/log/daemon.log", "/var/log/syslog", "/var/log/messages",
+    "/var/log/auth.log", "/var/log/auth.log.1",
+)
+
+
+def _name_log_cron(persona: str, rng: secrets.SystemRandom) -> str:
+    """Pick one cron log path; *persona* is accepted but not used."""
+    candidates = _CRON_LOGROTATE
+    return rng.choice(candidates)
+
+
+def _name_log_daemon(persona: str, rng: secrets.SystemRandom) -> str:
+    """Pick one daemon/system log path; *persona* is accepted but not used."""
+    candidates = _DAEMON_LOGROTATE
+    return rng.choice(candidates)
+
+
+def _name_cache_tmp(persona: str, rng: secrets.SystemRandom) -> str:
+    """Return a hidden ``/tmp/.cache-XXXXXX`` path with a random 6-char suffix.
+
+    *persona* is accepted but not used.
+    """
+    # mkstemp-shaped name; the leading dot hides it from a plain `ls`.
+    suffix = _random_token(rng, 6)
+    # Bandit B108 flags the literal "/tmp/" below.  Suppressed on that
+    # line: the path is generated for a target decky, never opened on
+    # this host.
+    return "/tmp/.cache-" + suffix  # nosec B108
+
+
+# ── Email + canary placeholders ────────────────────────────────────────────
+# Email "names" (paths) are produced by the email driver's spool logic,
+# not by realism naming. Canary paths are advisory — operators usually
+# specify ``placement_path`` directly. Stage 7 of the realism migration
+# refines canary placement based on persona + content_class.
+
+
+def _name_email(persona: str, rng: secrets.SystemRandom) -> str:
+    """Always raise: this module does not name email paths."""
+    reason = (
+        "email paths come from the email driver's spool logic, not "
+        "realism.naming"
+    )
+    raise NotImplementedError(reason)
+
+
+def _name_canary(persona: str, rng: secrets.SystemRandom) -> str:
+    """Always raise: this module does not choose canary placement."""
+    reason = (
+        "canary placement is set by the canary cultivator (stage 7), "
+        "not realism.naming"
+    )
+    raise NotImplementedError(reason)
+
+
+# ── Dispatch ───────────────────────────────────────────────────────────────
+
+
+# Content class → namer.  Every namer takes ``(persona, rng)`` and returns
+# a path string.  EMAIL and all CANARY_* classes are registered on purpose:
+# their namers raise NotImplementedError rather than return a path.
+_NAMERS: dict[ContentClass, Callable[[str, secrets.SystemRandom], str]] = {
+    ContentClass.NOTE: _name_note,
+    ContentClass.TODO: _name_todo,
+    ContentClass.DRAFT: _name_draft,
+    ContentClass.SCRIPT: _name_script,
+    ContentClass.LOG_CRON: _name_log_cron,
+    ContentClass.LOG_DAEMON: _name_log_daemon,
+    ContentClass.CACHE_TMP: _name_cache_tmp,
+    ContentClass.EMAIL: _name_email,
+    ContentClass.CANARY_AWS_CREDS: _name_canary,
+    ContentClass.CANARY_ENV_FILE: _name_canary,
+    ContentClass.CANARY_GIT_CONFIG: _name_canary,
+    ContentClass.CANARY_SSH_KEY: _name_canary,
+    ContentClass.CANARY_HONEYDOC: _name_canary,
+    ContentClass.CANARY_HONEYDOC_DOCX: _name_canary,
+    ContentClass.CANARY_HONEYDOC_PDF: _name_canary,
+    ContentClass.CANARY_MYSQL_DUMP: _name_canary,
+}
+
+
+def make_path(
+    content_class: ContentClass,
+    persona: str,
+    *,
+    rand: Optional[secrets.SystemRandom] = None,
+) -> str:
+    """Generate an absolute container-side path for *content_class*.
+
+    User classes land under the persona's home directory
+    (``/home/<persona>/…``); system classes ignore *persona* and draw
+    from fixed path lists.  A fresh ``SystemRandom`` is used when *rand*
+    is not supplied.
+
+    Raises:
+        KeyError: no namer is registered for *content_class*.
+        NotImplementedError: *content_class* is the email class or a
+            canary class — those paths come from the respective
+            drivers, not from realism naming.
+    """
+    if content_class not in _NAMERS:
+        raise KeyError(
+            f"no namer registered for content_class={content_class!r}"
+        )
+    rng = rand if rand else secrets.SystemRandom()
+    return _NAMERS[content_class](persona, rng)
--- a/decnet/realism/personas.py
+++ b/decnet/realism/personas.py
@@ -0,0 +1,153 @@
+"""Persona schema for realism content generation.
+
+Stored as a JSON list on :attr:`Topology.email_personas`.  Each persona
+describes one fictional employee — sender of email *and* author of
+files (notes, TODOs, drafts, scripts) on the deckies they're sampled
+onto.  The schema deliberately stays narrow: the LLM gets *enough*
+differentiation to write distinct voices, no more.
+
+The class is still named :class:`EmailPersona` because every persona
+in the pool today carries a mandatory email address (used for IMAP/
+POP3 spool delivery).  Future per-decky personas without mailboxes
+would justify a rename / superclass; not in scope for the realism
+migration.
+
+Invalid entries are dropped with a logged warning (the returned list
+simply omits them) rather than raising — a single typo in one persona
+must not stall the entire realism tick.
+"""
+from __future__ import annotations
+
+import json
+from typing import Literal, Optional
+
+from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator
+
+from decnet.logging import get_logger
+
+logger = get_logger("realism.personas")
+
+Tone = Literal["formal", "direct", "casual", "technical", "custom"]
+ReplyLatency = Literal["fast", "normal", "slow"]
+
+
+class EmailPersona(BaseModel):
+    """One fake mailbox owner.
+
+    ``language`` is ISO 639-1 (``en``, ``es``, ``pt``…); when unset on the
+    persona it falls back to the topology's ``language_default``.
+    ``uses_llms_heavily`` lifts the prompt-layer em-dash suppression for
+    that persona — em-dashes are an LLM tell, but a persona explicitly
+    pegged as a heavy LLM user should *naturally* produce them.
+
+    Validation enforced here: ``email`` must look like
+    ``user@domain.tld``, and ``tone="custom"`` requires a non-blank
+    ``tone_custom``.
+    """
+    # Required identity fields (length-bounded strings).
+    name: str = Field(min_length=1, max_length=128)
+    email: str = Field(min_length=3, max_length=255)
+    role: str = Field(min_length=1, max_length=128)
+    # Voice: one of the ``Tone`` literals; ``tone_custom`` carries the
+    # free-text description and is mandatory only for ``tone="custom"``.
+    tone: Tone = "formal"
+    tone_custom: Optional[str] = Field(default=None, max_length=128)
+    # At most 12 mannerism strings.
+    mannerisms: list[str] = Field(default_factory=list, max_length=12)
+    # ``None`` means "not set on the persona".
+    language: Optional[str] = Field(default=None, max_length=8)
+    signature: Optional[str] = Field(default=None, max_length=512)
+    # ``"HH:MM-HH:MM"`` window string; not validated by this model.
+    active_hours: str = Field(default="09:00-18:00", max_length=32)
+    reply_latency: ReplyLatency = "normal"
+    uses_llms_heavily: bool = False
+
+    @model_validator(mode="after")
+    def _custom_tone_requires_text(self) -> "EmailPersona":
+        # ``tone="custom"`` lets operators describe a voice the four canned
+        # tones don't capture (sarcastic, deadpan, terse, etc.).  The free
+        # text is interpolated into the prompt verbatim, so an empty
+        # value would just leave the LLM with the literal word "custom" —
+        # reject it loudly instead of silently producing a useless prompt.
+        # Whitespace-only text counts as empty.
+        if self.tone == "custom" and not (self.tone_custom and self.tone_custom.strip()):
+            raise ValueError("tone_custom is required when tone is 'custom'")
+        return self
+
+    @field_validator("email")
+    @classmethod
+    def _email_shape(cls, v: str) -> str:
+        # Cheap structural check — full RFC 5322 isn't worth the
+        # dependency.  We only need ``user@domain`` with non-empty parts
+        # for the prompt builder + Message-ID generator.
+        if "@" not in v:
+            raise ValueError("email must contain '@'")
+        # Split on the LAST "@": everything before it is the local part.
+        local, _, domain = v.rpartition("@")
+        # The domain only has to contain a dot somewhere; its labels are
+        # not checked individually.
+        if not local or not domain or "." not in domain:
+            raise ValueError("email must look like user@domain.tld")
+        return v
+
+
+def parse_personas(
+    raw: str | list | None,
+    *,
+    language_default: str = "en",
+) -> list[EmailPersona]:
+    """Turn the ``email_personas`` value (JSON text or list) into models.
+
+    Personas without a ``language`` get *language_default*, so
+    downstream consumers (prompt builder, scheduler) never deal with
+    the fallback themselves.  Nothing is raised for bad input: falsy
+    input, unparseable JSON, or a non-list value yield ``[]``; entries
+    that fail validation are logged and left out.
+    """
+    if not raw:
+        return []
+
+    entries = raw
+    if isinstance(entries, str):
+        try:
+            entries = json.loads(entries)
+        except json.JSONDecodeError as exc:
+            logger.warning("realism personas: invalid JSON, skipping: %s", exc)
+            return []
+
+    if not isinstance(entries, list):
+        logger.warning(
+            "realism personas: expected list, got %s", type(entries).__name__
+        )
+        return []
+
+    personas: list[EmailPersona] = []
+    for index, entry in enumerate(entries):
+        try:
+            candidate = EmailPersona.model_validate(entry)
+        except ValidationError as exc:
+            logger.warning(
+                "realism personas: dropping invalid entry index=%d: %s",
+                index, exc.errors(include_url=False),
+            )
+            continue
+        if candidate.language is not None:
+            personas.append(candidate)
+        else:
+            personas.append(
+                candidate.model_copy(update={"language": language_default})
+            )
+    return personas
+
+
+def login_for(persona: str) -> str:
+    """Derive a linux login from a persona's display name.
+
+    The name is lowercased and its spaces removed.  Anything that is
+    then not purely ASCII alphanumeric (or is empty) becomes ``user``,
+    so the display name never leaks onto the decky filesystem through
+    a path.  Used by realism path naming
+    (``decnet/realism/naming.py``) and canary cultivation
+    (``decnet/canary/cultivator.py``).
+    """
+    compact = "".join(persona.lower().split(" "))
+    usable = bool(compact) and compact.isascii() and compact.isalnum()
+    return compact if usable else "user"
+
+
+def in_active_hours(persona: EmailPersona, now_hour: int) -> bool:
+    """Return True if *now_hour* (0–23) falls in the persona's window.
+
+    Format: ``"HH:MM-HH:MM"``; only the hour parts are used, minutes
+    are ignored.  The window is start-inclusive, end-exclusive.
+    Wrap-around windows (``"22:00-06:00"``) are supported, and a window
+    whose start and end hour are equal means "always".
+
+    Invalid windows — unparseable text *or* hours outside the clock
+    (start not in 0–23, end not in 0–24) — treat the persona as
+    always-on so a config typo never silences the whole fleet.
+    """
+    try:
+        start_s, end_s = persona.active_hours.split("-")
+        start_h = int(start_s.split(":")[0])
+        end_h = int(end_s.split(":")[0])
+    except (ValueError, IndexError):
+        return True
+    # Out-of-range hours are typos too.  Without this guard a window
+    # like "30:00-40:00" parses fine and then never matches any hour,
+    # silencing the persona — the opposite of the always-on contract.
+    # ``24`` is tolerated as an end hour ("09:00-24:00").
+    if not (0 <= start_h <= 23 and 0 <= end_h <= 24):
+        return True
+    if start_h == end_h:
+        return True
+    if start_h < end_h:
+        return start_h <= now_hour < end_h
+    # Wrap-around (e.g. 22:00-06:00).
+    return now_hour >= start_h or now_hour < end_h
--- a/decnet/realism/personas_pool.py
+++ b/decnet/realism/personas_pool.py
@@ -0,0 +1,145 @@
+"""Global persona pool — non-topology deckies.
+
+DECNET runs in three deployment shapes that emit running deckies:
+
+* **MazeNET topologies**       — each topology owns its own
+  :attr:`Topology.email_personas` JSON list; consumers walk from the
+  decky back to its parent topology row.
+* **Unihost fleet**            — MACVLAN/IPVLAN deckies that have no
+  parent topology row at all.  They share one host-wide pool.
+* **SWARM shards**             — DeckyShard rows on enrolled workers.
+  Same shape as fleet for realism purposes (no parent topology row),
+  so they read the same global pool.
+
+This module owns the global pool: a JSON file on disk that operators
+populate via ``decnet realism import-personas <file>`` (or by editing
+the file directly).  The file is loaded lazily on first read and
+re-loaded on mtime change so a CLI import takes effect for the running
+worker without a restart.
+
+Path resolution order:
+
+1. ``DECNET_REALISM_PERSONAS`` environment variable — explicit override.
+2. ``/etc/decnet/email_personas.json`` — canonical master path; this is
+   what ``decnet init`` will eventually own.  Filename retained
+   (``email_personas.json``) because the on-disk schema hasn't changed
+   and operators may already have committed copies.
+3. ``~/.decnet/email_personas.json`` — dev fallback so a developer can
+   exercise consumers without root or ``decnet init``.
+
+When the file is missing / empty / unparseable, the pool is empty and
+consumers skip fleet/shard deckies the same way they skip a topology
+with too few personas.  No silent fallback to dummy personas; silence
+is correct when there's no opinion to convey.
+"""
+from __future__ import annotations
+
+import os
+import threading
+from pathlib import Path
+from typing import Optional
+
+from decnet.logging import get_logger
+from decnet.realism.personas import EmailPersona, parse_personas
+
+logger = get_logger("realism.personas_pool")
+
+_ENV_VAR = "DECNET_REALISM_PERSONAS"
+_SYSTEM_PATH = Path("/etc/decnet/email_personas.json")
+
+
+def _user_path() -> Path:
+    """Return ``~/.decnet/email_personas.json`` with ``~`` expanded."""
+    expanded = os.path.expanduser("~/.decnet/email_personas.json")
+    return Path(expanded)
+
+
+def resolve_path() -> Path:
+    """Return the path the global pool would load from right now.
+
+    The returned file may not exist; callers are expected to handle
+    that.  Nothing is read or written, but the filesystem *is* probed
+    (an existence check and a writability check on the system
+    location), so the ``decnet realism import-personas`` CLI can ask
+    "where would I write to?" without creating anything.
+
+    Order: a non-blank ``DECNET_REALISM_PERSONAS`` override, then the
+    system path, then the per-user path.
+    """
+    env_value = os.environ.get(_ENV_VAR, "").strip()
+    if env_value:
+        return Path(env_value)
+
+    system_dir = _SYSTEM_PATH.parent
+    # An existing system file is authoritative.  When it doesn't exist
+    # yet, the system location is still chosen if its directory exists
+    # and is writable; a read-only ``/etc/decnet`` (API user on a dev
+    # box) falls through to the user path so a fresh PUT lands
+    # somewhere instead of erroring out.
+    if _SYSTEM_PATH.exists() or (
+        system_dir.exists() and os.access(system_dir, os.W_OK)
+    ):
+        return _SYSTEM_PATH
+    return _user_path()
+
+
+# ── Cache ────────────────────────────────────────────────────────────────────
+# Lock-protected because two scheduler ticks could race on the first load,
+# and the read path is hot enough (every tick, every fleet/shard mail
+# decky) that re-parsing on every call is wasteful.
+
+_lock = threading.Lock()
+_cache: list[EmailPersona] = []
+_cache_path: Optional[Path] = None
+_cache_mtime: float = 0.0
+# ``language_default`` the cached personas were resolved with.  Part of
+# the cache key: parse_personas() writes the default into every persona
+# that has no explicit language, so a cache built for one default is
+# wrong for another.
+_cache_language: Optional[str] = None
+
+
+def load(*, language_default: str = "en") -> list[EmailPersona]:
+    """Return the parsed global persona pool.
+
+    *language_default* fills in any persona missing a ``language`` field;
+    fleet/shard sources have no topology-level default, so callers
+    should pass the worker's best guess (typically ``"en"``).
+
+    Threadsafe and cheap on the steady state (one ``stat`` plus a cache
+    comparison); the file is re-read and re-parsed only when the path,
+    its mtime, or *language_default* differs from the cached load, or
+    when the cached pool is empty.  A missing or unreadable file yields
+    ``[]``.
+
+    The returned list is a fresh copy, so callers may mutate it without
+    corrupting the cache (the persona objects themselves are shared).
+    """
+    global _cache, _cache_path, _cache_mtime, _cache_language
+
+    path = resolve_path()
+    try:
+        st = path.stat()
+    except OSError:
+        # File missing / unreachable: drop whatever was cached.
+        with _lock:
+            _cache = []
+            _cache_path = path
+            _cache_mtime = 0.0
+            _cache_language = None
+        return []
+
+    with _lock:
+        if (
+            _cache_path == path
+            and _cache_mtime == st.st_mtime
+            and _cache_language == language_default
+            and _cache  # non-empty cache; empty re-parses cheaply anyway
+        ):
+            return list(_cache)
+
+    # Read and parse outside the lock so a slow disk doesn't block
+    # other callers; racing loaders just parse the same file twice.
+    try:
+        raw = path.read_text(encoding="utf-8")
+    except OSError as exc:
+        logger.warning("realism global pool: read failed path=%s: %s", path, exc)
+        return []
+
+    parsed = parse_personas(raw, language_default=language_default)
+    with _lock:
+        _cache = parsed
+        _cache_path = path
+        _cache_mtime = st.st_mtime
+        _cache_language = language_default
+    if parsed:
+        logger.info(
+            "realism global pool: loaded %d personas from %s", len(parsed), path,
+        )
+    return list(parsed)
+
+
+def reset_cache() -> None:
+    """Forget the cached pool, path, and mtime.
+
+    Intended for tests: several tests in one process may point at
+    different on-disk pools, and stale cache state would leak between
+    them.
+    """
+    global _cache, _cache_path, _cache_mtime
+    with _lock:
+        _cache, _cache_path, _cache_mtime = [], None, 0.0
--- a/decnet/realism/planner.py
+++ b/decnet/realism/planner.py
@@ -0,0 +1,368 @@
+"""Realism planner — picks the next ``(decky, persona, class, action)`` tuple.
+
+Stage 3: returns ``create``-only plans (the edit branch lands in
+stage 3b).  Pure-function, deterministic given the same inputs:
+caller passes deckies (with personas pre-resolved on each row),
+``now``, and an RNG.
+
+The persona resolution split — topology-pool vs. global-pool — is
+the orchestrator's job, not the planner's.  Each decky dict reaching
+:func:`pick` carries a ``_realism_personas`` key with the resolved
+:class:`~decnet.realism.personas.EmailPersona` list.  Keeps the
+planner test-isolated and avoids forcing it to know about the
+:class:`~decnet.web.db.repository.BaseRepository` / topology pool /
+global pool.
+
+Diurnal gating uses :func:`decnet.realism.diurnal.in_work_hours` per
+persona; we filter the (decky, persona) pairs *before* picking, so a
+persona outside its window is never considered.
+"""
+from __future__ import annotations
+
+import secrets
+from datetime import datetime
+from typing import Any, Optional, Sequence
+
+from decnet.realism import bodies, naming
+from decnet.realism.diurnal import in_work_hours, sample_mtime
+from decnet.realism.personas import EmailPersona
+from decnet.realism.taxonomy import ContentClass, Plan, PlanAction  # noqa: F401
+
+
+# Stage-3 weighted sampling defaults:
+#   * User content (notes/todo/draft/script) gets the bulk — those are
+#     the realism win when a persona "looks busy."
+#   * System content (cron/daemon/cache) is plausible filler.
+#   * Email is owned by another path and is not picked here.
+#   * Canary classes are picked rarely. Each plant materialises a real
+#     CanaryToken row + DNS slug + HTTP URL — flooding the fleet makes
+#     the dashboard noisy. ~3% of file picks land here.
+#
+# These are the *defaults*. Operator-tuned overrides arrive via
+# :func:`apply_payload` (admin PUT /api/v1/realism/config). The
+# orchestrator worker periodically refreshes the in-process state from
+# the ``realism_config`` table; pick() reads the live globals each call.
+_DEFAULT_USER_CLASS_WEIGHTS: tuple[tuple[ContentClass, int], ...] = (
+    (ContentClass.NOTE, 30),
+    (ContentClass.TODO, 20),
+    (ContentClass.DRAFT, 15),
+    (ContentClass.SCRIPT, 10),
+)
+_DEFAULT_SYSTEM_CLASS_WEIGHTS: tuple[tuple[ContentClass, int], ...] = (
+    (ContentClass.LOG_CRON, 12),
+    (ContentClass.LOG_DAEMON, 8),
+    (ContentClass.CACHE_TMP, 5),
+)
+_DEFAULT_CANARY_CLASS_WEIGHTS: tuple[tuple[ContentClass, int], ...] = (
+    (ContentClass.CANARY_AWS_CREDS, 1),
+    (ContentClass.CANARY_ENV_FILE, 1),
+    (ContentClass.CANARY_GIT_CONFIG, 1),
+    (ContentClass.CANARY_SSH_KEY, 1),
+    (ContentClass.CANARY_HONEYDOC, 1),
+    (ContentClass.CANARY_HONEYDOC_DOCX, 1),
+    (ContentClass.CANARY_HONEYDOC_PDF, 1),
+    (ContentClass.CANARY_MYSQL_DUMP, 1),
+)
+_DEFAULT_CANARY_PROBABILITY = 0.03
+
+# Live (mutable) globals — reassigned by :func:`apply_payload`. pick()
+# reads these. Reset to defaults via :func:`reset_to_defaults` (used by
+# tests + the API DELETE path).
+_USER_CLASS_WEIGHTS: tuple[tuple[ContentClass, int], ...] = _DEFAULT_USER_CLASS_WEIGHTS
+_SYSTEM_CLASS_WEIGHTS: tuple[tuple[ContentClass, int], ...] = _DEFAULT_SYSTEM_CLASS_WEIGHTS
+_CANARY_CLASS_WEIGHTS: tuple[tuple[ContentClass, int], ...] = _DEFAULT_CANARY_CLASS_WEIGHTS
+_CANARY_PROBABILITY: float = _DEFAULT_CANARY_PROBABILITY
+
+
+def _serialize_weights(
+    weights: tuple[tuple[ContentClass, int], ...],
+) -> list[dict[str, Any]]:
+    """Convert ``(class, weight)`` pairs to the JSON wire shape, in order."""
+    serialized: list[dict[str, Any]] = []
+    for content_class, weight in weights:
+        serialized.append(
+            {"content_class": content_class.value, "weight": weight}
+        )
+    return serialized
+
+
+def _parse_weights(
+    raw: Any, allowed: set[ContentClass],
+) -> tuple[tuple[ContentClass, int], ...]:
+    """Parse ``[{"content_class": "...", "weight": N}, ...]`` into the
+    planner's internal tuple shape. Drops entries whose ``content_class``
+    isn't in *allowed* (defends against an operator pasting in a canary
+    class on the user list, which would skew sampling without the
+    canary-probability gate).
+
+    Raises ``ValueError`` on structural problems (non-list, non-object
+    entry, non-int or boolean weight, negative weight, unknown
+    ``content_class``, empty result, all-zero weights) so the API can
+    return 400.
+    """
+    if not isinstance(raw, list):
+        raise ValueError("weights must be a list")
+    out: list[tuple[ContentClass, int]] = []
+    for entry in raw:
+        if not isinstance(entry, dict):
+            raise ValueError("each weight entry must be an object")
+        cls_name = entry.get("content_class")
+        weight = entry.get("weight")
+        # ``bool`` is a subclass of ``int``: without the explicit check a
+        # JSON ``true`` would be accepted as weight 1.
+        if (
+            isinstance(weight, bool)
+            or not isinstance(weight, int)
+            or weight < 0
+        ):
+            raise ValueError(
+                f"weight for {cls_name!r} must be a non-negative integer"
+            )
+        try:
+            cls = ContentClass(cls_name)
+        except (ValueError, TypeError):
+            # ``from None``: the enum's own lookup error adds nothing to
+            # the message the API returns.
+            raise ValueError(
+                f"unknown content_class: {cls_name!r}"
+            ) from None
+        if cls not in allowed:
+            # Silently drop — a class that doesn't belong on this list
+            # (e.g. a canary class on the user list) is operator error,
+            # but we don't want to fail the whole save over one stray
+            # entry. The roundtrip in current_payload() will show the
+            # operator their entry didn't land.
+            continue
+        out.append((cls, weight))
+    if not out:
+        raise ValueError("weights list resolved to zero valid entries")
+    # Individual zero weights are fine, but at least one must be positive.
+    if sum(w for _, w in out) <= 0:
+        raise ValueError("weights must sum to a positive number")
+    return tuple(out)
+
+
+_USER_CLASSES: set[ContentClass] = {
+    ContentClass.NOTE, ContentClass.TODO, ContentClass.DRAFT, ContentClass.SCRIPT,
+}
+_SYSTEM_CLASSES: set[ContentClass] = {
+    ContentClass.LOG_CRON, ContentClass.LOG_DAEMON, ContentClass.CACHE_TMP,
+}
+_CANARY_CLASSES: set[ContentClass] = {
+    ContentClass.CANARY_AWS_CREDS, ContentClass.CANARY_ENV_FILE,
+    ContentClass.CANARY_GIT_CONFIG, ContentClass.CANARY_SSH_KEY,
+    ContentClass.CANARY_HONEYDOC, ContentClass.CANARY_HONEYDOC_DOCX,
+    ContentClass.CANARY_HONEYDOC_PDF, ContentClass.CANARY_MYSQL_DUMP,
+}
+
+
+def current_payload() -> dict[str, Any]:
+    """Snapshot the live planner config as a JSON-safe dict.
+
+    This is the wire shape returned by ``GET /api/v1/realism/config``."""
+    payload: dict[str, Any] = {}
+    payload["user_class_weights"] = _serialize_weights(_USER_CLASS_WEIGHTS)
+    payload["system_class_weights"] = _serialize_weights(_SYSTEM_CLASS_WEIGHTS)
+    payload["canary_class_weights"] = _serialize_weights(_CANARY_CLASS_WEIGHTS)
+    payload["canary_probability"] = _CANARY_PROBABILITY
+    return payload
+
+
+def apply_payload(payload: dict[str, Any]) -> None:
+    """Override the planner's live globals from a wire payload.
+
+    Every supplied field is validated first and the module-level names
+    are rebound only afterwards, so a ``ValueError`` on any field
+    leaves the planner exactly as it was.
+
+    Unknown fields are ignored (forward-compat); fields not present
+    leave the corresponding global untouched.
+
+    Raises:
+        ValueError: a weights list fails :func:`_parse_weights`, or
+            ``canary_probability`` is not a real number in
+            ``[0.0, 1.0]`` (booleans are rejected).
+    """
+    global _USER_CLASS_WEIGHTS, _SYSTEM_CLASS_WEIGHTS
+    global _CANARY_CLASS_WEIGHTS, _CANARY_PROBABILITY
+
+    # Phase 1: validate into a staging dict; nothing is rebound yet.
+    staged: dict[str, Any] = {}
+    if "user_class_weights" in payload:
+        staged["user"] = _parse_weights(
+            payload["user_class_weights"], _USER_CLASSES,
+        )
+    if "system_class_weights" in payload:
+        staged["system"] = _parse_weights(
+            payload["system_class_weights"], _SYSTEM_CLASSES,
+        )
+    if "canary_class_weights" in payload:
+        staged["canary"] = _parse_weights(
+            payload["canary_class_weights"], _CANARY_CLASSES,
+        )
+    if "canary_probability" in payload:
+        prob = payload["canary_probability"]
+        # ``bool`` is a subclass of ``int``: without the explicit check a
+        # JSON ``true`` would become probability 1.0 and turn every file
+        # pick into a canary plant.
+        if (
+            isinstance(prob, bool)
+            or not isinstance(prob, (int, float))
+            or not (0.0 <= prob <= 1.0)
+        ):
+            raise ValueError("canary_probability must be in [0.0, 1.0]")
+        staged["prob"] = float(prob)
+
+    # Phase 2: everything validated — rebind only the supplied fields.
+    if "user" in staged:
+        _USER_CLASS_WEIGHTS = staged["user"]
+    if "system" in staged:
+        _SYSTEM_CLASS_WEIGHTS = staged["system"]
+    if "canary" in staged:
+        _CANARY_CLASS_WEIGHTS = staged["canary"]
+    if "prob" in staged:
+        _CANARY_PROBABILITY = staged["prob"]
+
+
+def reset_to_defaults() -> None:
+    """Rebind all four live planner globals to the hardcoded defaults.
+
+    Called by tests and by the API reset path."""
+    global _USER_CLASS_WEIGHTS, _SYSTEM_CLASS_WEIGHTS
+    global _CANARY_CLASS_WEIGHTS, _CANARY_PROBABILITY
+    _USER_CLASS_WEIGHTS, _SYSTEM_CLASS_WEIGHTS = (
+        _DEFAULT_USER_CLASS_WEIGHTS,
+        _DEFAULT_SYSTEM_CLASS_WEIGHTS,
+    )
+    _CANARY_CLASS_WEIGHTS, _CANARY_PROBABILITY = (
+        _DEFAULT_CANARY_CLASS_WEIGHTS,
+        _DEFAULT_CANARY_PROBABILITY,
+    )
+
+
+def _weighted_pick(
+    weights: tuple[tuple[ContentClass, int], ...],
+    rng: secrets.SystemRandom,
+) -> ContentClass:
+    total = sum(w for _, w in weights)
+    target = rng.randint(1, total)
+    running = 0
+    for cls, w in weights:
+        running += w
+        if target <= running:
+            return cls
+    return weights[-1][0]  # unreachable, satisfy mypy
+
+
+def _eligible_pairs(
+    deckies: Sequence[dict[str, Any]],
+    now: datetime,
+) -> list[tuple[dict[str, Any], EmailPersona]]:
+    """Cross-product of deckies × resolved personas, diurnal-filtered.
+
+    A decky with no personas (empty ``_realism_personas``) is skipped
+    entirely; same fail-quiet semantics as the emailgen scheduler.
+    """
+    out: list[tuple[dict[str, Any], EmailPersona]] = []
+    for decky in deckies:
+        personas: list[EmailPersona] = decky.get("_realism_personas") or []
+        for persona in personas:
+            if in_work_hours(persona.active_hours, now):
+                out.append((decky, persona))
+    return out
+
+
+def pick(
+    deckies: Sequence[dict[str, Any]],
+    now: datetime,
+    *,
+    edit_candidate: Optional[dict[str, Any]] = None,
+    rand: Optional[secrets.SystemRandom] = None,
+) -> Optional[Plan]:
+    """Return a single :class:`Plan` for the orchestrator's tick.
+
+    Stage-3b policy: weighted action roll — 60% create, 30% edit, 10%
+    "leave alone" (planner returns ``None`` to skip).  When the roll
+    is "edit" and *edit_candidate* is set (a row from
+    :meth:`BaseRepository.pick_random_synthetic_file_for_edit`), we
+    return an edit Plan; otherwise we fall through to create.
+
+    The orchestrator scheduler is responsible for fetching the edit
+    candidate before calling — keeps this function pure-of-DB and
+    test-friendly.
+
+    Returns ``None`` when no eligible (decky, persona) pair exists or
+    when the action roll lands on "leave alone."
+    """
+    rng = rand or secrets.SystemRandom()
+
+    eligible = _eligible_pairs(deckies, now)
+    if not eligible:
+        return None
+
+    # Action roll.  Edit only fires when there's a candidate from the
+    # repo — otherwise we either re-roll to create or skip.
+    roll = rng.random()
+    if roll < 0.10:
+        return None  # "leave alone" — quiet tick is realism too
+    if roll < 0.40 and edit_candidate is not None:
+        return _edit_plan(edit_candidate, now, rng)
+
+    decky, persona = rng.choice(eligible)
+
+    # Canary first — they're rare (~3% of file picks), uniformly
+    # weighted across generators.  Falling here means the orchestrator
+    # plants a callback-bearing artifact this tick instead of an
+    # inert one.
+    if rng.random() < _CANARY_PROBABILITY:
+        content_class = _weighted_pick(_CANARY_CLASS_WEIGHTS, rng)
+        # Canary placement is the cultivator's job — plan.target_path
+        # is advisory; a "" lets the cultivator override entirely.
+        target_path = ""
+        body_hint = None
+        mtime = sample_mtime(persona.active_hours, now, rand=rng)
+        return Plan(
+            decky_uuid=decky["uuid"],
+            decky_name=decky["name"],
+            persona=persona.name,
+            content_class=content_class,
+            action="create",
+            target_path=target_path,
+            mtime=mtime,
+            body_hint=body_hint,
+            notes=(
+                f"persona={persona.name}",
+                f"class={content_class.value}",
+                "kind=canary",
+            ),
+        )
+
+    # User vs system content — biased toward user (realism wins are
+    # bigger there).
+    if rng.random() < 0.7:
+        content_class = _weighted_pick(_USER_CLASS_WEIGHTS, rng)
+    else:
+        content_class = _weighted_pick(_SYSTEM_CLASS_WEIGHTS, rng)
+
+    target_path = naming.make_path(content_class, persona.name, rand=rng)
+    body_hint = bodies.make_body(content_class, persona.name, rand=rng)
+    mtime = sample_mtime(persona.active_hours, now, rand=rng)
+
+    return Plan(
+        decky_uuid=decky["uuid"],
+        decky_name=decky["name"],
+        persona=persona.name,
+        content_class=content_class,
+        action="create",
+        target_path=target_path,
+        mtime=mtime,
+        body_hint=body_hint,
+        notes=(
+            f"persona={persona.name}",
+            f"class={content_class.value}",
+            f"window={persona.active_hours}",
+        ),
+    )
+
+
+def _edit_plan(
+    candidate: dict[str, Any],
+    now: datetime,
+    rng: secrets.SystemRandom,
+) -> Optional[Plan]:
+    """Build an edit-action :class:`Plan` from a synthetic_files row.
+
+    The candidate dict is the shape :meth:`BaseRepository.list_synthetic_files`
+    returns — we only need ``decky_uuid``, ``path``, ``persona``,
+    ``content_class``, ``last_body``, ``uuid``.  Returns ``None`` if
+    the candidate's content_class is somehow not editable (defensive
+    — the repo query already filters those out).
+    """
+    try:
+        cls = ContentClass(candidate["content_class"])
+    except (KeyError, ValueError):
+        return None
+    if cls.is_canary() or cls == ContentClass.CACHE_TMP:
+        return None
+    # mtime: edits bump forward by ~hours-to-days, but never past now.
+    # We model as "the file was edited some time after creation but
+    # before now" — sample_mtime with a tighter cap keeps it recent.
+    edit_mtime = sample_mtime(
+        "00:00-00:00", now, rand=rng,
+        backdate_min_hours=1.0, backdate_max_days=2.0,
+    )
+    return Plan(
+        decky_uuid=candidate["decky_uuid"],
+        decky_name=candidate.get("decky_name", ""),
+        persona=candidate.get("persona", ""),
+        content_class=cls,
+        action="edit",
+        target_path=candidate["path"],
+        mtime=edit_mtime,
+        body_hint=None,  # edit uses previous_body, not a fresh hint
+        previous_body=candidate.get("last_body", ""),
+        notes=(
+            f"persona={candidate.get('persona', '')}",
+            f"class={cls.value}",
+            "action=edit",
+            f"synthetic_file_uuid={candidate.get('uuid', '')}",
+        ),
+    )
--- a/decnet/realism/prompts/init.py
+++ b/decnet/realism/prompts/init.py
@@ -0,0 +1,9 @@
+"""Prompt builders for LLM-enriched content.
+
+* :mod:`decnet.realism.prompts.email` — corporate-email body builder.
+
+Stage 6 of the realism migration adds ``filebody.py``, ``filename.py``,
+and a ``_style.py`` helper so em-dash suppression sits in one place
+across email + file-class prompts.
+"""
+from __future__ import annotations
--- a/decnet/realism/prompts/_style.py
+++ b/decnet/realism/prompts/_style.py
@@ -0,0 +1,39 @@
+"""Shared stylometric guards for LLM-bound prompts.
+
+Lifted from the original ``orchestrator.emailgen.prompt`` em-dash
+block so file-class prompts (note / todo / draft / script bodies)
+pick up the same suppression.  Per the
+``feedback_em_dash_llm_tell.md`` memory: em-dashes (—) are a strong
+LLM-authorship tell, suppress by default; allow only for personas
+explicitly opted in via ``EmailPersona.uses_llms_heavily``.
+"""
+from __future__ import annotations
+
+from decnet.realism.personas import EmailPersona
+
+
+_SUPPRESS_RULE = (
+    "Do NOT use em-dashes (—). Use commas, periods, or "
+    "parentheses instead. Em-dashes are a tell."
+)
+_ALLOW_RULE = (
+    "Em-dashes are fine — this persona uses them naturally. "
+    "Write in your usual style."
+)
+
+
+def em_dash_rule(persona: EmailPersona) -> str:
+    """Return the em-dash instruction line for *persona*'s prompt."""
+    if persona.uses_llms_heavily:
+        return _ALLOW_RULE
+    return _SUPPRESS_RULE
+
+
+def strip_em_dashes(text: str, persona: EmailPersona) -> str:
+    """Belt-and-braces: even with the prompt rule, small models leak
+    em-dashes occasionally.  Substitute with comma+space so the
+    output reads naturally; opt-in personas pass through unchanged.
+    """
+    if persona.uses_llms_heavily:
+        return text
+    return text.replace("—", ", ").replace("–", ", ")
--- a/decnet/realism/prompts/email.py
+++ b/decnet/realism/prompts/email.py
@@ -0,0 +1,154 @@
+"""Prompt builder for the email content class.
+
+The LLM gets a tightly-scoped instruction and a small handful of
+deterministic constraints.  Persona mannerisms are *pre-selected* in
+Python (1–2 of the persona's full list) and injected as hard rules —
+small models otherwise treat the mannerism list as flavour text and
+ignore it, and the corpus collapses into one voice.
+
+**Em-dash suppression** is on by default; suppression is lifted only
+for personas that opt in via ``uses_llms_heavily``.  Em-dashes are a
+strong stylometric tell for LLM-authored prose, and a honeypot mailbox
+where every author uses them is a tell.  Stage 6 of the realism
+migration extracts the suppression block into a shared
+``decnet.realism.prompts._style`` helper so file-class prompts pick
+it up too.
+"""
+from __future__ import annotations
+
+import secrets
+from dataclasses import dataclass
+from typing import Optional
+
+from decnet.realism.personas import EmailPersona
+
+
+@dataclass(frozen=True)
+class PromptInputs:
+    """Immutable bundle of everything :func:`build` needs for one email.
+
+    ``parent_subject`` doubles as the reply switch: when it is set,
+    :func:`build` emits reply-thread instructions; otherwise it asks
+    for a new thread.
+    """
+
+    # Persona the email is written as; supplies name, role, tone,
+    # language, mannerisms and signature to the prompt.
+    sender: EmailPersona
+    # Persona the email is addressed to; only name and role are used.
+    recipient: EmailPersona
+    # Inserted as-is after "Context hint:" in the prompt.
+    context_hint: str
+    parent_subject: Optional[str] = None      # set when replying
+    parent_excerpt: Optional[str] = None      # short snippet of last msg
+
+
+_LANGUAGE_NAMES = {
+    "en": "English",
+    "es": "Spanish",
+    "pt": "Portuguese",
+    "fr": "French",
+    "de": "German",
+    "it": "Italian",
+    "nl": "Dutch",
+    "ja": "Japanese",
+    "zh": "Chinese",
+}
+
+
+def _lang_label(code: str) -> str:
+    return _LANGUAGE_NAMES.get(code.lower(), code)
+
+
+def select_mannerisms(
+    persona: EmailPersona,
+    *,
+    rng: Optional[secrets.SystemRandom] = None,
+    n: int = 2,
+) -> list[str]:
+    """Pick *n* mannerisms deterministically given *rng*.
+
+    Returns up to *n*; falls back to the full list when the persona
+    declares fewer.  Determinism (under a seeded RNG) is what makes
+    tests practical — otherwise mannerism injection is unverifiable.
+    """
+    rnd = rng or secrets.SystemRandom()
+    pool = list(persona.mannerisms)
+    if not pool:
+        return []
+    if len(pool) <= n:
+        return pool
+    rnd.shuffle(pool)
+    return pool[:n]
+
+
+def build(
+    inputs: PromptInputs,
+    *,
+    rng: Optional[secrets.SystemRandom] = None,
+) -> tuple[str, list[str]]:
+    """Return ``(prompt, mannerisms_used)``.
+
+    ``mannerisms_used`` flows back into the persisted ``payload`` JSON
+    so an analyst can see *why* a given email reads the way it does.
+    """
+    sender = inputs.sender
+    recipient = inputs.recipient
+    language = _lang_label(sender.language or "en")
+    mannerisms = select_mannerisms(sender, rng=rng)
+    mannerism_block = (
+        "\n".join(f"- {m}" for m in mannerisms)
+        if mannerisms
+        else "- (no specific mannerisms; write in the persona's tone)"
+    )
+
+    if sender.uses_llms_heavily:
+        em_dash_rule = (
+            "Em-dashes are fine — this persona uses them naturally. "
+            "Write in your usual style."
+        )
+    else:
+        em_dash_rule = (
+            "Do NOT use em-dashes (—). Use commas, periods, or "
+            "parentheses instead. Em-dashes are a tell."
+        )
+
+    sig_block = (
+        f"Use this exact signature block:\n{sender.signature}"
+        if sender.signature
+        else "End with a short, plausible signature for the persona's role."
+    )
+
+    if inputs.parent_subject:
+        thread_block = (
+            f"This is a REPLY in an ongoing thread.\n"
+            f"- Parent subject: {inputs.parent_subject}\n"
+            f"- Parent excerpt: {inputs.parent_excerpt or '(no excerpt)'}\n"
+            f"- Begin the body assuming the recipient already read the parent.\n"
+        )
+        subject_rule = (
+            "Subject must be the parent subject prefixed with 'Re: ' "
+            "(no double 'Re: Re:')."
+        )
+    else:
+        thread_block = "This is a NEW thread (no prior context)."
+        subject_rule = (
+            "Generate a short, specific subject line (≤ 80 chars) "
+            "appropriate to the context."
+        )
+
+    prompt = f"""You are writing one corporate email, RFC 2822 plain-text body only.
+
+Persona — sender:
+- Name: {sender.name}
+- Role: {sender.role}
+- Tone: {sender.tone_custom if sender.tone == "custom" and sender.tone_custom else sender.tone}
+- Mannerisms (must show through):
+{mannerism_block}
+
+Persona — recipient:
+- Name: {recipient.name}
+- Role: {recipient.role}
+
+Context hint: {inputs.context_hint}
+
+Thread context:
+{thread_block}
+
+Hard rules:
+1. Write the email body in {language}. Do not translate or code-switch.
+2. {em_dash_rule}
+3. {subject_rule}
+4. {sig_block}
+5. Output ONLY the email — first line is "Subject: <subject>", then a blank line, then the body. No commentary, no markdown fences, no preamble.
+"""
+    return prompt.strip(), mannerisms
--- a/decnet/realism/prompts/filebody.py
+++ b/decnet/realism/prompts/filebody.py
@@ -0,0 +1,91 @@
+"""Class-conditioned prompt builder for user-class file bodies.
+
+Stage 6 of the realism migration.  Only user-classes (``note``,
+``todo``, ``draft``, ``script``) get LLM enrichment — system-class
+content (cron logs, daemon logs, /tmp caches) is *supposed* to look
+formulaic, and an LLM-authored cron log is more suspicious than a
+templated one.
+
+The prompt asks for *short* output (LLM-authored ten-page essays in
+``~/notes.txt`` are an instant tell) and pins the exit shape so the
+worker doesn't need to scrape boilerplate.  Em-dash suppression
+flows through :mod:`decnet.realism.prompts._style`.
+"""
+from __future__ import annotations
+
+from decnet.realism.personas import EmailPersona
+from decnet.realism.prompts._style import em_dash_rule
+from decnet.realism.taxonomy import ContentClass
+
+
+_LANGUAGE_NAMES = {
+    "en": "English", "es": "Spanish", "pt": "Portuguese",
+    "fr": "French", "de": "German", "it": "Italian",
+    "nl": "Dutch", "ja": "Japanese", "zh": "Chinese",
+}
+
+
+def _lang_label(code: str) -> str:
+    return _LANGUAGE_NAMES.get((code or "en").lower(), code or "English")
+
+
+# Per-class guidance paragraph that build() interpolates after
+# "Guidance:" in the prompt.  Only the four classes listed here have
+# a file-body prompt; build() raises KeyError for any other class.
+_CLASS_GUIDANCE: dict[ContentClass, str] = {
+    ContentClass.NOTE: (
+        "A personal note file the persona keeps on their dev box.  "
+        "2–6 short lines.  Mix of TODOs, half-formed thoughts, "
+        "shorthand reminders.  NOT a polished document.  No headers "
+        "or markdown sections."
+    ),
+    ContentClass.TODO: (
+        "A markdown TODO list the persona keeps on their dev box.  "
+        "3–8 items in `- [ ] item` / `- [x] item` form.  Some checked, "
+        "some not.  Items are short, work-flavoured, lowercase, no "
+        "prose paragraphs.  No headers.  No introductory sentence."
+    ),
+    ContentClass.DRAFT: (
+        "A short draft email or memo the persona is working on.  "
+        "2–4 short paragraphs, conversational tone.  No subject line, "
+        "no headers — this is the body in a notes file, not a sent "
+        "email.  Sign off the way the persona would in their voice."
+    ),
+    ContentClass.SCRIPT: (
+        "A short utility script the persona wrote.  Pick a plausible "
+        "interpreter (bash or python3) and start with the matching "
+        "shebang.  10–25 lines.  Real-feeling intent (a backup, a "
+        "log rotation, a cleanup).  Inline comments allowed but sparse."
+    ),
+}
+
+
+def build(
+    content_class: ContentClass,
+    persona: EmailPersona,
+) -> str:
+    """Return a prompt for one body of *content_class* by *persona*.
+
+    Output the LLM is expected to produce: *just the file body*, no
+    commentary, no markdown fences.  Caller substitutes em-dashes
+    server-side via :func:`decnet.realism.prompts._style.strip_em_dashes`
+    as a belt-and-braces guard.
+    """
+    guidance = _CLASS_GUIDANCE.get(content_class)
+    if guidance is None:
+        raise KeyError(
+            f"no filebody prompt registered for content_class={content_class!r}"
+        )
+    language = _lang_label(persona.language or "en")
+    return (
+        f"You are writing one short file the persona below would "
+        f"plausibly keep on their dev box.\n\n"
+        f"Persona:\n"
+        f"- Name: {persona.name}\n"
+        f"- Role: {persona.role}\n"
+        f"- Tone: {persona.tone_custom if persona.tone == 'custom' and persona.tone_custom else persona.tone}\n\n"
+        f"File class: {content_class.value}\n"
+        f"Guidance: {guidance}\n\n"
+        f"Hard rules:\n"
+        f"1. Write the file body in {language}. Do not translate or code-switch.\n"
+        f"2. {em_dash_rule(persona)}\n"
+        f"3. Output ONLY the file body. No commentary, no markdown "
+        f"   fences, no preamble like 'Here is the file:'.\n"
+    ).strip()
--- a/decnet/realism/taxonomy.py
+++ b/decnet/realism/taxonomy.py
@@ -0,0 +1,150 @@
+"""Content classes and the :class:`Plan` dataclass.
+
+The planner emits :class:`Plan` instances; drivers consume them.  Every
+planted artifact (inert noise file, email, callback-bearing canary)
+maps to exactly one :class:`ContentClass` member, which is what the
+realism engine uses to dispatch to the right namer / body generator /
+prompt template.
+
+Categories:
+
+* **User content** (LLM-eligible): ``note``, ``todo``, ``draft``,
+  ``script``.  Created by humans on workstations; LLM enrichment makes
+  them feel lived-in.
+* **System content** (deterministic only): ``log_cron``, ``log_daemon``,
+  ``cache_tmp``.  These are *supposed* to look formulaic — that's how
+  cron/journald actually write them.  LLM here would harm realism.
+* **Email** (LLM-eligible): one persona writing to another.  Owned by
+  the email driver, not the file driver.
+* **Canary** (deterministic, callback-bearing): one ``canary_*`` member
+  per :mod:`decnet.canary.factory.KNOWN_GENERATORS` entry.  Picked
+  rarely and rate-limited per-decky by the planner.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import StrEnum
+from typing import Literal, Optional
+
+
+class ContentClass(StrEnum):
+    """The kind of artifact a planner has decided to produce.
+
+    Values are stable over the wire — they're persisted on
+    ``synthetic_files.content_class`` and used as bus-event discriminants
+    so renaming a member is a schema change.  Add new members at the
+    bottom; never reorder.
+    """
+
+    # User-generated, LLM-enrichable
+    NOTE = "note"
+    TODO = "todo"
+    DRAFT = "draft"
+    SCRIPT = "script"
+
+    # System-generated, template-only (LLM would harm realism)
+    LOG_CRON = "log_cron"
+    LOG_DAEMON = "log_daemon"
+    CACHE_TMP = "cache_tmp"
+
+    # Email — owned by the email driver, planner picks the action shape
+    EMAIL = "email"
+
+    # Callback-bearing — provided by decnet.canary.cultivator at
+    # dispatch time, not by realism.bodies.  One member per generator
+    # in decnet.canary.factory.KNOWN_GENERATORS.
+    CANARY_AWS_CREDS = "canary_aws_creds"
+    CANARY_ENV_FILE = "canary_env_file"
+    CANARY_GIT_CONFIG = "canary_git_config"
+    CANARY_SSH_KEY = "canary_ssh_key"
+    CANARY_HONEYDOC = "canary_honeydoc"
+    CANARY_HONEYDOC_DOCX = "canary_honeydoc_docx"
+    CANARY_HONEYDOC_PDF = "canary_honeydoc_pdf"
+    CANARY_MYSQL_DUMP = "canary_mysql_dump"
+
+    def is_canary(self) -> bool:
+        return self.value.startswith("canary_")
+
+    def is_user_class(self) -> bool:
+        return self in (
+            ContentClass.NOTE,
+            ContentClass.TODO,
+            ContentClass.DRAFT,
+            ContentClass.SCRIPT,
+        )
+
+    def is_system_class(self) -> bool:
+        return self in (
+            ContentClass.LOG_CRON,
+            ContentClass.LOG_DAEMON,
+            ContentClass.CACHE_TMP,
+        )
+
+
+PlanAction = Literal["create", "edit", "rotate"]
+
+
+@dataclass(frozen=True)
+class Plan:
+    """One realism decision: what to do, where, as whom, when.
+
+    Frozen so the planner can return the same instance to multiple
+    consumers (e.g. orchestrator dispatcher + canary cultivator) without
+    them stomping each other's view of the schedule.
+
+    Attributes
+    ----------
+    decky_uuid, decky_name :
+        Target decky.  Both carried so drivers don't need a repo
+        round-trip to map UUID → container name.
+    persona :
+        Persona name (``EmailPersona.name``) — this is the user the
+        action is "performed by."  Sampled from the topology's persona
+        pool at plan time.
+    content_class :
+        :class:`ContentClass` member.  Drives namer/body dispatch.
+    action :
+        ``"create"`` mints a new artifact; ``"edit"`` mutates a
+        previously-planted one (read-modify-write — requires
+        :attr:`previous_body`); ``"rotate"`` is the log-rotation shape
+        (``cron.log`` → ``cron.log.1``).
+    target_path :
+        Absolute container-side path the driver should write.  Already
+        persona-aware (e.g. ``/home/admin/TODO.md`` not
+        ``/home/{user}/TODO.md``).
+    mtime :
+        Backdated wall-clock the driver should ``touch -d`` after
+        writing.  Sampled by :func:`decnet.realism.diurnal.sample_mtime`
+        so files don't all stamp at the moment they were created.
+    body_hint :
+        Deterministic body the engine has *already* committed to.  LLM
+        enrichment, when enabled, may replace it but on timeout/failure
+        the driver falls back to this — so the tick never blocks
+        unboundedly.
+    previous_body :
+        Required for ``action="edit"``.  The bytes the driver read back
+        from the decky before mutating; passed to
+        :func:`decnet.realism.bodies.next_iteration`.
+    """
+
+    decky_uuid: str
+    decky_name: str
+    persona: str
+    content_class: ContentClass
+    action: PlanAction
+    target_path: str
+    mtime: datetime
+    body_hint: Optional[str] = None
+    previous_body: Optional[str] = None
+    notes: tuple[str, ...] = field(default_factory=tuple)
+
+    def __post_init__(self) -> None:
+        if self.action == "edit" and self.previous_body is None:
+            # Belt-and-braces: the planner produced an edit Plan without
+            # the prior body. The driver would either have to make a
+            # second docker exec to re-read or silently degrade to
+            # create. Both bad. Fail loudly at construction.
+            raise ValueError(
+                "Plan.action='edit' requires previous_body; got None"
+            )