feat(realism): LLM enrichment for user-class file bodies

Stage 6 of the realism migration. User-class file bodies (note, todo, draft, script) optionally get LLM-authored content; system classes (cron / daemon logs, /tmp caches) stay template-only because formulaic *is* the right look for them.

New surface:
- realism.llm.circuit.LLMCircuitBreaker — process-local sliding-window breaker. 3 consecutive failures trip open; 60s cooldown to half-open; half-open success closes, failure re-opens. Protects the orchestrator tick from sustained Ollama wedges (per-call timeout already covers one-shot hangs).
- realism.prompts._style — em-dash suppression lifted from the email prompt. Persona.uses_llms_heavily opts out per the feedback_em_dash_llm_tell.md memory. Includes strip_em_dashes belt-and-braces sub for output that slipped past the prompt rule.
- realism.prompts.filebody — class-conditioned prompts (note / todo / draft / script) with persona context, language pinning, output shape rule.
- realism.bodies.make_body_with_llm — async wrapper around make_body that calls the LLM when one is provided AND the breaker allows. Falls back to template on timeout / error / empty / system-class.

Wiring:
- scheduler.pick_file accepts optional llm + llm_breaker + llm_timeout. When the planner picks a create action and the content_class is a user-class, the body_hint is replaced with the LLM-authored body (or falls back to the deterministic body_hint).
- orchestrator.worker constructs get_llm() at startup gated by DECNET_REALISM_LLM env var (any non-empty value enables; empty / "off" / "none" / "0" disables). Passes llm + breaker through every tick.
- decnet orchestrate gains --llm/--no-llm flag overriding the env var.
2026-04-27 16:42:58 -04:00
parent b321e29002
commit 4e436da569
9 changed files with 625 additions and 11 deletions
--- a/decnet/realism/bodies.py
+++ b/decnet/realism/bodies.py
@@ -22,12 +22,16 @@ respectively, not from realism.bodies.
 """
 from __future__ import annotations

+import asyncio
 import secrets
 from datetime import datetime, timezone
 from typing import Callable, Optional

+from decnet.logging import get_logger
 from decnet.realism.taxonomy import ContentClass

+log = get_logger("realism.bodies")
+

 # ── User-class body generators ─────────────────────────────────────────────

@@ -220,9 +224,10 @@ def make_body(
 ) -> str:
    """Return deterministic body bytes (utf-8 string) for *content_class*.

-    Stage 3 ships templates only; stage 6 adds an optional
-    ``LLMBackend`` parameter that, when supplied and the breaker is
-    closed, replaces the template return for user-classes.
+    Stage 3 ships templates only.  :func:`make_body_with_llm` is the
+    LLM-aware variant added in stage 6 — kept on a separate name so
+    the deterministic path stays trivially callable from tests and
+    from the LLM fallback itself.
    """
    rng = rand or secrets.SystemRandom()
    gen = _BODIES.get(content_class)
@@ -233,6 +238,72 @@ def make_body(
    return gen(persona, rng)


+async def make_body_with_llm(
+    content_class: ContentClass,
+    persona,  # EmailPersona — typed loosely to avoid an import cycle
+    *,
+    llm=None,  # LLMBackend | None
+    breaker=None,  # LLMCircuitBreaker | None
+    timeout: float = 60.0,
+    rand: Optional[secrets.SystemRandom] = None,
+) -> str:
+    """Return an LLM-authored body for user-classes, else the template body.
+
+    The deterministic :func:`make_body` result is returned whenever:
+
+    * the content class isn't a user-class (system-class content should
+      look formulaic, so the LLM is never consulted and the breaker is
+      left untouched),
+    * ``llm`` is None or ``breaker.allow_call()`` returns False,
+    * the call times out, raises, reports ``success`` falsy, or yields
+      empty / whitespace-only / missing text (each of these is recorded
+      on the breaker as a failure).
+
+    On success the breaker is told so, the text is right-stripped, given
+    one trailing newline, and passed through ``strip_em_dashes`` as a
+    belt-and-braces guard (see :mod:`decnet.realism.prompts._style`).
+    """
+    rng = rand or secrets.SystemRandom()
+
+    def _template(*, failed: bool = False) -> str:
+        # Single exit for every fallback; ``failed`` marks LLM-side faults.
+        if failed and breaker is not None:
+            breaker.record_failure()
+        return make_body(content_class, persona.name, rand=rng)
+
+    # System / canary / email classes never touch the LLM.
+    if not content_class.is_user_class():
+        return _template()
+    if llm is None or (breaker is not None and not breaker.allow_call()):
+        return _template()
+
+    # Lazy imports keep the prompt + style modules out of the
+    # deterministic path's import graph.
+    from decnet.realism.llm.base import LLMTimeout
+    from decnet.realism.prompts import filebody as _filebody
+    from decnet.realism.prompts._style import strip_em_dashes
+
+    prompt = _filebody.build(content_class, persona)
+    try:
+        result = await asyncio.wait_for(llm.generate(prompt), timeout=timeout)
+    except (LLMTimeout, asyncio.TimeoutError):
+        log.debug("realism.bodies LLM timeout class=%s persona=%s",
+                  content_class.value, persona.name)
+        return _template(failed=True)
+    except Exception as exc:  # noqa: BLE001
+        log.warning("realism.bodies LLM error class=%s persona=%s: %s",
+                    content_class.value, persona.name, exc)
+        return _template(failed=True)
+
+    # Defensive: tolerate ``text`` being None instead of raising out here,
+    # past the try block; ``text`` is only read when ``success`` is truthy.
+    text = (result.text or "").rstrip() if result.success else ""
+    if not text:
+        return _template(failed=True)
+    if breaker is not None:
+        breaker.record_success()
+    return strip_em_dashes(text + "\n", persona)
+
+
 # ── Edit-in-place mutators ─────────────────────────────────────────────────
 # Stage 3b: deterministic per-class mutations.  The contract: take the
 # previous body bytes, return a plausible *next* iteration (append a
--- a/decnet/realism/llm/circuit.py
+++ b/decnet/realism/llm/circuit.py
@@ -0,0 +1,99 @@
+"""Process-local circuit breaker for LLM calls.
+
+Per-call timeouts (``asyncio.wait_for(llm.generate, timeout=...)``)
+protect a single tick from a single hung Ollama.  They do NOT protect
+the worker from a *sustained* problem: 60 consecutive 60-second
+timeouts chew up an hour of orchestrator time on dead requests before
+anything notices.
+
+This breaker counts consecutive failures (a success resets the count)
+and flips ``open`` after ``failure_threshold`` of them in a row.  Open
+breakers short-circuit ``allow_call`` to ``False`` so callers fall
+back to deterministic templates without the per-tick cost.  After
+``cooldown_seconds`` the breaker enters ``half_open`` and the next
+call is allowed; success closes the breaker, failure re-opens it
+with a fresh cooldown.
+
+Process-local on purpose — cross-process state would require shared
+memory and is overkill for a single orchestrator worker.
+"""
+from __future__ import annotations
+
+import threading
+import time
+from enum import Enum
+
+
+class _State(Enum):  # value strings are what ``LLMCircuitBreaker.state`` reports
+    CLOSED = "closed"
+    OPEN = "open"
+    HALF_OPEN = "half_open"
+
+
+class LLMCircuitBreaker:
+    """Lock-guarded consecutive-failure circuit breaker.
+
+    ``failure_threshold`` failures in a row trip the breaker open; once
+    ``cooldown_seconds`` have passed, the next ``allow_call`` moves it
+    to half-open.  A recorded success closes it and zeroes the streak;
+    a failure recorded while half-open re-opens it with a fresh
+    cooldown.  No time window is kept, only the current streak.  The
+    defaults (3 failures, 60s) match the realism worker's tick cadence:
+    three 60s timeouts are 3 minutes of dead air, the point at which a
+    deterministic fallback is overdue.
+    """
+
+    def __init__(
+        self,
+        *,
+        failure_threshold: int = 3,
+        cooldown_seconds: float = 60.0,
+        clock=time.monotonic,
+    ) -> None:
+        self._lock = threading.Lock()
+        self._clock = clock
+        self._failure_threshold = failure_threshold
+        self._cooldown = cooldown_seconds
+        self._state = _State.CLOSED
+        self._consecutive_failures = 0
+        self._opened_at: float = 0.0
+
+    @property
+    def state(self) -> str:
+        """Current state as ``"closed"``, ``"open"`` or ``"half_open"``."""
+        with self._lock:
+            return self._state.value
+
+    def allow_call(self) -> bool:
+        """Report whether the caller should attempt the LLM call.
+
+        Closed and half-open both allow.  Open allows only once the
+        cooldown has elapsed, and that query itself flips the state to
+        half-open so the caller's attempt serves as the probe.
+        """
+        with self._lock:
+            if self._state is not _State.OPEN:
+                return True
+            cooled = self._clock() - self._opened_at >= self._cooldown
+            if cooled:
+                self._state = _State.HALF_OPEN
+            return cooled
+
+    def record_success(self) -> None:
+        """Close the breaker and clear the failure streak."""
+        with self._lock:
+            self._consecutive_failures = 0
+            self._opened_at = 0.0
+            self._state = _State.CLOSED
+
+    def record_failure(self) -> None:
+        """Count one failure, opening the breaker when warranted."""
+        with self._lock:
+            probing = self._state is _State.HALF_OPEN
+            # A failed probe leaves the streak as-is: the underlying issue
+            # is evidently unresolved, so it just re-opens below.
+            if not probing:
+                self._consecutive_failures += 1
+            if probing or self._consecutive_failures >= self._failure_threshold:
+                self._state = _State.OPEN
+                self._opened_at = self._clock()
--- a/decnet/realism/prompts/_style.py
+++ b/decnet/realism/prompts/_style.py
@@ -0,0 +1,39 @@
+"""Shared stylometric guards for LLM-bound prompts.
+
+Lifted from the original ``orchestrator.emailgen.prompt`` em-dash
+block so file-class prompts (note / todo / draft / script bodies)
+pick up the same suppression.  Per the
+``feedback_em_dash_llm_tell.md`` memory: em-dashes (—) are a strong
+LLM-authorship tell, suppress by default; allow only for personas
+explicitly opted in via ``EmailPersona.uses_llms_heavily``.
+"""
+from __future__ import annotations
+
+from decnet.realism.personas import EmailPersona
+
+
+_SUPPRESS_RULE = (  # default instruction: keep em-dashes out of the output
+    "Do NOT use em-dashes (—). Use commas, periods, or "
+    "parentheses instead. Em-dashes are a tell."
+)
+_ALLOW_RULE = (  # instruction for personas flagged ``uses_llms_heavily``
+    "Em-dashes are fine — this persona uses them naturally. "
+    "Write in your usual style."
+)
+
+
+def em_dash_rule(persona: EmailPersona) -> str:
+    """Pick the prompt's em-dash instruction from *persona*'s opt-in flag."""
+    opted_in = persona.uses_llms_heavily
+    rule = _ALLOW_RULE if opted_in else _SUPPRESS_RULE
+    return rule
+
+
+def strip_em_dashes(text: str, persona: EmailPersona) -> str:
+    """Belt-and-braces: swap em/en-dashes the model leaked for ", ".
+
+    Horizontal whitespace hugging the dash is absorbed, so "a — b" becomes
+    "a, b" rather than "a ,  b".  Opt-in personas pass through unchanged.
+    """
+    import re  # function-scope so the module's top-level imports stay as-is
+    return text if persona.uses_llms_heavily else re.sub(r"[ \t]*[—–][ \t]*", ", ", text)
--- a/decnet/realism/prompts/filebody.py
+++ b/decnet/realism/prompts/filebody.py
@@ -0,0 +1,91 @@
+"""Class-conditioned prompt builder for user-class file bodies.
+
+Stage 6 of the realism migration.  Only user-classes (``note``,
+``todo``, ``draft``, ``script``) get LLM enrichment — system-class
+content (cron logs, daemon logs, /tmp caches) is *supposed* to look
+formulaic, and an LLM-authored cron log is more suspicious than a
+templated one.
+
+The prompt asks for *short* output (LLM-authored ten-page essays in
+``~/notes.txt`` are an instant tell) and pins the exit shape so the
+worker doesn't need to scrape boilerplate.  Em-dash suppression
+flows through :mod:`decnet.realism.prompts._style`.
+"""
+from __future__ import annotations
+
+from decnet.realism.personas import EmailPersona
+from decnet.realism.prompts._style import em_dash_rule
+from decnet.realism.taxonomy import ContentClass
+
+
+_LANGUAGE_NAMES = {  # lowercase language code -> name interpolated into the prompt
+    "en": "English", "es": "Spanish", "pt": "Portuguese",
+    "fr": "French", "de": "German", "it": "Italian",
+    "nl": "Dutch", "ja": "Japanese", "zh": "Chinese",
+}
+
+
+def _lang_label(code: str) -> str:  # unknown codes pass through; falsy -> "English"
+    return _LANGUAGE_NAMES.get(code.lower(), code) if code else "English"
+
+
+_CLASS_GUIDANCE: dict[ContentClass, str] = {  # per-class text interpolated after "Guidance:" in build()
+    ContentClass.NOTE: (
+        "A personal note file the persona keeps on their dev box.  "
+        "2–6 short lines.  Mix of TODOs, half-formed thoughts, "  # NOTE(review): en-dash range in prompt text; strip_em_dashes rewrites en-dashes in replies, confirm wanted here
+        "shorthand reminders.  NOT a polished document.  No headers "
+        "or markdown sections."
+    ),
+    ContentClass.TODO: (
+        "A markdown TODO list the persona keeps on their dev box.  "
+        "3–8 items in `- [ ] item` / `- [x] item` form.  Some checked, "
+        "some not.  Items are short, work-flavoured, lowercase, no "
+        "prose paragraphs.  No headers.  No introductory sentence."
+    ),
+    ContentClass.DRAFT: (
+        "A short draft email or memo the persona is working on.  "
+        "2–4 short paragraphs, conversational tone.  No subject line, "
+        "no headers — this is the body in a notes file, not a sent "  # NOTE(review): literal em-dash in a prompt whose rule 2 may forbid them; confirm intended
+        "email.  Sign off the way the persona would in their voice."
+    ),
+    ContentClass.SCRIPT: (
+        "A short utility script the persona wrote.  Pick a plausible "
+        "interpreter (bash or python3) and start with the matching "
+        "shebang.  10–25 lines.  Real-feeling intent (a backup, a "
+        "log rotation, a cleanup).  Inline comments allowed but sparse."
+    ),
+}
+
+
+def build(
+    content_class: ContentClass,
+    persona: EmailPersona,
+) -> str:
+    """Return the LLM prompt for one *content_class* body by *persona*.
+
+    The prompt demands the bare file body (no commentary, no fences).
+    Raises ``KeyError`` when *content_class* has no registered guidance.
+    """
+    guidance = _CLASS_GUIDANCE.get(content_class)
+    if guidance is None:
+        raise KeyError(
+            f"no filebody prompt registered for content_class={content_class!r}"
+        )
+    language = _lang_label(persona.language or "en")
+    custom_tone = persona.tone == "custom" and persona.tone_custom
+    tone = persona.tone_custom if custom_tone else persona.tone
+    return (
+        "You are writing one short file the persona below would "
+        "plausibly keep on their dev box.\n\n"
+        "Persona:\n"
+        f"- Name: {persona.name}\n"
+        f"- Role: {persona.role}\n"
+        f"- Tone: {tone}\n\n"
+        f"File class: {content_class.value}\n"
+        f"Guidance: {guidance}\n\n"
+        "Hard rules:\n"
+        f"1. Write the file body in {language}. Do not translate or code-switch.\n"
+        f"2. {em_dash_rule(persona)}\n"
+        "3. Output ONLY the file body. No commentary, no markdown "
+        "fences, no preamble like 'Here is the file:'.\n"
+    ).strip()