feat(realism): LLM enrichment for user-class file bodies

Stage 6 of the realism migration. User-class file bodies (note, todo, draft, script) optionally get LLM-authored content; system classes (cron / daemon logs, /tmp caches) stay template-only because formulaic *is* the right look for them.

New surface:
- realism.llm.circuit.LLMCircuitBreaker — process-local sliding-window breaker. 3 consecutive failures trip open; 60s cooldown to half-open; half-open success closes, failure re-opens. Protects the orchestrator tick from sustained Ollama wedges (per-call timeout already covers one-shot hangs).
- realism.prompts._style — em-dash suppression lifted from the email prompt. Persona.uses_llms_heavily opts out per the feedback_em_dash_llm_tell.md memory. Includes strip_em_dashes, a belt-and-braces substitution for output that slipped past the prompt rule.
- realism.prompts.filebody — class-conditioned prompts (note / todo / draft / script) with persona context, language pinning, output shape rule.
- realism.bodies.make_body_with_llm — async wrapper around make_body that calls the LLM when one is provided AND the breaker allows. Falls back to template on timeout / error / empty / system-class.

Wiring:
- scheduler.pick_file accepts optional llm + llm_breaker + llm_timeout. When the planner picks a create action and the content_class is a user-class, the body_hint is replaced with the LLM-authored body (or falls back to the deterministic body_hint).
- orchestrator.worker constructs get_llm() at startup gated by DECNET_REALISM_LLM env var (any other non-empty value enables; empty / "off" / "none" / "0" / "false" / "disabled" disables). Passes llm + breaker through every tick.
- decnet orchestrate gains --llm/--no-llm flag overriding the env var.
2026-04-27 16:42:58 -04:00
parent b321e29002
commit 4e436da569
9 changed files with 625 additions and 11 deletions
--- a/decnet/orchestrator/worker.py
+++ b/decnet/orchestrator/worker.py
@@ -20,8 +20,10 @@ from __future__ import annotations
 import asyncio
 import contextlib
 import hashlib
+import os
 import secrets
 from datetime import datetime, timezone
+from typing import Any, Optional

 from decnet.bus.factory import get_bus
 from decnet.bus.publish import (
@@ -37,6 +39,7 @@ from decnet.orchestrator.emailgen import (
    scheduler as email_scheduler,
 )
 from decnet.orchestrator.emailgen.scheduler import EmailAction
+from decnet.realism.llm.circuit import LLMCircuitBreaker
 from decnet.web.db.repository import BaseRepository

 logger = get_logger("orchestrator")
@@ -65,14 +68,43 @@ async def orchestrator_worker(
    repo: BaseRepository,
    *,
    interval: int = 60,
+    llm_enabled: Optional[bool] = None,
 ) -> None:
    """Periodically inject synthetic activity into the running fleet.

    Runs as a long-lived asyncio task.  Honours the bus control topic
    (``system.orchestrator.control``) for graceful shutdown.
+
+    LLM enrichment for user-class file bodies is opt-in via the
+    ``DECNET_REALISM_LLM`` env var (e.g. ``ollama`` / ``fake``; unset or
+    empty keeps it off).  Pass ``llm_enabled`` from the CLI to override
+    (``decnet orchestrate --llm`` / ``--no-llm``).  When the LLM is unreachable
+    or wedged, a process-local circuit breaker
+    (:class:`LLMCircuitBreaker`) trips after 3 consecutive failures
+    and the worker falls back to deterministic templates for 60
+    seconds before re-probing.
    """
    logger.info("orchestrator worker started interval=%ds", interval)

+    llm: Any = None
+    breaker: Optional[LLMCircuitBreaker] = None
+    if _llm_should_enable(llm_enabled):
+        try:
+            from decnet.realism.llm import get_llm
+            llm = get_llm()
+            breaker = LLMCircuitBreaker()
+            logger.info(
+                "orchestrator: LLM enrichment enabled backend=%s model=%s",
+                os.environ.get("DECNET_REALISM_LLM", "ollama"),
+                getattr(llm, "model", "?"),
+            )
+        except Exception as exc:  # noqa: BLE001
+            logger.warning(
+                "orchestrator: LLM init failed, continuing without "
+                "enrichment: %s", exc,
+            )
+            llm = None
+
    bus = None
    try:
        bus = get_bus(client_name="orchestrator")
@@ -98,7 +130,7 @@ async def orchestrator_worker(
            if shutdown.is_set():
                break
            try:
-                await _one_tick(repo, bus)
+                await _one_tick(repo, bus, llm=llm, breaker=breaker)
            except Exception as exc:  # noqa: BLE001
                logger.error("orchestrator tick failed: %s", exc)
            tick_n += 1
@@ -148,10 +180,29 @@ def _roll_action_kind(rng: secrets.SystemRandom) -> str:
    return _ACTION_WEIGHTS[-1][0]  # unreachable, satisfy mypy


+def _llm_should_enable(explicit: Optional[bool]) -> bool:
+    """Resolve the LLM-enabled flag from CLI / env / defaults.
+
+    *explicit* wins when not ``None`` (``--llm`` / ``--no-llm``).  Otherwise
+    ``DECNET_REALISM_LLM`` is stripped and lower-cased: unset, empty, ``off``,
+    ``none``, ``0``, ``false`` or ``disabled`` disables; any other value
+    (``ollama`` / ``fake`` / etc.) enables.
+    """
+    if explicit is not None:
+        return explicit
+    raw = os.environ.get("DECNET_REALISM_LLM", "").strip().lower()
+    if raw in ("", "off", "none", "0", "false", "disabled"):
+        return False
+    return True
+
+
 async def _pick_action(
    repo: BaseRepository,
    deckies: list[dict],
    rng: secrets.SystemRandom,
+    *,
+    llm: Any = None,
+    breaker: Optional[LLMCircuitBreaker] = None,
 ):
    """Roll an action-kind, then pick the matching action.

@@ -168,7 +219,10 @@ async def _pick_action(
        if kind == "traffic":
            action = scheduler.pick(deckies, rand=rng)
        elif kind == "file":
-            action = await scheduler.pick_file(deckies, repo, rand=rng)
+            action = await scheduler.pick_file(
+                deckies, repo, rand=rng,
+                llm=llm, llm_breaker=breaker,
+            )
        elif kind == "email":
            try:
                action = await email_scheduler.pick(repo, rand=rng)
@@ -182,11 +236,17 @@ async def _pick_action(
    return None


-async def _one_tick(repo: BaseRepository, bus) -> None:
+async def _one_tick(
+    repo: BaseRepository,
+    bus,
+    *,
+    llm: Any = None,
+    breaker: Optional[LLMCircuitBreaker] = None,
+) -> None:
    deckies = await repo.list_running_deckies()
    rng = secrets.SystemRandom()

-    action = await _pick_action(repo, deckies, rng)
+    action = await _pick_action(repo, deckies, rng, llm=llm, breaker=breaker)
    if action is None:
        ssh_eligible = sum(
            1 for d in deckies