feat(realism): LLM/breaker status on orchestrator heartbeat

Surfaces realism subsystem state on the existing worker heartbeat extra hook (system.orchestrator.health) — no new bus topic. Payload carries {llm_enabled, llm_backend, llm_model, llm_breaker_state}, so the dashboard's worker panel renders a live LLM badge with a colored breaker-state dot: closed (green) — LLM healthy; half_open (amber) — cooldown elapsed, next call is a probe; open (red) — short-circuiting to deterministic templates. Heartbeat is the canonical worker self-report channel; piggybacking on extra(...) avoids a new topic family while keeping the snapshot recomputed each beat (30s).
2026-04-27 17:51:00 -04:00
parent 55e86f606c
commit 397a1a111e
3 changed files with 153 additions and 2 deletions
--- a/decnet/orchestrator/worker.py
+++ b/decnet/orchestrator/worker.py
@@ -116,7 +116,12 @@ async def orchestrator_worker(
        bus = None

    shutdown = asyncio.Event()
-    heartbeat_task = asyncio.create_task(run_health_heartbeat(bus, "orchestrator"))
+    heartbeat_task = asyncio.create_task(
+        run_health_heartbeat(
+            bus, "orchestrator",
+            extra=lambda: {"realism": _realism_health_snapshot(llm, breaker)},
+        )
+    )
    control_task = asyncio.create_task(
        run_control_listener(bus, "orchestrator", shutdown),
    )
@@ -180,6 +185,35 @@ def _roll_action_kind(rng: secrets.SystemRandom) -> str:
    return _ACTION_WEIGHTS[-1][0]  # unreachable, satisfy mypy


+def _realism_health_snapshot(
+    llm: Any, breaker: Optional[LLMCircuitBreaker],
+) -> dict[str, Any]:
+    """Build the ``realism`` section of the orchestrator heartbeat payload.
+
+    Reports whether the LLM is enabled plus its backend, model and
+    circuit-breaker state, so the dashboard can render a status badge
+    without reaching into worker process memory. Read-only; the
+    heartbeat (30s) recomputes this on every beat via its ``extra`` hook.
+
+    Always returns the same four keys. With ``llm is None`` every
+    key except ``llm_enabled`` is ``None``; ``llm_breaker_state`` is
+    also ``None`` when no breaker was supplied.
+    """
+    if llm is None:
+        return {
+            "llm_enabled": False,
+            "llm_backend": None,
+            "llm_model": None,
+            "llm_breaker_state": None,
+        }
+    return {
+        "llm_enabled": True,
+        "llm_backend": os.environ.get("DECNET_REALISM_LLM", "ollama"),  # env var, re-read on each call
+        "llm_model": getattr(llm, "model", None),  # None if llm has no ``model`` attribute
+        "llm_breaker_state": breaker.state if breaker is not None else None,
+    }
+
+
 def _llm_should_enable(explicit: Optional[bool]) -> bool:
    """Resolve the LLM-enabled flag from CLI / env / defaults.