feat(realism): LLM/breaker status on orchestrator heartbeat

Surfaces realism subsystem state through the existing worker heartbeat
`extra` hook (system.orchestrator.health) — no new bus topic. The payload
carries a `realism` object holding {llm_enabled, llm_backend, llm_model,
llm_breaker_state}, so the dashboard's worker panel renders a live LLM
badge with a colored breaker-state dot:

  closed (green)   — LLM healthy
  half_open (amber) — cooldown elapsed; next call is a probe
  open (red)       — short-circuiting to deterministic templates

The heartbeat is the canonical worker self-report channel; piggybacking
on its extra(...) hook avoids adding a new topic family, and the snapshot
is recomputed on every beat (30s).
This commit is contained in:
2026-04-27 17:51:00 -04:00
parent 55e86f606c
commit 397a1a111e
3 changed files with 153 additions and 2 deletions

View File

@@ -116,7 +116,12 @@ async def orchestrator_worker(
bus = None
shutdown = asyncio.Event()
heartbeat_task = asyncio.create_task(run_health_heartbeat(bus, "orchestrator"))
heartbeat_task = asyncio.create_task(
run_health_heartbeat(
bus, "orchestrator",
extra=lambda: {"realism": _realism_health_snapshot(llm, breaker)},
)
)
control_task = asyncio.create_task(
run_control_listener(bus, "orchestrator", shutdown),
)
@@ -180,6 +185,35 @@ def _roll_action_kind(rng: secrets.SystemRandom) -> str:
return _ACTION_WEIGHTS[-1][0] # unreachable, satisfy mypy
def _realism_health_snapshot(
    llm: Any, breaker: Optional[LLMCircuitBreaker],
) -> dict[str, Any]:
    """Build the realism-subsystem status dict for the heartbeat ``extra`` payload.

    Reports whether the LLM is enabled and, when it is, the configured
    backend, the client's model and the circuit-breaker state, so the
    dashboard can draw its status badge without reaching into worker
    process memory. Read-only; the heartbeat ticks every 30s and this
    snapshot is rebuilt on each tick.

    Args:
        llm: The LLM client, or ``None`` when the LLM is disabled. Only
            its optional ``model`` attribute is read.
        breaker: The circuit breaker guarding LLM calls, or ``None``.
            Only its ``state`` attribute is read.

    Returns:
        A dict with the keys ``llm_enabled``, ``llm_backend``,
        ``llm_model`` and ``llm_breaker_state``. All four keys are
        always present; when ``llm`` is ``None`` every key except
        ``llm_enabled`` is ``None``, so consumers can branch on
        ``llm_enabled`` alone.
    """
    # Start from the "disabled" shape so both paths share one key set.
    snapshot: dict[str, Any] = {
        "llm_enabled": llm is not None,
        "llm_backend": None,
        "llm_model": None,
        "llm_breaker_state": None,
    }
    if llm is None:
        return snapshot

    # Backend name comes from the environment, falling back to "ollama".
    snapshot["llm_backend"] = os.environ.get("DECNET_REALISM_LLM", "ollama")
    # Not every client object exposes ``model``; report None if absent.
    snapshot["llm_model"] = getattr(llm, "model", None)
    if breaker is not None:
        snapshot["llm_breaker_state"] = breaker.state
    return snapshot
def _llm_should_enable(explicit: Optional[bool]) -> bool:
"""Resolve the LLM-enabled flag from CLI / env / defaults.