feat(realism): LLM/breaker status on orchestrator heartbeat

Surfaces realism subsystem state on the existing worker heartbeat
extra hook (system.orchestrator.health) — no new bus topic. The
heartbeat extra carries realism: {llm_enabled, llm_backend, llm_model,
llm_breaker_state}, so the dashboard's worker panel renders a live LLM
badge with a colored breaker-state dot:

  closed (green)   — LLM healthy
  half_open (amber) — cooldown elapsed; next call is a probe
  open (red)       — short-circuiting to deterministic templates

Heartbeat is the canonical worker self-report channel; piggybacking on
extra(...) avoids a new topic family while keeping the snapshot
recomputed each beat (30s).
This commit is contained in:
2026-04-27 17:51:00 -04:00
parent 55e86f606c
commit 397a1a111e
3 changed files with 153 additions and 2 deletions

View File

@@ -116,7 +116,12 @@ async def orchestrator_worker(
bus = None
shutdown = asyncio.Event()
heartbeat_task = asyncio.create_task(run_health_heartbeat(bus, "orchestrator"))
heartbeat_task = asyncio.create_task(
run_health_heartbeat(
bus, "orchestrator",
extra=lambda: {"realism": _realism_health_snapshot(llm, breaker)},
)
)
control_task = asyncio.create_task(
run_control_listener(bus, "orchestrator", shutdown),
)
@@ -180,6 +185,35 @@ def _roll_action_kind(rng: secrets.SystemRandom) -> str:
return _ACTION_WEIGHTS[-1][0] # unreachable, satisfy mypy
def _realism_health_snapshot(
    llm: Any, breaker: Optional[LLMCircuitBreaker],
) -> dict[str, Any]:
    """Describe the realism subsystem for the heartbeat ``extra`` payload.

    Reports whether an LLM is in use and, if so, the backend name (read
    from the ``DECNET_REALISM_LLM`` environment variable, defaulting to
    ``"ollama"``), the client's ``model`` attribute when it has one, and
    the circuit breaker's ``state`` when a breaker was supplied. The
    function only reads its arguments and the environment, so it can be
    recomputed on every heartbeat tick (every 30s).

    The same four keys are always present. With ``llm is None`` only
    ``llm_enabled`` carries information (``False``) and the rest are
    ``None``, so consumers can branch on ``llm_enabled`` alone.
    """
    enabled = llm is not None
    snapshot = {
        "llm_enabled": enabled,
        "llm_backend": None,
        "llm_model": None,
        "llm_breaker_state": None,
    }
    if not enabled:
        return snapshot

    snapshot["llm_backend"] = os.environ.get("DECNET_REALISM_LLM", "ollama")
    snapshot["llm_model"] = getattr(llm, "model", None)
    if breaker is not None:
        snapshot["llm_breaker_state"] = breaker.state
    return snapshot
def _llm_should_enable(explicit: Optional[bool]) -> bool:
"""Resolve the LLM-enabled flag from CLI / env / defaults.

View File

@@ -601,6 +601,54 @@ interface WorkersPanelProps {
pushToast: ReturnType<typeof useToast>['push'];
}
// Renders the LLM status chip of a realism-emitting worker (today: orchestrator).
// Sourced from the heartbeat ``extra.realism`` payload published by
// decnet.orchestrator.worker._realism_health_snapshot.
//
// - LLM disabled: a static "LLM OFF" chip.
// - LLM enabled:  "LLM <BACKEND>" preceded by a dot colored by breaker state
//   (closed = green, half_open = amber, open = red). The tooltip lists the
//   backend, the model (when reported) and the breaker state.
//
// A missing breaker state is shown as "unknown" with a neutral dot. The worker
// publishes ``llm_breaker_state: null`` when the LLM runs without a circuit
// breaker, and presenting that as "closed" would claim health nobody measured.
const RealismBadge: React.FC<{
  realism: {
    llm_enabled?: boolean;
    llm_backend?: string | null;
    llm_model?: string | null;
    llm_breaker_state?: 'closed' | 'open' | 'half_open' | null;
  };
}> = ({ realism }) => {
  if (!realism.llm_enabled) {
    return (
      <span
        className="chip dim-chip"
        style={{ marginLeft: 8 }}
        title="LLM enrichment disabled (DECNET_REALISM_LLM unset or --no-llm)"
      >
        LLM OFF
      </span>
    );
  }

  // Keep "not reported" distinct from "closed"; undefined and null both mean
  // the heartbeat carried no breaker state.
  const breaker = realism.llm_breaker_state ?? null;
  const breakerLabel = breaker ?? 'unknown';
  const breakerColor =
    breaker === 'open' ? '#ff5555'
      : breaker === 'half_open' ? '#ffaa00'
      : breaker === 'closed' ? 'var(--matrix)'
      : '#888888'; // neutral grey: state not reported

  // The model line is optional; filter(Boolean) drops it when absent.
  const tooltip = [
    `Backend: ${realism.llm_backend ?? '?'}`,
    realism.llm_model ? `Model: ${realism.llm_model}` : null,
    `Circuit breaker: ${breakerLabel}`,
  ].filter(Boolean).join('\n');

  return (
    <span
      className="chip dim-chip"
      style={{ marginLeft: 8, display: 'inline-flex', alignItems: 'center', gap: 4 }}
      title={tooltip}
    >
      <span style={{
        display: 'inline-block', width: 6, height: 6, borderRadius: '50%',
        backgroundColor: breakerColor,
      }} />
      LLM {(realism.llm_backend ?? 'on').toUpperCase()}
    </span>
  );
};
const WorkersPanel: React.FC<WorkersPanelProps> = ({ pushToast }) => {
const [workers, setWorkers] = useState<WorkerStatusRow[] | null>(null);
const [busConnected, setBusConnected] = useState<boolean | null>(null);
@@ -833,10 +881,21 @@ const WorkersPanel: React.FC<WorkersPanelProps> = ({ pushToast }) => {
{workers.map((w) => {
const isStopping = !!stopping[w.name];
const canStop = w.status === 'ok' && !isStopping && !busOffline;
const realism = (w.extra && (w.extra as any).realism) as
| {
llm_enabled?: boolean;
llm_backend?: string | null;
llm_model?: string | null;
llm_breaker_state?: 'closed' | 'open' | 'half_open' | null;
}
| undefined;
return (
<tr key={w.name}>
<td><span className={dotClass(w.status)} /></td>
<td style={{ fontWeight: 700, letterSpacing: 1 }}>{w.name.toUpperCase()}</td>
<td style={{ fontWeight: 700, letterSpacing: 1 }}>
{w.name.toUpperCase()}
{realism && <RealismBadge realism={realism} />}
</td>
<td style={{
color: w.status === 'ok' ? 'var(--matrix)'
: w.status === 'stale' ? '#ffaa00'

View File

@@ -0,0 +1,58 @@
"""LLM status surfaces in the orchestrator's heartbeat ``extra``.
Exposes the realism subsystem's LLM backend / model / circuit-breaker
state so the dashboard can render a status badge without poking
worker process memory.
Pinned by `feedback_push_principled_answer.md`: heartbeat is the
canonical worker self-report channel, so this rides the existing
``run_health_heartbeat(extra=...)`` extension hook rather than carving
a new bus topic.
"""
from __future__ import annotations
from decnet.orchestrator.worker import _realism_health_snapshot
from decnet.realism.llm.circuit import LLMCircuitBreaker
class _FakeLLM:
    """Minimal LLM stand-in exposing only a ``model`` attribute."""

    model = "llama3.1:8b"
def test_snapshot_reports_disabled_when_no_llm():
    """Without an LLM every key is still present; only ``llm_enabled`` is set."""
    expected = {
        "llm_enabled": False,
        "llm_backend": None,
        "llm_model": None,
        "llm_breaker_state": None,
    }
    assert _realism_health_snapshot(llm=None, breaker=None) == expected
def test_snapshot_carries_backend_model_breaker_state(monkeypatch):
    """An enabled LLM with a fresh breaker reports backend, model and ``closed``."""
    monkeypatch.setenv("DECNET_REALISM_LLM", "ollama")
    snapshot = _realism_health_snapshot(
        llm=_FakeLLM(),
        breaker=LLMCircuitBreaker(failure_threshold=2, cooldown_seconds=1.0),
    )

    assert snapshot["llm_enabled"] is True
    reported = {
        key: snapshot[key]
        for key in ("llm_backend", "llm_model", "llm_breaker_state")
    }
    assert reported == {
        "llm_backend": "ollama",
        "llm_model": "llama3.1:8b",
        "llm_breaker_state": "closed",
    }
def test_snapshot_reflects_open_breaker(monkeypatch):
    """Recording ``failure_threshold`` failures makes the snapshot report ``open``."""
    monkeypatch.setenv("DECNET_REALISM_LLM", "ollama")
    tripped = LLMCircuitBreaker(failure_threshold=2, cooldown_seconds=60.0)
    # Two failures match the threshold of 2 configured above.
    for _ in range(2):
        tripped.record_failure()

    snapshot = _realism_health_snapshot(llm=_FakeLLM(), breaker=tripped)
    assert snapshot["llm_breaker_state"] == "open"
def test_snapshot_handles_llm_without_breaker(monkeypatch):
    """Defensive: an LLM with ``breaker=None`` still yields a snapshot.

    The LLM is reported as enabled with its backend, and the breaker
    state is simply ``None``.
    """
    monkeypatch.setenv("DECNET_REALISM_LLM", "fake")
    snapshot = _realism_health_snapshot(llm=_FakeLLM(), breaker=None)

    enabled = snapshot["llm_enabled"]
    backend = snapshot["llm_backend"]
    breaker_state = snapshot["llm_breaker_state"]
    assert enabled is True
    assert backend == "fake"
    assert breaker_state is None