feat(realism): LLM/breaker status on orchestrator heartbeat
Surfaces realism subsystem state on the existing worker heartbeat
extra hook (system.orchestrator.health) — no new bus topic. Payload
carries {llm_enabled, llm_backend, llm_model, llm_breaker_state}, so
the dashboard's worker panel renders a live LLM badge with a colored
breaker-state dot:
closed (green) — LLM healthy
half_open (amber) — cooldown elapsed; next call is a probe
open (red) — short-circuiting to deterministic templates
Heartbeat is the canonical worker self-report channel; piggybacking on
extra(...) avoids a new topic family while keeping the snapshot
recomputed each beat (30s).
This commit is contained in:
@@ -116,7 +116,12 @@ async def orchestrator_worker(
|
||||
bus = None
|
||||
|
||||
shutdown = asyncio.Event()
|
||||
heartbeat_task = asyncio.create_task(run_health_heartbeat(bus, "orchestrator"))
|
||||
heartbeat_task = asyncio.create_task(
|
||||
run_health_heartbeat(
|
||||
bus, "orchestrator",
|
||||
extra=lambda: {"realism": _realism_health_snapshot(llm, breaker)},
|
||||
)
|
||||
)
|
||||
control_task = asyncio.create_task(
|
||||
run_control_listener(bus, "orchestrator", shutdown),
|
||||
)
|
||||
@@ -180,6 +185,35 @@ def _roll_action_kind(rng: secrets.SystemRandom) -> str:
|
||||
return _ACTION_WEIGHTS[-1][0] # unreachable, satisfy mypy
|
||||
|
||||
|
||||
def _realism_health_snapshot(
    llm: Any, breaker: Optional[LLMCircuitBreaker],
) -> dict[str, Any]:
    """Build the ``realism`` entry of the orchestrator heartbeat ``extra``.

    The returned dict always carries the same four keys so consumers
    (e.g. the dashboard badge) can branch on ``llm_enabled`` alone:

    * ``llm_enabled`` -- ``True`` when an LLM client was supplied.
    * ``llm_backend`` -- value of the ``DECNET_REALISM_LLM`` environment
      variable, falling back to ``"ollama"`` when it is unset.
    * ``llm_model`` -- the client's ``model`` attribute, or ``None`` if
      the client has no such attribute.
    * ``llm_breaker_state`` -- ``breaker.state``, or ``None`` when no
      breaker was supplied.

    Args:
        llm: The LLM client in use, or ``None`` when LLM enrichment is
            disabled.
        breaker: Circuit breaker guarding the LLM, or ``None``.

    Returns:
        A freshly built dict; nothing is cached, so each call reflects
        the state at the moment it is invoked.
    """
    enabled = llm is not None
    snapshot: dict[str, Any] = {
        "llm_enabled": enabled,
        "llm_backend": None,
        "llm_model": None,
        "llm_breaker_state": None,
    }
    if not enabled:
        # Disabled: keep the full key set, every detail field stays None.
        return snapshot

    snapshot["llm_backend"] = os.environ.get("DECNET_REALISM_LLM", "ollama")
    snapshot["llm_model"] = getattr(llm, "model", None)
    if breaker is not None:
        snapshot["llm_breaker_state"] = breaker.state
    return snapshot
|
||||
|
||||
|
||||
def _llm_should_enable(explicit: Optional[bool]) -> bool:
|
||||
"""Resolve the LLM-enabled flag from CLI / env / defaults.
|
||||
|
||||
|
||||
@@ -601,6 +601,54 @@ interface WorkersPanelProps {
|
||||
pushToast: ReturnType<typeof useToast>['push'];
|
||||
}
|
||||
|
||||
|
||||
// LLM status chip for a realism-emitting worker (today: orchestrator).
// The data comes from the heartbeat ``extra.realism`` payload published by
// :func:`decnet.orchestrator.worker._realism_health_snapshot`.
//
// Two renderings:
//   * LLM disabled -> a plain "LLM OFF" chip.
//   * LLM enabled  -> "LLM <BACKEND>" preceded by a dot coloured by the
//     circuit-breaker state (open = red, half_open = amber, otherwise the
//     theme's matrix colour). A missing breaker state is shown as 'closed'.
const RealismBadge: React.FC<{
  realism: {
    llm_enabled?: boolean;
    llm_backend?: string | null;
    llm_model?: string | null;
    llm_breaker_state?: 'closed' | 'open' | 'half_open' | null;
  };
}> = ({ realism }) => {
  const { llm_enabled, llm_backend, llm_model, llm_breaker_state } = realism;

  if (!llm_enabled) {
    return (
      <span
        className="chip dim-chip"
        style={{ marginLeft: 8 }}
        title="LLM enrichment disabled (DECNET_REALISM_LLM unset or --no-llm)"
      >
        LLM OFF
      </span>
    );
  }

  // Null/undefined breaker state falls back to 'closed'.
  const breaker = llm_breaker_state ?? 'closed';

  let breakerColor: string;
  switch (breaker) {
    case 'open':
      breakerColor = '#ff5555';
      break;
    case 'half_open':
      breakerColor = '#ffaa00';
      break;
    default:
      breakerColor = 'var(--matrix)';
  }

  // Tooltip is one fact per line; the model line is omitted when unknown.
  const tooltipLines: string[] = [`Backend: ${llm_backend ?? '?'}`];
  if (llm_model) {
    tooltipLines.push(`Model: ${llm_model}`);
  }
  tooltipLines.push(`Circuit breaker: ${breaker}`);

  const dotStyle = {
    display: 'inline-block', width: 6, height: 6, borderRadius: '50%',
    backgroundColor: breakerColor,
  };

  return (
    <span
      className="chip dim-chip"
      style={{ marginLeft: 8, display: 'inline-flex', alignItems: 'center', gap: 4 }}
      title={tooltipLines.join('\n')}
    >
      <span style={dotStyle} />
      LLM {(llm_backend ?? 'on').toUpperCase()}
    </span>
  );
};
|
||||
|
||||
const WorkersPanel: React.FC<WorkersPanelProps> = ({ pushToast }) => {
|
||||
const [workers, setWorkers] = useState<WorkerStatusRow[] | null>(null);
|
||||
const [busConnected, setBusConnected] = useState<boolean | null>(null);
|
||||
@@ -833,10 +881,21 @@ const WorkersPanel: React.FC<WorkersPanelProps> = ({ pushToast }) => {
|
||||
{workers.map((w) => {
|
||||
const isStopping = !!stopping[w.name];
|
||||
const canStop = w.status === 'ok' && !isStopping && !busOffline;
|
||||
const realism = (w.extra && (w.extra as any).realism) as
|
||||
| {
|
||||
llm_enabled?: boolean;
|
||||
llm_backend?: string | null;
|
||||
llm_model?: string | null;
|
||||
llm_breaker_state?: 'closed' | 'open' | 'half_open' | null;
|
||||
}
|
||||
| undefined;
|
||||
return (
|
||||
<tr key={w.name}>
|
||||
<td><span className={dotClass(w.status)} /></td>
|
||||
<td style={{ fontWeight: 700, letterSpacing: 1 }}>{w.name.toUpperCase()}</td>
|
||||
<td style={{ fontWeight: 700, letterSpacing: 1 }}>
|
||||
{w.name.toUpperCase()}
|
||||
{realism && <RealismBadge realism={realism} />}
|
||||
</td>
|
||||
<td style={{
|
||||
color: w.status === 'ok' ? 'var(--matrix)'
|
||||
: w.status === 'stale' ? '#ffaa00'
|
||||
|
||||
58
tests/orchestrator/test_realism_health_snapshot.py
Normal file
58
tests/orchestrator/test_realism_health_snapshot.py
Normal file
@@ -0,0 +1,58 @@
|
||||
"""LLM status surfaces in the orchestrator's heartbeat ``extra``.
|
||||
|
||||
Exposes the realism subsystem's LLM backend / model / circuit-breaker
|
||||
state so the dashboard can render a status badge without poking
|
||||
worker process memory.
|
||||
|
||||
Pinned by `feedback_push_principled_answer.md`: heartbeat is the
|
||||
canonical worker self-report channel, so this rides the existing
|
||||
``run_health_heartbeat(extra=...)`` extension hook rather than carving
|
||||
a new bus topic.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.orchestrator.worker import _realism_health_snapshot
|
||||
from decnet.realism.llm.circuit import LLMCircuitBreaker
|
||||
|
||||
|
||||
class _FakeLLM:
    """Minimal stand-in for an LLM client.

    Only exposes the ``model`` attribute, which is the one thing the
    snapshot helper reads from the client.
    """

    model: str = "llama3.1:8b"
|
||||
|
||||
|
||||
def test_snapshot_reports_disabled_when_no_llm():
    """Without an LLM client the snapshot keeps all four keys, with
    ``llm_enabled`` false and every detail field ``None``."""
    expected = {
        "llm_enabled": False,
        "llm_backend": None,
        "llm_model": None,
        "llm_breaker_state": None,
    }
    assert _realism_health_snapshot(llm=None, breaker=None) == expected
|
||||
|
||||
|
||||
def test_snapshot_carries_backend_model_breaker_state(monkeypatch):
    """An enabled LLM with a fresh breaker reports backend, model and a
    ``"closed"`` breaker state."""
    monkeypatch.setenv("DECNET_REALISM_LLM", "ollama")
    fresh_breaker = LLMCircuitBreaker(failure_threshold=2, cooldown_seconds=1.0)

    snapshot = _realism_health_snapshot(llm=_FakeLLM(), breaker=fresh_breaker)

    assert snapshot["llm_enabled"] is True
    assert snapshot["llm_backend"] == "ollama"
    assert snapshot["llm_model"] == "llama3.1:8b"
    assert snapshot["llm_breaker_state"] == "closed"
|
||||
|
||||
|
||||
def test_snapshot_reflects_open_breaker(monkeypatch):
    """After as many recorded failures as the breaker's threshold, the
    snapshot reports the breaker as ``"open"``."""
    monkeypatch.setenv("DECNET_REALISM_LLM", "ollama")
    tripped = LLMCircuitBreaker(failure_threshold=2, cooldown_seconds=60.0)
    # Two failures match failure_threshold=2.
    for _ in range(2):
        tripped.record_failure()

    snapshot = _realism_health_snapshot(llm=_FakeLLM(), breaker=tripped)

    assert snapshot["llm_breaker_state"] == "open"
|
||||
|
||||
|
||||
def test_snapshot_handles_llm_without_breaker(monkeypatch):
    """Defensive case: an LLM client with ``breaker=None`` still yields
    a snapshot, with the breaker state reported as ``None``."""
    monkeypatch.setenv("DECNET_REALISM_LLM", "fake")

    snapshot = _realism_health_snapshot(llm=_FakeLLM(), breaker=None)

    assert snapshot["llm_enabled"] is True
    assert snapshot["llm_backend"] == "fake"
    assert snapshot["llm_breaker_state"] is None
|
||||
Reference in New Issue
Block a user