Files
DECNET/tests/bus/test_heartbeat.py
anti 5c0631e12c feat(agent,forwarder,updater): publish system.<worker>.health heartbeats (DEBT-031 workers 7-9)
All three workers now share a run_health_heartbeat helper in
decnet.bus.publish.  Each publishes system.<worker>.health on a 30s tick
with {worker, ts} plus optional per-worker extras.  Subscribers can
watch system.*.health to see every DECNET worker on a host at once.

- agent: heartbeat runs inside the FastAPI lifespan alongside the
  existing master-facing heartbeat; bus-disabled path is a no-op.
- forwarder: heartbeat task spawned at run_forwarder entry, cancelled
  in the finally block so a crashed master loop never leaks the task.
- updater: new FastAPI lifespan hosts the heartbeat.

Heartbeat helper swallows extra() failures and is cancellation-safe so
lifespan teardown never hangs on it.
2026-04-21 17:02:10 -04:00

105 lines
3.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Shared ``run_health_heartbeat`` helper (DEBT-031 workers 79).
Three workers (agent, forwarder, updater) publish identical
``system.<worker>.health`` heartbeats. Rather than copy the loop three
times, ``decnet.bus.publish.run_health_heartbeat`` carries it. These
tests pin:
* topic is ``system.<worker>.health`` via the builder;
* payload carries worker name and monotonic-ish timestamp;
* ``extra()`` hook merges per-worker fields;
* ``None`` bus yields a benign no-op loop (still cancellable);
* ``extra()`` failure doesn't break the tick.
"""
from __future__ import annotations
import asyncio
import pytest
import pytest_asyncio
from decnet.bus.fake import FakeBus
from decnet.bus.publish import run_health_heartbeat
@pytest_asyncio.fixture
async def bus() -> FakeBus:
b = FakeBus()
await b.connect()
yield b
await b.close()
@pytest.mark.asyncio
async def test_heartbeat_publishes_under_system_worker_health(bus: FakeBus) -> None:
task = asyncio.create_task(
run_health_heartbeat(bus, "agent", interval=0.05),
)
try:
sub = bus.subscribe("system.*.health")
async with sub:
event = await asyncio.wait_for(sub.__anext__(), timeout=2.0)
finally:
task.cancel()
await asyncio.gather(task, return_exceptions=True)
assert event.topic == "system.agent.health"
assert event.type == "health"
assert event.payload["worker"] == "agent"
assert isinstance(event.payload["ts"], float)
@pytest.mark.asyncio
async def test_heartbeat_merges_extra_payload(bus: FakeBus) -> None:
task = asyncio.create_task(
run_health_heartbeat(
bus, "forwarder", interval=0.05,
extra=lambda: {"offset": 4096, "connected": True},
),
)
try:
sub = bus.subscribe("system.forwarder.health")
async with sub:
event = await asyncio.wait_for(sub.__anext__(), timeout=2.0)
finally:
task.cancel()
await asyncio.gather(task, return_exceptions=True)
assert event.payload["offset"] == 4096
assert event.payload["connected"] is True
assert event.payload["worker"] == "forwarder"
@pytest.mark.asyncio
async def test_heartbeat_survives_extra_failure(bus: FakeBus) -> None:
# An extra() that blows up must not abort the heartbeat loop.
def _boom():
raise RuntimeError("extras exploded")
task = asyncio.create_task(
run_health_heartbeat(bus, "updater", interval=0.05, extra=_boom),
)
try:
sub = bus.subscribe("system.updater.health")
async with sub:
event = await asyncio.wait_for(sub.__anext__(), timeout=2.0)
finally:
task.cancel()
await asyncio.gather(task, return_exceptions=True)
# Base payload still present despite extra() blowing up.
assert event.payload["worker"] == "updater"
@pytest.mark.asyncio
async def test_heartbeat_is_cancellable_with_none_bus() -> None:
# Bus-disabled path: loop runs but publishes nothing. Must still
# cancel cleanly so lifespan teardown doesn't hang.
task = asyncio.create_task(
run_health_heartbeat(None, "agent", interval=0.01),
)
await asyncio.sleep(0.05)
task.cancel()
await asyncio.gather(task, return_exceptions=True)
assert task.done()