merge: testing → main (reconcile 2-week divergence)

This commit is contained in:
2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions

0
tests/bus/__init__.py Normal file
View File

59
tests/bus/conftest.py Normal file
View File

@@ -0,0 +1,59 @@
"""Shared fixtures for decnet.bus tests."""
from __future__ import annotations
import asyncio
import pathlib
from typing import AsyncIterator
import pytest
import pytest_asyncio
from decnet.bus.fake import FakeBus
from decnet.bus.unix_client import UnixSocketBus
from decnet.bus.unix_server import BusServer
@pytest_asyncio.fixture
async def fake_bus() -> AsyncIterator[FakeBus]:
    """Yield a connected in-process :class:`FakeBus`; close it on teardown."""
    instance = FakeBus()
    await instance.connect()
    try:
        yield instance
    finally:
        # Always release the bus, whatever happened in the consuming test.
        await instance.close()
@pytest_asyncio.fixture
async def unix_bus(tmp_path: pathlib.Path) -> AsyncIterator[tuple[BusServer, UnixSocketBus]]:
    """Spin a BusServer on a tmp socket, yield ``(server, connected client)``.

    No privileged group chown — the fixture passes ``group=None`` so the
    socket stays owned by the test-runner's process group.

    Teardown order is: close the client, cancel the ``serve_forever`` task,
    close the server. The server teardown lives in an outer ``finally`` so a
    failing ``client.connect()`` cannot leak the started server or its task.
    """
    sock = tmp_path / "bus.sock"
    server = BusServer(sock, group=None)
    await server.start()
    serve_task = asyncio.create_task(server.serve_forever())
    try:
        client = UnixSocketBus(sock, client_name="test-client")
        await client.connect()
        try:
            yield server, client
        finally:
            await client.close()
    finally:
        serve_task.cancel()
        try:
            await serve_task
        except asyncio.CancelledError:
            # Expected: we cancelled the task ourselves.
            pass
        await server.close()
@pytest.fixture
def bus_env_fake(monkeypatch: pytest.MonkeyPatch) -> None:
    """Point :func:`decnet.bus.factory.get_bus` at the in-process FakeBus."""
    # Clear any explicit socket path first, then select the fake transport.
    monkeypatch.delenv("DECNET_BUS_SOCKET", raising=False)
    for name, value in (
        ("DECNET_BUS_TYPE", "fake"),
        ("DECNET_BUS_ENABLED", "true"),
    ):
        monkeypatch.setenv(name, value)

View File

@@ -0,0 +1,135 @@
"""Tests for the process-wide app-bus singleton.
Covers the retry-with-backoff behaviour of ``get_app_bus()`` — the
regression guard against the "one-shot veto" bug where a startup race
between ``decnet bus`` and the API's lifespan poisoned the singleton
for the entire process lifetime.
"""
from __future__ import annotations
import asyncio
import time
from typing import Any
from unittest.mock import AsyncMock, MagicMock
import pytest
import decnet.bus.app as app_module
@pytest.fixture(autouse=True)
def _reset_singleton() -> Any:
    """Wipe the module-level singleton state before and after every test."""

    def _wipe() -> None:
        # Both the cached bus and the failure timestamp are module globals.
        app_module._shared = None
        app_module._last_failure_ts = 0.0

    _wipe()
    yield
    _wipe()
@pytest.mark.asyncio
async def test_first_call_succeeds_when_bus_connectable(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Happy path: one successful connect, then the cached instance is reused."""
    stub = MagicMock()
    stub.connect = AsyncMock()
    monkeypatch.setattr(app_module, "get_bus", lambda **_kw: stub)

    first = await app_module.get_app_bus()
    assert first is stub
    stub.connect.assert_awaited_once()

    # A repeat call must hand back the same object without reconnecting.
    second = await app_module.get_app_bus()
    assert second is stub
    assert stub.connect.await_count == 1
@pytest.mark.asyncio
async def test_connect_failure_backoff_prevents_hot_retry(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Calls inside the backoff window return None without reconnecting.

    After the first failed connect, the immediate second and third calls
    must not trigger another ``connect()`` — the cost of failure stays
    bounded.
    """
    stub = MagicMock()
    stub.connect = AsyncMock(side_effect=ConnectionError("socket gone"))
    monkeypatch.setattr(app_module, "get_bus", lambda **_kw: stub)

    # Attempt 1 performs the (failing) connect; attempts 2 and 3 fall inside
    # the backoff window, so the await count has to stay pinned at one.
    for _attempt in range(3):
        assert await app_module.get_app_bus() is None
        assert stub.connect.await_count == 1
@pytest.mark.asyncio
async def test_connect_retried_after_backoff_expires(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """After the backoff window has passed, the next call reconnects.

    Regression guard for the original 'one-shot veto' bug — the whole
    point of the fix.
    """
    stub = MagicMock()
    # First attempt fails, second succeeds.
    outcomes = [ConnectionError("socket gone"), None]
    stub.connect = AsyncMock(side_effect=outcomes)
    monkeypatch.setattr(app_module, "get_bus", lambda **_kw: stub)

    assert await app_module.get_app_bus() is None
    assert stub.connect.await_count == 1

    # Pretend the window elapsed by pushing the recorded failure timestamp
    # far enough into the past.
    elapsed = app_module._RETRY_BACKOFF + 0.1
    app_module._last_failure_ts = time.monotonic() - elapsed

    recovered = await app_module.get_app_bus()
    assert recovered is stub
    assert stub.connect.await_count == 2
@pytest.mark.asyncio
async def test_close_app_bus_clears_backoff_window(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``close_app_bus()`` must zero ``_last_failure_ts``.

    Without the reset, code that brings the app-bus up/down/up inside one
    process would observe a stale backoff and refuse to retry immediately.
    """
    stub = MagicMock()
    stub.connect = AsyncMock(side_effect=ConnectionError("x"))
    stub.close = AsyncMock()
    monkeypatch.setattr(app_module, "get_bus", lambda **_kw: stub)

    # A failed connect records a failure timestamp...
    assert await app_module.get_app_bus() is None
    assert app_module._last_failure_ts > 0.0

    # ...which closing the app bus wipes again.
    await app_module.close_app_bus()
    assert app_module._last_failure_ts == 0.0

    # Next call retries immediately (no backoff wait).
    stub.connect.side_effect = None  # make it succeed this time
    assert await app_module.get_app_bus() is stub
@pytest.mark.asyncio
async def test_concurrent_callers_do_not_stampede_connect(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Ten concurrent callers share a single ``connect()``.

    The lock must serialise callers so a just-started bus is not hammered
    with N parallel connect attempts.
    """
    stub = MagicMock()
    stub.connect = AsyncMock()
    monkeypatch.setattr(app_module, "get_bus", lambda **_kw: stub)

    pending = [app_module.get_app_bus() for _ in range(10)]
    outcomes = await asyncio.gather(*pending)

    for outcome in outcomes:
        assert outcome is stub
    assert stub.connect.await_count == 1

66
tests/bus/test_base.py Normal file
View File

@@ -0,0 +1,66 @@
"""Unit tests for :mod:`decnet.bus.base` — wildcard matching and the Event envelope."""
from __future__ import annotations
import pytest
from decnet.bus.base import EVENT_SCHEMA_VERSION, Event, matches
class TestMatches:
    """Table-driven checks for the ``matches(pattern, topic)`` wildcard matcher."""

    @pytest.mark.parametrize(
        "pattern,topic",
        [
            ("topology.abc.mutation.applied", "topology.abc.mutation.applied"),
            ("topology.*.mutation.applied", "topology.abc.mutation.applied"),
            ("topology.*.mutation.*", "topology.abc.mutation.applied"),
            ("topology.>", "topology.abc.mutation.applied"),
            ("topology.>", "topology.abc.status"),
            ("decky.*.state", "decky.xyz.state"),
            ("system.bus.health", "system.bus.health"),
        ],
    )
    def test_matches_positive(self, pattern: str, topic: str) -> None:
        """Each pattern in the table must accept its paired topic."""
        verdict = matches(pattern, topic)
        assert verdict is True

    @pytest.mark.parametrize(
        "pattern,topic",
        [
            ("topology.abc.mutation.applied", "topology.abc.mutation.failed"),
            # * is one token
            ("topology.*", "topology.abc.mutation.applied"),
            # > needs ≥1 trailing
            ("topology.>", "topology"),
            # missing middle token
            ("decky.*.state", "decky.state"),
            ("decky.*.state", "decky.xyz.status"),
            ("a.b.c", "a.b"),
            ("a.b", "a.b.c"),
        ],
    )
    def test_matches_negative(self, pattern: str, topic: str) -> None:
        """Each pattern in the table must reject its paired topic."""
        verdict = matches(pattern, topic)
        assert verdict is False
class TestEvent:
    """Serialisation checks for the ``Event`` envelope."""

    def test_to_dict_round_trip(self) -> None:
        """``to_dict`` exposes version, topic, payload, type, id and timestamp."""
        envelope = Event(topic="topology.abc.status", payload={"status": "active"}, type="status")
        wire = envelope.to_dict()

        assert wire["v"] == EVENT_SCHEMA_VERSION
        assert wire["topic"] == "topology.abc.status"
        assert wire["payload"] == {"status": "active"}
        assert wire["type"] == "status"
        # id and ts are generated, so only their types are pinned.
        assert isinstance(wire["id"], str)
        assert isinstance(wire["ts"], float)

    def test_from_dict_prefers_wire_fields_but_ignores_topic(self) -> None:
        """The transport-supplied topic wins over any ``topic`` key in the body."""
        # The wire topic is the authoritative one (passed from the transport);
        # a malicious "topic" field in the body must be ignored.
        body = {
            "v": 1,
            "id": "abc",
            "type": "status",
            "topic": "attacker.spoofed",  # ignored
            "ts": 123.0,
            "payload": {"x": 1},
        }
        rebuilt = Event.from_dict("topology.abc.status", body)

        assert rebuilt.topic == "topology.abc.status"
        assert rebuilt.payload == {"x": 1}
        assert rebuilt.id == "abc"
        assert rebuilt.ts == 123.0

    def test_from_dict_tolerates_missing_fields(self) -> None:
        """An empty body still produces a usable event with defaults filled in."""
        rebuilt = Event.from_dict("system.log", {})

        assert rebuilt.topic == "system.log"
        assert rebuilt.payload == {}
        assert rebuilt.v == EVENT_SCHEMA_VERSION
        assert rebuilt.id  # auto-generated

View File

@@ -0,0 +1,103 @@
"""Tests for graceful publish-on-closed-bus behaviour.
Regression guard for the 'publish on closed bus' log flood: when a
worker's private bus closes (shutdown) but stream threads keep calling
the publish closure, the bus must not raise a RuntimeError per call.
First drop warns loudly (bus is critical infra); subsequent drops on
the same instance are DEBUG to prevent the flood.
"""
from __future__ import annotations
import asyncio
import logging
import pathlib
from unittest.mock import MagicMock
import pytest
from decnet.bus.publish import make_thread_safe_publisher
from decnet.bus.unix_client import UnixSocketBus
def _make_closed_bus() -> UnixSocketBus:
    """Return a UnixSocketBus whose ``_closed`` flag is set, with no socket I/O.

    A live connection is unnecessary for the closed-publish path — the
    guard clause short-circuits before any I/O.
    """
    closed = UnixSocketBus(pathlib.Path("/tmp/does-not-matter.sock"))
    # Flip the private flag directly instead of connecting and closing.
    closed._closed = True
    return closed
@pytest.mark.asyncio
async def test_publish_on_closed_bus_returns_silently(
    caplog: pytest.LogCaptureFixture,
) -> None:
    """First post-close publish warns loudly; does not raise."""
    bus = _make_closed_bus()
    with caplog.at_level(logging.WARNING, logger="decnet.bus.client"):
        await bus.publish("system.log", {"x": 1})

    drop_warnings = [
        rec
        for rec in caplog.records
        if rec.levelno == logging.WARNING
        and "publish on closed bus dropped" in rec.getMessage()
    ]
    assert drop_warnings, f"expected one WARNING, got: {[(r.levelname, r.getMessage()) for r in caplog.records]}"
@pytest.mark.asyncio
async def test_subsequent_closed_publishes_downgrade_to_debug(
    caplog: pytest.LogCaptureFixture,
) -> None:
    """Only the first drop is a WARNING; the remaining drops log at DEBUG.

    Regression guard against the log flood.
    """
    bus = _make_closed_bus()
    with caplog.at_level(logging.DEBUG, logger="decnet.bus.client"):
        for _ in range(50):
            await bus.publish("system.log", {"x": 1})

    # Partition the captured records by level in a single pass.
    warnings = []
    debugs = []
    for record in caplog.records:
        if record.levelno == logging.WARNING:
            warnings.append(record)
        elif record.levelno == logging.DEBUG:
            debugs.append(record)

    assert len(warnings) == 1, (
        f"expected exactly 1 WARNING across 50 publishes, got {len(warnings)}"
    )
    assert len(debugs) >= 49, (
        f"expected >=49 DEBUG drops, got {len(debugs)}"
    )
@pytest.mark.asyncio
async def test_thread_safe_publisher_short_circuits_on_closed_bus(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The sync shim must not marshal a coroutine when the bus is closed.

    ``asyncio.run_coroutine_threadsafe`` (as seen through the publish
    module) is replaced by a spy that still delegates to the real function.
    The ``monkeypatch`` fixture restores the original on teardown, so the
    global ``asyncio`` module cannot stay patched if this test fails.
    """
    import decnet.bus.publish as pub_mod

    bus = _make_closed_bus()
    loop = asyncio.get_running_loop()
    publisher = make_thread_safe_publisher(bus, loop)

    # The spy records calls and forwards them unchanged to the real function.
    spy = MagicMock(wraps=asyncio.run_coroutine_threadsafe)
    monkeypatch.setattr(pub_mod.asyncio, "run_coroutine_threadsafe", spy)

    for value in (1, 2, 3):
        publisher("system.log", {"x": value})

    spy.assert_not_called()
@pytest.mark.asyncio
async def test_thread_safe_publisher_noop_when_bus_is_none() -> None:
    """A None bus still yields a no-op callable (pre-existing contract)."""
    running_loop = asyncio.get_running_loop()
    noop = make_thread_safe_publisher(None, running_loop)

    # Calling it must neither raise nor return anything.
    outcome = noop("topic", {"x": 1})
    assert outcome is None

View File

@@ -0,0 +1,77 @@
"""Tests for :func:`run_control_listener`.
The listener is the worker-side half of the Workers panel stop flow:
consume ``system.<worker>.control`` messages, set a shutdown event on a
well-formed ``{"action": "stop"}``, and ignore everything else without
raising.
"""
from __future__ import annotations
import asyncio
import pytest
from decnet.bus import topics as _topics
from decnet.bus.fake import FakeBus
from decnet.bus.publish import run_control_listener
@pytest.mark.asyncio
async def test_control_listener_sets_shutdown_on_stop() -> None:
    """A well-formed stop message ends the listener and sets the shutdown event."""
    bus = FakeBus()
    await bus.connect()
    shutdown = asyncio.Event()
    try:
        listener = asyncio.create_task(run_control_listener(bus, "mutator", shutdown))
        # Give the subscribe() call a tick to register before we publish.
        await asyncio.sleep(0)

        control_topic = _topics.system_control("mutator")
        stop_message = {"action": _topics.WORKER_CONTROL_STOP, "requested_by": "admin"}
        await bus.publish(control_topic, stop_message, event_type="control")

        await asyncio.wait_for(listener, timeout=1.0)
        assert shutdown.is_set()
    finally:
        await bus.close()
@pytest.mark.asyncio
async def test_control_listener_ignores_malformed() -> None:
    """Unknown or missing actions are skipped; only a real stop ends the listener."""
    bus = FakeBus()
    await bus.connect()
    shutdown = asyncio.Event()
    try:
        listener = asyncio.create_task(run_control_listener(bus, "mutator", shutdown))
        await asyncio.sleep(0)

        messages = [
            # Unknown action and missing action — neither should raise or
            # trigger shutdown.
            {"action": "bogus"},
            {"requested_by": "admin"},
            # Finally a real stop, to unblock the task so the test terminates.
            {"action": _topics.WORKER_CONTROL_STOP},
        ]
        for message in messages:
            await bus.publish(
                _topics.system_control("mutator"), message, event_type="control",
            )

        await asyncio.wait_for(listener, timeout=1.0)
        assert shutdown.is_set()
    finally:
        await bus.close()
@pytest.mark.asyncio
async def test_control_listener_none_bus_awaits_shutdown() -> None:
    """With ``bus=None`` the listener simply waits on the shutdown event."""
    # This degraded mode lets callers create_task() unconditionally.
    shutdown = asyncio.Event()
    listener = asyncio.create_task(run_control_listener(None, "mutator", shutdown))

    await asyncio.sleep(0)
    still_running = not listener.done()
    assert still_running

    shutdown.set()
    await asyncio.wait_for(listener, timeout=1.0)

52
tests/bus/test_factory.py Normal file
View File

@@ -0,0 +1,52 @@
"""Tests for :func:`decnet.bus.factory.get_bus` dispatch."""
from __future__ import annotations
import pathlib
import pytest
from decnet.bus.factory import _default_socket_path, get_bus
from decnet.bus.fake import FakeBus, NullBus
from decnet.bus.unix_client import UnixSocketBus
def test_disabled_returns_null_bus(monkeypatch: pytest.MonkeyPatch) -> None:
    """A disabled bus resolves to :class:`NullBus`, whatever type is configured."""
    settings = {
        "DECNET_BUS_ENABLED": "false",
        "DECNET_BUS_TYPE": "unix",  # ignored when disabled
    }
    for name, value in settings.items():
        monkeypatch.setenv(name, value)
    assert isinstance(get_bus(), NullBus)
def test_fake_dispatch(monkeypatch: pytest.MonkeyPatch) -> None:
    """Bus type ``fake`` resolves to :class:`FakeBus`."""
    settings = {"DECNET_BUS_ENABLED": "true", "DECNET_BUS_TYPE": "fake"}
    for name, value in settings.items():
        monkeypatch.setenv(name, value)
    assert isinstance(get_bus(), FakeBus)
def test_unix_dispatch(monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path) -> None:
    """Bus type ``unix`` resolves to :class:`UnixSocketBus`."""
    socket_path = str(tmp_path / "b.sock")
    settings = {
        "DECNET_BUS_ENABLED": "true",
        "DECNET_BUS_TYPE": "unix",
        "DECNET_BUS_SOCKET": socket_path,
    }
    for name, value in settings.items():
        monkeypatch.setenv(name, value)
    assert isinstance(get_bus(), UnixSocketBus)
def test_unknown_type_raises(monkeypatch: pytest.MonkeyPatch) -> None:
    """An unrecognised bus type is rejected with ``ValueError``."""
    settings = {"DECNET_BUS_ENABLED": "true", "DECNET_BUS_TYPE": "mqtt"}
    for name, value in settings.items():
        monkeypatch.setenv(name, value)
    with pytest.raises(ValueError, match="Unsupported bus type"):
        get_bus()
def test_default_socket_path_honors_env(monkeypatch: pytest.MonkeyPatch) -> None:
    """An explicit ``DECNET_BUS_SOCKET`` value is returned verbatim."""
    monkeypatch.setenv("DECNET_BUS_SOCKET", "/tmp/explicit.sock")
    resolved = _default_socket_path()
    assert resolved == "/tmp/explicit.sock"
def test_default_socket_path_falls_back_to_home(monkeypatch: pytest.MonkeyPatch) -> None:
    """With no env override and no usable directory, the path ends in ``.decnet/bus.sock``."""
    monkeypatch.delenv("DECNET_BUS_SOCKET", raising=False)
    # Force /run/decnet to look unusable.
    monkeypatch.setattr("os.path.isdir", lambda p: False)
    resolved = _default_socket_path()
    assert resolved.endswith(".decnet/bus.sock")

108
tests/bus/test_fake_bus.py Normal file
View File

@@ -0,0 +1,108 @@
"""Tests for :class:`decnet.bus.fake.FakeBus` and :class:`NullBus`."""
from __future__ import annotations
import asyncio
import pytest
from decnet.bus.fake import FakeBus, NullBus
async def _collect(sub, n: int, timeout: float = 1.0) -> list:
out = []
try:
async with asyncio.timeout(timeout):
async for event in sub:
out.append(event)
if len(out) >= n:
break
except TimeoutError:
pass
return out
class TestFakeBus:
    """Behavioural tests for the in-process :class:`FakeBus`."""

    async def test_publish_delivers_to_exact_match(self, fake_bus: FakeBus) -> None:
        """An exact-topic subscriber receives the published payload."""
        stream = fake_bus.subscribe("topology.abc.status")
        async with stream:
            await fake_bus.publish("topology.abc.status", {"status": "active"})
            received = await _collect(stream, 1)
        assert len(received) == 1
        assert received[0].payload == {"status": "active"}

    async def test_publish_delivers_to_wildcard(self, fake_bus: FakeBus) -> None:
        """A wildcard subscriber gets the matching topics and nothing else."""
        stream = fake_bus.subscribe("topology.*.mutation.*")
        outgoing = [
            ("topology.t1.mutation.applied", {"id": 1}),
            ("topology.t2.mutation.failed", {"id": 2}),
            ("decky.x.state", {"state": "running"}),  # should not match
        ]
        async with stream:
            for topic, payload in outgoing:
                await fake_bus.publish(topic, payload)
            received = await _collect(stream, 2)
        assert len(received) == 2
        assert {event.payload["id"] for event in received} == {1, 2}

    async def test_multiple_subscribers_each_get_copy(self, fake_bus: FakeBus) -> None:
        """Two subscribers on the same pattern each receive the event."""
        first = fake_bus.subscribe("topology.>")
        second = fake_bus.subscribe("topology.>")
        async with first, second:
            await fake_bus.publish("topology.abc.status", {"status": "active"})
            from_first = await _collect(first, 1)
            from_second = await _collect(second, 1)
        assert len(from_first) == 1
        assert len(from_second) == 1

    async def test_subscription_close_unblocks_iter(self, fake_bus: FakeBus) -> None:
        """Closing a subscription ends a consumer blocked on iteration."""
        stream = fake_bus.subscribe("topology.>")

        async def consume() -> list:
            return [event async for event in stream]

        consumer = asyncio.create_task(consume())
        await asyncio.sleep(0.01)  # let task block on queue.get()
        await stream.aclose()
        drained = await asyncio.wait_for(consumer, timeout=0.5)
        assert drained == []

    async def test_close_is_idempotent(self, fake_bus: FakeBus) -> None:
        """Closing twice in a row is allowed."""
        for _ in range(2):
            # The second pass must not raise.
            await fake_bus.close()

    async def test_publish_on_closed_raises(self, fake_bus: FakeBus) -> None:
        """Both publish and subscribe reject use after close."""
        await fake_bus.close()
        with pytest.raises(RuntimeError):
            await fake_bus.publish("x", {})
        with pytest.raises(RuntimeError):
            fake_bus.subscribe("x")

    async def test_backpressure_drops_oldest(self) -> None:
        """A full subscriber queue keeps the most recent events."""
        small_bus = FakeBus(queue_size=2)
        await small_bus.connect()
        try:
            stream = small_bus.subscribe("t")
            # Don't consume; publish 5 — queue holds at most 2, oldest dropped.
            for index in range(5):
                await small_bus.publish("t", {"i": index})
            received = await _collect(stream, 2, timeout=0.2)
            assert len(received) == 2
            # We kept the 2 most recent.
            assert received[-1].payload["i"] == 4
        finally:
            await small_bus.close()
class TestNullBus:
    """:class:`NullBus` accepts every call and delivers nothing."""

    async def test_publish_is_noop(self) -> None:
        """connect / publish / close all complete without raising."""
        null_bus = NullBus()
        await null_bus.connect()
        await null_bus.publish("anything", {"x": 1})
        await null_bus.close()

    async def test_subscribe_yields_nothing(self) -> None:
        """Iterating a NullBus subscription terminates straight away."""
        null_bus = NullBus()
        stream = null_bus.subscribe("topology.>")
        seen = []
        async with stream:
            # Iteration must stop immediately.
            async for event in stream:
                seen.append(event)
        assert seen == []

104
tests/bus/test_heartbeat.py Normal file
View File

@@ -0,0 +1,104 @@
"""Shared ``run_health_heartbeat`` helper (DEBT-031 workers 79).
Three workers (agent, forwarder, updater) publish identical
``system.<worker>.health`` heartbeats. Rather than copy the loop three
times, ``decnet.bus.publish.run_health_heartbeat`` carries it. These
tests pin:
* topic is ``system.<worker>.health`` via the builder;
* payload carries worker name and monotonic-ish timestamp;
* ``extra()`` hook merges per-worker fields;
* ``None`` bus yields a benign no-op loop (still cancellable);
* ``extra()`` failure doesn't break the tick.
"""
from __future__ import annotations
import asyncio
import pytest
import pytest_asyncio
from decnet.bus.fake import FakeBus
from decnet.bus.publish import run_health_heartbeat
@pytest_asyncio.fixture
async def bus() -> FakeBus:
    """Yield a connected :class:`FakeBus` and close it on teardown.

    The ``try``/``finally`` mirrors the shared ``fake_bus`` fixture so the
    bus is also closed when the generator is finalised early.
    """
    # NOTE(review): this is an async generator, so the return annotation
    # would more accurately be an async iterator of FakeBus — confirm and
    # adjust together with the module's imports.
    b = FakeBus()
    await b.connect()
    try:
        yield b
    finally:
        await b.close()
@pytest.mark.asyncio
async def test_heartbeat_publishes_under_system_worker_health(bus: FakeBus) -> None:
    """The heartbeat lands on ``system.<worker>.health`` with the base payload."""
    beat_task = asyncio.create_task(
        run_health_heartbeat(bus, "agent", interval=0.05),
    )
    try:
        stream = bus.subscribe("system.*.health")
        async with stream:
            event = await asyncio.wait_for(stream.__anext__(), timeout=2.0)
    finally:
        # Stop the heartbeat loop whether or not an event arrived.
        beat_task.cancel()
        await asyncio.gather(beat_task, return_exceptions=True)

    payload = event.payload
    assert event.topic == "system.agent.health"
    assert event.type == "health"
    assert payload["worker"] == "agent"
    assert isinstance(payload["ts"], float)
@pytest.mark.asyncio
async def test_heartbeat_merges_extra_payload(bus: FakeBus) -> None:
    """Fields returned by ``extra()`` appear next to the base payload."""

    def _extras() -> dict:
        return {"offset": 4096, "connected": True}

    beat_task = asyncio.create_task(
        run_health_heartbeat(bus, "forwarder", interval=0.05, extra=_extras),
    )
    try:
        stream = bus.subscribe("system.forwarder.health")
        async with stream:
            event = await asyncio.wait_for(stream.__anext__(), timeout=2.0)
    finally:
        beat_task.cancel()
        await asyncio.gather(beat_task, return_exceptions=True)

    payload = event.payload
    assert payload["offset"] == 4096
    assert payload["connected"] is True
    assert payload["worker"] == "forwarder"
@pytest.mark.asyncio
async def test_heartbeat_survives_extra_failure(bus: FakeBus) -> None:
    """An ``extra()`` hook that raises must not abort the heartbeat loop."""

    def _boom():
        raise RuntimeError("extras exploded")

    beat_task = asyncio.create_task(
        run_health_heartbeat(bus, "updater", interval=0.05, extra=_boom),
    )
    try:
        stream = bus.subscribe("system.updater.health")
        async with stream:
            event = await asyncio.wait_for(stream.__anext__(), timeout=2.0)
    finally:
        beat_task.cancel()
        await asyncio.gather(beat_task, return_exceptions=True)

    # Base payload still present despite extra() blowing up.
    worker_name = event.payload["worker"]
    assert worker_name == "updater"
@pytest.mark.asyncio
async def test_heartbeat_is_cancellable_with_none_bus() -> None:
    """With ``bus=None`` the loop still runs and still cancels cleanly."""
    # Bus-disabled path: nothing is published, but a hanging loop would
    # stall lifespan teardown.
    beat_task = asyncio.create_task(
        run_health_heartbeat(None, "agent", interval=0.01),
    )
    await asyncio.sleep(0.05)

    beat_task.cancel()
    await asyncio.gather(beat_task, return_exceptions=True)
    assert beat_task.done()

View File

@@ -0,0 +1,87 @@
"""Tests for the wire protocol framing."""
from __future__ import annotations
import asyncio
import struct
import pytest
from decnet.bus import protocol
def _reader_from(data: bytes) -> asyncio.StreamReader:
    """Return a ``StreamReader`` pre-loaded with *data* and already at EOF."""
    stream = asyncio.StreamReader()
    stream.feed_data(data)
    # Mark the end of input so reads past the payload finish instead of waiting.
    stream.feed_eof()
    return stream
async def _read_one(data: bytes) -> protocol.Frame | None:
    """Decode one frame from *data* via ``protocol.read_frame``."""
    source = _reader_from(data)
    frame = await protocol.read_frame(source)
    return frame
class TestEncodeDecode:
    """Round-trip checks: ``protocol.encode`` output parsed by ``read_frame``."""

    async def test_pub_round_trip(self) -> None:
        """A PUB frame keeps its verb, topic argument and JSON body."""
        body = {"payload": {"x": 1}}
        wire = protocol.encode(protocol.PUB, args="topology.abc.status", body=body)
        frame = await _read_one(wire)
        assert frame is not None
        assert (frame.verb, frame.args) == (protocol.PUB, "topology.abc.status")
        assert protocol.decode_body(frame.body) == {"payload": {"x": 1}}

    async def test_sub_empty_body(self) -> None:
        """A SUB frame carries the pattern and an empty body."""
        wire = protocol.encode(protocol.SUB, args="topology.*.mutation.*")
        frame = await _read_one(wire)
        assert frame is not None
        assert (frame.verb, frame.args) == (protocol.SUB, "topology.*.mutation.*")
        assert frame.body == b""

    async def test_bye_no_args(self) -> None:
        """A BYE frame has neither arguments nor a body."""
        wire = protocol.encode(protocol.BYE)
        frame = await _read_one(wire)
        assert frame is not None
        assert (frame.verb, frame.args) == (protocol.BYE, "")
        assert frame.body == b""

    async def test_clean_eof_returns_none(self) -> None:
        """Reading from an empty stream yields ``None``."""
        frame = await _read_one(b"")
        assert frame is None
class TestProtocolErrors:
    """Malformed input must be rejected on both the encode and decode side."""

    def test_encode_rejects_unknown_verb(self) -> None:
        """Encoding refuses a verb outside the protocol's set."""
        with pytest.raises(protocol.ProtocolError):
            protocol.encode("NOPE", args="x")

    def test_encode_rejects_newline_in_args(self) -> None:
        """Encoding refuses a newline inside the argument string."""
        with pytest.raises(protocol.ProtocolError):
            protocol.encode(protocol.PUB, args="bad\ntopic")

    def test_encode_rejects_oversized_body(self) -> None:
        """Encoding refuses a body larger than ``MAX_BODY_BYTES``."""
        big = {"payload": {"x": "a" * (protocol.MAX_BODY_BYTES + 1)}}
        with pytest.raises(protocol.ProtocolError):
            protocol.encode(protocol.PUB, args="t", body=big)

    async def test_decode_rejects_unknown_verb(self) -> None:
        """Decoding refuses a header line with an unknown verb."""
        bad = b"NOPE x\n" + struct.pack(">I", 0)
        with pytest.raises(protocol.ProtocolError):
            await _read_one(bad)

    async def test_decode_rejects_oversized_body_length(self) -> None:
        """Decoding refuses a declared length above ``MAX_BODY_BYTES``."""
        bad = b"PUB x\n" + struct.pack(">I", protocol.MAX_BODY_BYTES + 1)
        with pytest.raises(protocol.ProtocolError):
            await _read_one(bad)

    async def test_decode_rejects_truncated_body(self) -> None:
        """A body shorter than its declared length is an error."""
        bad = b"PUB x\n" + struct.pack(">I", 10) + b"short"
        # IncompleteReadError bubbles up from the stream reader. ProtocolError
        # is accepted as well in case the decoder wraps it. A bare
        # ``Exception`` would also hide unrelated failures such as a
        # NameError inside the decoder.
        with pytest.raises((asyncio.IncompleteReadError, protocol.ProtocolError)):
            await _read_one(bad)

    def test_decode_body_rejects_non_object(self) -> None:
        """A JSON body that is not an object is rejected."""
        import orjson

        with pytest.raises(protocol.ProtocolError):
            protocol.decode_body(orjson.dumps([1, 2, 3]))

    def test_decode_body_empty_returns_empty_dict(self) -> None:
        """An empty body decodes to an empty dict."""
        assert protocol.decode_body(b"") == {}

64
tests/bus/test_publish.py Normal file
View File

@@ -0,0 +1,64 @@
"""Tests for :mod:`decnet.bus.publish`.
The whole point of ``publish_safely`` is that it never raises back at the
caller. These tests pin that contract: ``None`` bus is a no-op, a real
bus publishes, and a raising bus is swallowed + logged.
"""
from __future__ import annotations
import logging
import pytest
from decnet.bus.base import BaseBus, Event, Subscription
from decnet.bus.fake import FakeBus
from decnet.bus.publish import publish_safely
class _ExplodingBus(BaseBus):
    """Stub bus: ``publish`` always fails; the other methods are placeholders."""

    async def connect(self) -> None:  # pragma: no cover - trivial
        return None

    async def close(self) -> None:  # pragma: no cover - trivial
        return None

    def subscribe(self, pattern: str) -> Subscription:  # pragma: no cover
        raise NotImplementedError

    async def publish(self, topic, payload, *, event_type=""):
        # The one behaviour under test: every publish blows up.
        raise RuntimeError("transport exploded")
@pytest.mark.asyncio
async def test_publish_safely_none_bus_is_noop() -> None:
    """``publish_safely`` with ``bus=None`` completes without raising."""
    # A worker that couldn't connect at startup passes bus=None and expects
    # every call to silently no-op.
    missing_bus = None
    await publish_safely(missing_bus, "system.log", {"msg": "hi"})
@pytest.mark.asyncio
async def test_publish_safely_delivers_on_live_bus() -> None:
    """On a connected bus the event reaches a subscriber intact."""
    live_bus = FakeBus()
    await live_bus.connect()
    try:
        stream = live_bus.subscribe("system.log")
        async with stream:
            await publish_safely(live_bus, "system.log", {"msg": "hi"}, event_type="log")
            delivered = await stream.__anext__()
        assert isinstance(delivered, Event)
        assert (delivered.topic, delivered.type) == ("system.log", "log")
        assert delivered.payload == {"msg": "hi"}
    finally:
        await live_bus.close()
@pytest.mark.asyncio
async def test_publish_safely_swallows_transport_errors(caplog: pytest.LogCaptureFixture) -> None:
    """A raising transport is swallowed; the caller only sees a log line."""
    caplog.set_level(logging.WARNING, logger="bus.publish")

    # Without publish_safely the exploding bus would crash the caller.
    broken_bus = _ExplodingBus()
    await publish_safely(broken_bus, "system.log", {"msg": "hi"})

    logged = [rec.message for rec in caplog.records]
    assert any("bus publish failed" in text for text in logged)

89
tests/bus/test_topics.py Normal file
View File

@@ -0,0 +1,89 @@
"""Tests for the topic hierarchy builders."""
from __future__ import annotations
import pytest
from decnet.bus import topics
def test_topology_mutation_builder() -> None:
    """The mutation builder joins the topology id and the mutation kind."""
    built = topics.topology_mutation("abc123", topics.MUTATION_APPLIED)
    expected = "topology.abc123.mutation.applied"
    assert built == expected
def test_topology_status_builder() -> None:
    """The status builder appends ``.status`` to the topology id."""
    built = topics.topology_status("t-1")
    assert built == "topology.t-1.status"
def test_decky_builder() -> None:
    """The decky builder yields ``decky.<id>.<leaf>`` for each leaf constant."""
    cases = [
        (topics.DECKY_STATE, "decky.d-42.state"),
        (topics.DECKY_TRAFFIC, "decky.d-42.traffic"),
    ]
    for leaf, expected in cases:
        assert topics.decky("d-42", leaf) == expected
def test_system_builder_allows_dotted_leaf() -> None:
    """``topics.system`` accepts a leaf that itself contains a dot."""
    # system.bus.health has a dot in the leaf — that's intentional and a
    # legitimate hierarchy refinement, not a segment violation.
    cases = [
        (topics.SYSTEM_BUS_HEALTH, "system.bus.health"),
        (topics.SYSTEM_LOG, "system.log"),
    ]
    for leaf, expected in cases:
        assert topics.system(leaf) == expected
def test_system_builder_rejects_empty() -> None:
    """An empty leaf is rejected with ``ValueError``."""
    empty_leaf = ""
    with pytest.raises(ValueError):
        topics.system(empty_leaf)
@pytest.mark.parametrize("bad", ["", "has.dot", "has*wildcard", "has>wild", "with space", "with\ttab"])
def test_segment_validation(bad: str) -> None:
    """Every id-taking builder rejects an invalid segment with ``ValueError``."""
    builders = (
        lambda segment: topics.topology_mutation(segment, topics.MUTATION_APPLIED),
        lambda segment: topics.topology_status(segment),
        lambda segment: topics.decky(segment, topics.DECKY_STATE),
        lambda segment: topics.system_health(segment),
    )
    # Same four builders, in the same order, each required to raise.
    for build in builders:
        with pytest.raises(ValueError):
            build(bad)
def test_attacker_builder() -> None:
    """``topics.attacker`` prefixes each leaf constant with ``attacker.``."""
    cases = [
        (topics.ATTACKER_OBSERVED, "attacker.observed"),
        (topics.ATTACKER_SCORED, "attacker.scored"),
        (topics.ATTACKER_FINGERPRINTED, "attacker.fingerprinted"),
        # Dotted leaf is intentional — same as system.bus.health.
        (topics.ATTACKER_SESSION_STARTED, "attacker.session.started"),
        (topics.ATTACKER_SESSION_ENDED, "attacker.session.ended"),
    ]
    for leaf, expected in cases:
        assert topics.attacker(leaf) == expected
def test_attacker_builder_rejects_empty() -> None:
    """An empty attacker leaf is rejected with ``ValueError``."""
    empty_leaf = ""
    with pytest.raises(ValueError):
        topics.attacker(empty_leaf)
def test_system_health_builder() -> None:
    """``system_health`` builds ``system.<worker>.health``."""
    for worker in ("sniffer", "mutator"):
        assert topics.system_health(worker) == "system." + worker + ".health"
def test_system_control_builder() -> None:
    """``system_control`` builds ``system.<worker>.control``."""
    for worker in ("mutator", "collector"):
        assert topics.system_control(worker) == "system." + worker + ".control"
@pytest.mark.parametrize("bad", ["", "has.dot", "has*wildcard", "has>wild", "with space", "with\ttab"])
def test_system_control_rejects_bad_segments(bad: str) -> None:
    """``system_control`` rejects an invalid worker segment with ``ValueError``."""
    build = topics.system_control
    with pytest.raises(ValueError):
        build(bad)
# ─── Identity resolution topics (commit 4 of IDENTITY_RESOLUTION.md) ─────────
def test_identity_builder() -> None:
    """``topics.identity`` prefixes each leaf constant with ``identity.``."""
    cases = [
        (topics.IDENTITY_FORMED, "identity.formed"),
        (topics.IDENTITY_OBSERVATION_LINKED, "identity.observation.linked"),
        (topics.IDENTITY_MERGED, "identity.merged"),
        (topics.IDENTITY_UNMERGED, "identity.unmerged"),
    ]
    for leaf, expected in cases:
        assert topics.identity(leaf) == expected
def test_identity_builder_rejects_empty() -> None:
    """An empty identity leaf is rejected with ``ValueError``."""
    empty_leaf = ""
    with pytest.raises(ValueError):
        topics.identity(empty_leaf)

View File

@@ -0,0 +1,131 @@
"""End-to-end tests for :class:`UnixSocketBus` against a real :class:`BusServer`.
These tests run in the dev loop (no pytest marker) because they only need
the tmp filesystem — no Docker, no external broker.
"""
from __future__ import annotations
import asyncio
import pathlib
import stat
import pytest
from decnet.bus.unix_client import UnixSocketBus
from decnet.bus.unix_server import BusServer
async def _drain(sub, n: int, timeout: float = 1.5) -> list:
out = []
try:
async with asyncio.timeout(timeout):
async for event in sub:
out.append(event)
if len(out) >= n:
break
except TimeoutError:
pass
return out
class TestEndToEnd:
    """Round trips between :class:`UnixSocketBus` and a real :class:`BusServer`."""

    @staticmethod
    async def _stop_serving(serve_task: asyncio.Task) -> None:
        """Cancel the ``serve_forever`` task and wait for it to unwind."""
        serve_task.cancel()
        try:
            await serve_task
        except asyncio.CancelledError:
            # Expected: we cancelled the task ourselves.
            pass

    async def test_pub_sub_exact(self, unix_bus) -> None:
        """A client subscribed to a topic does not receive its own publish."""
        _server, client = unix_bus
        sub = client.subscribe("topology.abc.status")
        # Give the SUB frame a tick to register on the server.
        await asyncio.sleep(0.05)
        async with sub:
            await client.publish("topology.abc.status", {"status": "active"})
            events = await _drain(sub, 1)
        # A publisher doesn't see its own events — use a second client.
        assert events == []

    async def test_pub_sub_across_two_clients(
        self, tmp_path: pathlib.Path,
    ) -> None:
        """A wildcard subscriber receives matching events from another client."""
        sock = tmp_path / "bus.sock"
        server = BusServer(sock, group=None)
        await server.start()
        serve_task = asyncio.create_task(server.serve_forever())
        publisher = UnixSocketBus(sock, client_name="publisher")
        subscriber = UnixSocketBus(sock, client_name="subscriber")
        await publisher.connect()
        await subscriber.connect()
        try:
            sub = subscriber.subscribe("topology.*.mutation.*")
            await asyncio.sleep(0.05)  # let SUB register
            async with sub:
                await publisher.publish(
                    "topology.t1.mutation.applied", {"id": 1}, event_type="applied",
                )
                await publisher.publish(
                    "decky.xyz.state", {"state": "running"},  # should not match
                )
                await publisher.publish(
                    "topology.t2.mutation.failed", {"id": 2}, event_type="failed",
                )
                events = await _drain(sub, 2)
            ids = {e.payload["id"] for e in events}
            assert ids == {1, 2}
        finally:
            await publisher.close()
            await subscriber.close()
            await self._stop_serving(serve_task)
            await server.close()

    async def test_socket_file_mode(self, tmp_path: pathlib.Path) -> None:
        """The server creates its socket file with mode ``0o660``."""
        sock = tmp_path / "bus.sock"
        server = BusServer(sock, group=None)
        await server.start()
        try:
            mode = stat.S_IMODE(sock.stat().st_mode)
            assert mode == 0o660
        finally:
            await server.close()

    async def test_server_close_wakes_subscribers(
        self, tmp_path: pathlib.Path,
    ) -> None:
        """Closing the server ends a client-side consumer blocked on iteration."""
        sock = tmp_path / "bus.sock"
        server = BusServer(sock, group=None)
        await server.start()
        serve_task = asyncio.create_task(server.serve_forever())
        client = UnixSocketBus(sock, client_name="watcher")
        await client.connect()
        # The client is closed in ``finally`` so a timeout or failed assertion
        # below cannot leave it open for the rest of the session.
        try:
            sub = client.subscribe("system.>")
            await asyncio.sleep(0.05)

            async def consume() -> list:
                out = []
                async for event in sub:
                    out.append(event)
                return out

            consumer = asyncio.create_task(consume())
            await asyncio.sleep(0.05)
            await self._stop_serving(serve_task)
            await server.close()
            # The consumer must unblock within a reasonable time.
            events = await asyncio.wait_for(consumer, timeout=1.0)
            assert events == []
        finally:
            await client.close()

    async def test_start_rejects_missing_parent(self, tmp_path: pathlib.Path) -> None:
        """Starting on a socket path whose parent is missing raises ``FileNotFoundError``."""
        sock = tmp_path / "nonexistent-dir" / "bus.sock"
        server = BusServer(sock, group=None)
        with pytest.raises(FileNotFoundError):
            await server.start()

68
tests/bus/test_worker.py Normal file
View File

@@ -0,0 +1,68 @@
"""Tests for :func:`decnet.bus.worker.bus_worker` lifecycle + heartbeat."""
from __future__ import annotations
import asyncio
import pathlib
import pytest
from decnet.bus import topics
from decnet.bus.unix_client import UnixSocketBus
from decnet.bus.worker import bus_worker
class TestBusWorker:
    """Lifecycle and heartbeat checks for :func:`bus_worker`."""

    @staticmethod
    async def _wait_for_socket(
        sock: pathlib.Path, attempts: int = 40, delay: float = 0.05,
    ) -> bool:
        """Poll until *sock* exists; return whether it appeared in time."""
        for _ in range(attempts):
            if sock.exists():
                return True
            await asyncio.sleep(delay)
        return sock.exists()

    @staticmethod
    async def _stop(task: asyncio.Task) -> None:
        """Cancel the worker task and wait for it to unwind."""
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            # Expected: we cancelled the task ourselves.
            pass

    async def test_worker_serves_and_heartbeats(
        self, tmp_path: pathlib.Path,
    ) -> None:
        """The worker creates its socket and publishes a bus-health heartbeat."""
        sock = tmp_path / "bus.sock"
        task = asyncio.create_task(
            bus_worker(sock, group=None, heartbeat_interval=1),
        )
        # Everything after task creation sits inside try/finally so a failed
        # socket wait or connect cannot leave the worker task running.
        try:
            # Wait for the socket to exist.
            assert await self._wait_for_socket(sock), "bus worker did not create socket"
            client = UnixSocketBus(sock, client_name="hb-watcher")
            await client.connect()
            try:
                sub = client.subscribe(topics.system(topics.SYSTEM_BUS_HEALTH))
                heartbeat = None
                async with sub:
                    async with asyncio.timeout(3.0):
                        async for event in sub:
                            heartbeat = event
                            break
                # Without this check a subscription that ends early would let
                # the test pass without ever observing a heartbeat.
                assert heartbeat is not None, "subscription ended without a heartbeat"
                assert heartbeat.topic == "system.bus.health"
                assert "pid" in heartbeat.payload
            finally:
                await client.close()
        finally:
            await self._stop(task)

    async def test_worker_creates_home_fallback_parent(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """The worker creates the ``.decnet`` parent directory under the home path."""
        # Point Path.home() at tmp_path so the "auto-mkdir ~/.decnet" branch
        # activates without touching the real home directory.
        monkeypatch.setattr(pathlib.Path, "home", classmethod(lambda cls: tmp_path))
        sock = tmp_path / ".decnet" / "bus.sock"
        task = asyncio.create_task(
            bus_worker(sock, group=None, heartbeat_interval=60),
        )
        try:
            assert await self._wait_for_socket(sock)
        finally:
            await self._stop(task)