fix(collector): label-based fleet container discovery

The events watcher's start-event filter previously called
_load_service_container_names(), which reads decnet-state.json on
every event. decnet deploy writes that state file out-of-band from
docker compose up (the two are not synchronized), so a container's
start event could arrive before the state was committed — the watcher
then silently dropped the event and never tailed the container's
stdout. The
visible symptom was an empty Credentials view (and Logs/Bounty)
after a fresh deploy until the collector was manually restarted.

Fix: stamp decnet.fleet.{service,decky,service_name} labels on
every fleet service container at compose-time, and let the
collector recognize either the fleet or topology label without
touching the state file. The state-file name match remains as a
fallback for legacy containers that predate the new labels.
This commit is contained in:
2026-04-25 08:11:21 -04:00
parent 4ea4b0be53
commit 817ce32e6d
4 changed files with 129 additions and 20 deletions

View File

@@ -303,6 +303,54 @@ class TestTopologyLabelDiscovery:
assert is_service_container(c) is True
class TestFleetLabelDiscovery:
    """Label-driven discovery of fleet (legacy) service containers.

    ``decnet/composer.py`` stamps ``decnet.fleet.service=true`` on fleet
    containers.  The events watcher must recognise that label even while
    ``decnet-state.json`` is still stale — the race that used to leave
    freshly-deployed containers silently ignored.
    """

    def _labelled(self, name: str, labels: dict):
        """Return a stand-in container object.

        The labels are exposed both as a ``labels`` attribute and nested
        under ``attrs["Config"]["Labels"]``.
        """
        inspect_payload = {"Config": {"Labels": labels}}
        return SimpleNamespace(name=name, attrs=inspect_payload, labels=labels)

    def test_fleet_labelled_container_matches_without_state(self):
        """A fleet-labelled container matches even when the state set is empty."""
        fleet_labels = {
            "decnet.fleet.service": "true",
            "decnet.fleet.decky": "omega-decky",
        }
        container = self._labelled("omega-decky-ssh", fleet_labels)
        with patch(
            "decnet.collector.worker._load_service_container_names",
            return_value=set(),
        ):
            assert is_service_container(container) is True

    def test_fleet_labelled_event_matches_without_state(self):
        """Event attributes carrying the fleet label match with empty state."""
        event_attrs = {
            "name": "omega-decky-ssh",
            "decnet.fleet.service": "true",
            "decnet.fleet.decky": "omega-decky",
        }
        with patch(
            "decnet.collector.worker._load_service_container_names",
            return_value=set(),
        ):
            assert is_service_event(event_attrs) is True

    def test_unlabelled_event_falls_back_to_state(self):
        """Events without the fleet label are still matched by known name."""
        legacy_event = {"name": "omega-decky-http"}
        with patch(
            "decnet.collector.worker._load_service_container_names",
            return_value=_KNOWN_NAMES,
        ):
            assert is_service_event(legacy_event) is True

    def test_unrelated_label_does_not_match(self):
        """A ``"false"`` fleet label and an unknown name must not match."""
        foreign_labels = {
            "com.docker.compose.project": "redis",
            "decnet.fleet.service": "false",
        }
        container = self._labelled("redis", foreign_labels)
        event_attrs = {"name": "redis", "decnet.fleet.service": "false"}
        with patch(
            "decnet.collector.worker._load_service_container_names",
            return_value=set(),
        ):
            assert is_service_container(container) is False
            assert is_service_event(event_attrs) is False
class TestLoadServiceContainerNames:
def test_with_valid_state(self, tmp_path, monkeypatch):
import decnet.config

View File

@@ -242,3 +242,28 @@ def test_multiple_deckies_different_build_bases():
assert base_img_01 == "debian:bookworm-slim"
assert base_img_02 == "ubuntu:22.04"
assert base_img_01 != base_img_02
# ---------------------------------------------------------------------------
# Fleet ownership labels — collector keys off these to recognize freshly-
# deployed containers without consulting decnet-state.json (the previous
# state-file lookup race silently dropped containers whose Docker start
# event arrived before the state write completed).
# ---------------------------------------------------------------------------
def test_service_container_carries_fleet_labels():
    """The generated ``decky-01-http`` service carries all three fleet labels."""
    compose = generate_compose(_make_config(["http"], distro="debian"))
    service_labels = compose["services"]["decky-01-http"]["labels"]
    expected_labels = {
        "decnet.fleet.service": "true",
        "decnet.fleet.decky": "decky-01",
        "decnet.fleet.service_name": "http",
    }
    # Checked one key at a time, in the listed order, so a missing label
    # surfaces as a KeyError on that specific key.
    for label_key, label_value in expected_labels.items():
        assert service_labels[label_key] == label_value
def test_base_container_does_not_carry_service_label():
    """The base ``decky-01`` container must not be marked as a fleet service.

    Base containers only run sleep and emit no logs, so the collector must
    NOT stream them; the service marker therefore stays off them.
    """
    compose = generate_compose(_make_config(["http"], distro="debian"))
    base_service = compose["services"]["decky-01"]
    # A missing or empty ``labels`` entry counts as "no labels".
    base_labels = base_service.get("labels") or {}
    assert "decnet.fleet.service" not in base_labels