From 7abae5571a296dab81c444f12432fa1423373c8a Mon Sep 17 00:00:00 2001 From: anti Date: Sat, 11 Apr 2026 03:56:53 -0400 Subject: [PATCH] fix(collector): fix container detection and auto-start on deploy Two bugs caused the log file to never be written: 1. is_service_container() used regex '^decky-\d+-\w' which only matched the old decky-01-smtp naming style. Actual containers are named omega-decky-smtp, relay-decky-smtp, etc. Fixed by using Docker Compose labels instead: com.docker.compose.project=decnet + non-empty depends_on discriminates service containers from base (sleep infinity) containers reliably regardless of decky naming convention. Added is_service_event() for the Docker events path. 2. The collector was only started when --api was used. Added a 'collect' CLI subcommand (decnet collect --log-file PATH) and wired it into deploy as an auto-started background process when --api is not in use. Default log path: /var/log/decnet/decnet.log --- decnet/cli.py | 23 +++++++++++++ decnet/web/collector.py | 51 +++++++++++++++++++++++------ tests/test_collector.py | 71 +++++++++++++++++++++++++++++++++-------- 3 files changed, 122 insertions(+), 23 deletions(-) diff --git a/decnet/cli.py b/decnet/cli.py index b1bd043..c210694 100644 --- a/decnet/cli.py +++ b/decnet/cli.py @@ -395,6 +395,18 @@ def deploy( except (FileNotFoundError, subprocess.SubprocessError): console.print("[red]Failed to start mutator watcher.[/]") + # Start the log collector as a background process unless --api is handling it. + # The collector streams Docker logs → log_file (RFC 5424) + log_file.json. 
+ if effective_log_file and not dry_run and not api: + import subprocess # noqa: F811 # nosec B404 + import sys + console.print(f"[bold cyan]Starting log collector[/] → {effective_log_file}") + subprocess.Popen( # nosec B603 + [sys.executable, "-m", "decnet.cli", "collect", "--log-file", str(effective_log_file)], + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT, + ) + if api and not dry_run: import subprocess # nosec B404 import sys @@ -413,6 +425,17 @@ def deploy( console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]") +@app.command() +def collect( + log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write RFC 5424 syslog lines and .json records"), +) -> None: + """Stream Docker logs from all running decky service containers to a log file.""" + import asyncio + from decnet.web.collector import log_collector_worker + console.print(f"[bold cyan]Collector starting[/] → {log_file}") + asyncio.run(log_collector_worker(log_file)) + + @app.command() def mutate( watch: bool = typer.Option(False, "--watch", "-w", help="Run continuously and mutate deckies according to their interval"), diff --git a/decnet/web/collector.py b/decnet/web/collector.py index 746df8d..bd92434 100644 --- a/decnet/web/collector.py +++ b/decnet/web/collector.py @@ -83,12 +83,43 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]: # ─── Container helpers ──────────────────────────────────────────────────────── -def is_service_container(name: str) -> bool: +def _is_decnet_service_labels(labels: dict) -> bool: """ - Return True for decky service containers (decky-NN-service). - Base containers (decky-NN, which run sleep infinity) return False. + Return True if the Compose labels indicate a DECNET service container. + + Discriminator: base containers have no depends_on (they own the IP); + service containers all declare depends_on pointing at their base. + Both sets carry com.docker.compose.project=decnet. 
""" - return bool(re.match(r'^decky-\d+-\w', name.lstrip("/"))) + if labels.get("com.docker.compose.project") != "decnet": + return False + return bool(labels.get("com.docker.compose.depends_on", "").strip()) + + +def is_service_container(container) -> bool: + """ + Return True for DECNET service containers. + + Accepts either a Docker SDK container object or a plain name string + (legacy path — falls back to label-free heuristic when only a name + is available, which is always less reliable). + """ + if isinstance(container, str): + # Called with a name only (e.g. from event stream before full inspect). + # Best-effort: a base container name has no service suffix, so it won't + # contain a hyphen after the decky name. We can't be certain without + # labels, so this path is only kept for the event fast-path and is + # superseded by the label check in the initial scan. + name = container.lstrip("/") + # Filter out anything not from our project (best effort via name) + return "-" in name # will be re-checked via labels on _spawn + labels = container.labels or {} + return _is_decnet_service_labels(labels) + + +def is_service_event(attrs: dict) -> bool: + """Return True if a Docker event's Actor.Attributes are for a DECNET service container.""" + return _is_decnet_service_labels(attrs) # ─── Blocking stream worker (runs in a thread) ──────────────────────────────── @@ -155,9 +186,8 @@ async def log_collector_worker(log_file: str) -> None: # Collect from already-running containers for container in client.containers.list(): - name = container.name.lstrip("/") - if is_service_container(name): - _spawn(container.id, name) + if is_service_container(container): + _spawn(container.id, container.name.lstrip("/")) # Watch for new containers starting def _watch_events() -> None: @@ -165,9 +195,10 @@ async def log_collector_worker(log_file: str) -> None: decode=True, filters={"type": "container", "event": "start"}, ): - name = event.get("Actor", {}).get("Attributes", 
{}).get("name", "") - cid = event.get("id", "") - if cid and is_service_container(name): + attrs = event.get("Actor", {}).get("Attributes", {}) + cid = event.get("id", "") + name = attrs.get("name", "") + if cid and is_service_event(attrs): loop.call_soon_threadsafe(_spawn, cid, name) await asyncio.to_thread(_watch_events) diff --git a/tests/test_collector.py b/tests/test_collector.py index 7f73623..7a94930 100644 --- a/tests/test_collector.py +++ b/tests/test_collector.py @@ -1,7 +1,30 @@ """Tests for the host-side Docker log collector.""" import json -from decnet.web.collector import parse_rfc5424, is_service_container +from types import SimpleNamespace +from decnet.web.collector import parse_rfc5424, is_service_container, is_service_event + + +def _make_container(project="decnet", depends_on="omega-decky:service_started:false"): + """Return a mock container object with Compose labels.""" + return SimpleNamespace( + name="omega-decky-http", + labels={ + "com.docker.compose.project": project, + "com.docker.compose.depends_on": depends_on, + }, + ) + + +def _make_base_container(): + """Return a mock base container (no depends_on).""" + return SimpleNamespace( + name="omega-decky", + labels={ + "com.docker.compose.project": "decnet", + "com.docker.compose.depends_on": "", + }, + ) class TestParseRfc5424: @@ -83,19 +106,41 @@ class TestParseRfc5424: class TestIsServiceContainer: def test_service_container_returns_true(self): - assert is_service_container("decky-01-http") is True - assert is_service_container("decky-02-mysql") is True - assert is_service_container("decky-99-ssh") is True + assert is_service_container(_make_container()) is True def test_base_container_returns_false(self): - assert is_service_container("decky-01") is False - assert is_service_container("decky-02") is False + assert is_service_container(_make_base_container()) is False - def test_unrelated_container_returns_false(self): - assert is_service_container("nginx") is False - assert 
is_service_container("postgres") is False - assert is_service_container("") is False + def test_different_decky_name_styles(self): + # omega-decky style (ini section name) + assert is_service_container(_make_container(depends_on="omega-decky:service_started:false")) is True + # relay-decky style + assert is_service_container(_make_container(depends_on="relay-decky:service_started:false")) is True - def test_strips_leading_slash(self): - assert is_service_container("/decky-01-http") is True - assert is_service_container("/decky-01") is False + def test_wrong_project_returns_false(self): + assert is_service_container(_make_container(project="someother")) is False + + def test_no_labels_returns_false(self): + c = SimpleNamespace(name="nginx", labels={}) + assert is_service_container(c) is False + + +class TestIsServiceEvent: + def _make_attrs(self, project="decnet", depends_on="omega-decky:service_started:false"): + return { + "com.docker.compose.project": project, + "com.docker.compose.depends_on": depends_on, + "name": "omega-decky-smtp", + } + + def test_service_event_returns_true(self): + assert is_service_event(self._make_attrs()) is True + + def test_base_event_returns_false(self): + assert is_service_event(self._make_attrs(depends_on="")) is False + + def test_wrong_project_returns_false(self): + assert is_service_event(self._make_attrs(project="other")) is False + + def test_unrelated_event_returns_false(self): + assert is_service_event({"name": "nginx"}) is False