fix(collector): fix container detection and auto-start on deploy

Two bugs caused the log file to never be written:

1. is_service_container() used regex '^decky-\d+-\w' which only matched
   the old decky-01-smtp naming style. Actual containers are named
   omega-decky-smtp, relay-decky-smtp, etc. Fixed by using Docker Compose
   labels instead: com.docker.compose.project=decnet together with a
   non-empty com.docker.compose.depends_on label reliably distinguishes
   service containers from base (sleep infinity) containers, regardless
   of the decky naming convention.
   Added is_service_event() for the Docker events path.

2. The collector was only started when --api was used. Added a 'collect'
   CLI subcommand (decnet collect --log-file <path>) and wired it into
   deploy as an auto-started background process when --api is not in use.
   Default log path: /var/log/decnet/decnet.log
This commit is contained in:
2026-04-11 03:56:53 -04:00
parent 377ba0410c
commit 7abae5571a
3 changed files with 122 additions and 23 deletions

View File

@@ -395,6 +395,18 @@ def deploy(
except (FileNotFoundError, subprocess.SubprocessError):
console.print("[red]Failed to start mutator watcher.[/]")
# Start the log collector as a background process unless --api is handling it.
# The collector streams Docker logs → log_file (RFC 5424) + log_file.json.
if effective_log_file and not dry_run and not api:
import subprocess # noqa: F811 # nosec B404
import sys
console.print(f"[bold cyan]Starting log collector[/] → {effective_log_file}")
subprocess.Popen( # nosec B603
[sys.executable, "-m", "decnet.cli", "collect", "--log-file", str(effective_log_file)],
stdout=subprocess.DEVNULL,
stderr=subprocess.STDOUT,
)
if api and not dry_run:
import subprocess # nosec B404
import sys
@@ -413,6 +425,17 @@ def deploy(
console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
@app.command()
def collect(
    log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write RFC 5424 syslog lines and .json records"),
) -> None:
    """Stream Docker logs from all running decky service containers to a log file."""
    # Function-scope imports: they are only resolved when this command runs.
    from asyncio import run as run_until_done

    from decnet.web.collector import log_collector_worker as collector_coro

    # Announce the destination before handing control to the worker.
    console.print(f"[bold cyan]Collector starting[/] → {log_file}")

    # asyncio.run drives the worker coroutine and blocks until it returns.
    run_until_done(collector_coro(log_file))
@app.command()
def mutate(
watch: bool = typer.Option(False, "--watch", "-w", help="Run continuously and mutate deckies according to their interval"),

View File

@@ -83,12 +83,43 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
# ─── Container helpers ────────────────────────────────────────────────────────
def is_service_container(name: str) -> bool:
def _is_decnet_service_labels(labels: dict) -> bool:
"""
Return True for decky service containers (decky-NN-service).
Base containers (decky-NN, which run sleep infinity) return False.
Return True if the Compose labels indicate a DECNET service container.
Discriminator: base containers have no depends_on (they own the IP);
service containers all declare depends_on pointing at their base.
Both sets carry com.docker.compose.project=decnet.
"""
return bool(re.match(r'^decky-\d+-\w', name.lstrip("/")))
if labels.get("com.docker.compose.project") != "decnet":
return False
return bool(labels.get("com.docker.compose.depends_on", "").strip())
def is_service_container(container) -> bool:
    """
    Decide whether *container* is a DECNET service container.

    Two input shapes are handled:

    * An object exposing ``.labels`` (e.g. a Docker SDK container): its
      labels are checked with ``_is_decnet_service_labels``; a falsy
      ``labels`` value is treated as an empty mapping.
    * A plain name string (legacy path): no labels are available, so the
      result is a heuristic and always less reliable than the label check.
    """
    if not isinstance(container, str):
        # Label path: the authoritative check.
        return _is_decnet_service_labels(container.labels or {})

    # Name-only path (e.g. from the event stream before a full inspect).
    # The only signal used is whether the name, minus any leading "/",
    # contains a hyphen. No project filtering happens here, so this is
    # strictly best-effort.
    # NOTE(review): the original author states the result is re-checked via
    # labels on _spawn — not visible in this block; confirm against caller.
    bare_name = container.lstrip("/")
    has_hyphen = "-" in bare_name
    return has_hyphen
def is_service_event(attrs: dict) -> bool:
    """Tell whether a Docker event's Actor.Attributes describe a DECNET service container."""
    # The event attributes are handed to the shared label check as-is.
    matches_service_labels = _is_decnet_service_labels(attrs)
    return matches_service_labels
# ─── Blocking stream worker (runs in a thread) ────────────────────────────────
@@ -155,9 +186,8 @@ async def log_collector_worker(log_file: str) -> None:
# Collect from already-running containers
for container in client.containers.list():
name = container.name.lstrip("/")
if is_service_container(name):
_spawn(container.id, name)
if is_service_container(container):
_spawn(container.id, container.name.lstrip("/"))
# Watch for new containers starting
def _watch_events() -> None:
@@ -165,9 +195,10 @@ async def log_collector_worker(log_file: str) -> None:
decode=True,
filters={"type": "container", "event": "start"},
):
name = event.get("Actor", {}).get("Attributes", {}).get("name", "")
cid = event.get("id", "")
if cid and is_service_container(name):
attrs = event.get("Actor", {}).get("Attributes", {})
cid = event.get("id", "")
name = attrs.get("name", "")
if cid and is_service_event(attrs):
loop.call_soon_threadsafe(_spawn, cid, name)
await asyncio.to_thread(_watch_events)