Files
DECNET/decnet/web/collector.py
anti 25ba3fb56a feat: replace bind-mount log pipeline with Docker log streaming
Services now print RFC 5424 to stdout; Docker captures via json-file driver.
A new host-side collector (decnet.web.collector) streams docker logs from all
running decky service containers and writes RFC 5424 + parsed JSON to the host
log file. The existing ingester continues to tail the .json file unchanged.
rsyslog can consume the .log file independently — no DECNET involvement needed.

Removes: bind-mount volume injection, _LOG_NETWORK bridge, log_target config
field and --log-target CLI flag, TCP syslog forwarding from service templates.
2026-04-10 00:14:14 -04:00

181 lines
6.4 KiB
Python

"""
Host-side Docker log collector.
Streams stdout from all running decky service containers via the Docker SDK,
writes RFC 5424 lines to <log_file> and parsed JSON records to <log_file>.json.
The ingester tails the .json file; rsyslog can consume the .log file independently.
"""
import asyncio
import json
import logging
import re
from datetime import datetime
from pathlib import Path
from typing import Any, Optional
logger = logging.getLogger("decnet.web.collector")
# ─── RFC 5424 parser ──────────────────────────────────────────────────────────
_RFC5424_RE = re.compile(
r"^<\d+>1 "
r"(\S+) " # 1: TIMESTAMP
r"(\S+) " # 2: HOSTNAME (decky name)
r"(\S+) " # 3: APP-NAME (service)
r"- " # PROCID always NILVALUE
r"(\S+) " # 4: MSGID (event_type)
r"(.+)$", # 5: SD element + optional MSG
)
_SD_BLOCK_RE = re.compile(r'\[decnet@55555\s+(.*?)\]', re.DOTALL)
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "ip")
def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
"""
Parse an RFC 5424 DECNET log line into a structured dict.
Returns None if the line does not match the expected format.
"""
m = _RFC5424_RE.match(line)
if not m:
return None
ts_raw, decky, service, event_type, sd_rest = m.groups()
fields: dict[str, str] = {}
msg: str = ""
if sd_rest.startswith("-"):
msg = sd_rest[1:].lstrip()
elif sd_rest.startswith("["):
block = _SD_BLOCK_RE.search(sd_rest)
if block:
for k, v in _PARAM_RE.findall(block.group(1)):
fields[k] = v.replace('\\"', '"').replace("\\\\", "\\").replace("\\]", "]")
msg_match = re.search(r'\]\s+(.+)$', sd_rest)
if msg_match:
msg = msg_match.group(1).strip()
else:
msg = sd_rest
attacker_ip = "Unknown"
for fname in _IP_FIELDS:
if fname in fields:
attacker_ip = fields[fname]
break
try:
ts_formatted = datetime.fromisoformat(ts_raw).strftime("%Y-%m-%d %H:%M:%S")
except ValueError:
ts_formatted = ts_raw
return {
"timestamp": ts_formatted,
"decky": decky,
"service": service,
"event_type": event_type,
"attacker_ip": attacker_ip,
"fields": fields,
"msg": msg,
"raw_line": line,
}
# ─── Container helpers ────────────────────────────────────────────────────────
def is_service_container(name: str) -> bool:
"""
Return True for decky service containers (decky-NN-service).
Base containers (decky-NN, which run sleep infinity) return False.
"""
return bool(re.match(r'^decky-\d+-\w', name.lstrip("/")))
# ─── Blocking stream worker (runs in a thread) ────────────────────────────────
def _stream_container(container_id: str, log_path: Path, json_path: Path) -> None:
"""Stream logs from one container and append to the host log files."""
import docker # type: ignore[import]
try:
client = docker.from_env()
container = client.containers.get(container_id)
log_stream = container.logs(stream=True, follow=True, stdout=True, stderr=False)
buf = ""
with (
open(log_path, "a", encoding="utf-8") as lf,
open(json_path, "a", encoding="utf-8") as jf,
):
for chunk in log_stream:
buf += chunk.decode("utf-8", errors="replace")
while "\n" in buf:
line, buf = buf.split("\n", 1)
line = line.rstrip()
if not line:
continue
lf.write(line + "\n")
lf.flush()
parsed = parse_rfc5424(line)
if parsed:
jf.write(json.dumps(parsed) + "\n")
jf.flush()
except Exception as exc:
logger.debug("Log stream ended for container %s: %s", container_id, exc)
# ─── Async collector ──────────────────────────────────────────────────────────
async def log_collector_worker(log_file: str) -> None:
"""
Background task: streams Docker logs from all running decky service
containers, writing RFC 5424 lines to log_file and parsed JSON records
to log_file.json for the ingester to consume.
Watches Docker events to pick up containers started after initial scan.
"""
import docker # type: ignore[import]
log_path = Path(log_file)
json_path = log_path.with_suffix(".json")
log_path.parent.mkdir(parents=True, exist_ok=True)
active: dict[str, asyncio.Task[None]] = {}
loop = asyncio.get_running_loop()
def _spawn(container_id: str, container_name: str) -> None:
if container_id not in active or active[container_id].done():
active[container_id] = asyncio.ensure_future(
asyncio.to_thread(_stream_container, container_id, log_path, json_path),
loop=loop,
)
logger.info("Collecting logs from container: %s", container_name)
try:
client = docker.from_env()
# Collect from already-running containers
for container in client.containers.list():
name = container.name.lstrip("/")
if is_service_container(name):
_spawn(container.id, name)
# Watch for new containers starting
def _watch_events() -> None:
for event in client.events(
decode=True,
filters={"type": "container", "event": "start"},
):
name = event.get("Actor", {}).get("Attributes", {}).get("name", "")
cid = event.get("id", "")
if cid and is_service_container(name):
loop.call_soon_threadsafe(_spawn, cid, name)
await asyncio.to_thread(_watch_events)
except asyncio.CancelledError:
for task in active.values():
task.cancel()
raise
except Exception as exc:
logger.error("Collector error: %s", exc)