feat: replace bind-mount log pipeline with Docker log streaming

Services now print RFC 5424 to stdout; Docker captures via json-file driver.
A new host-side collector (decnet.web.collector) streams docker logs from all
running decky service containers and writes RFC 5424 + parsed JSON to the host
log file. The existing ingester continues to tail the .json file unchanged.
rsyslog can consume the .log file independently — no DECNET involvement needed.

Removes: bind-mount volume injection, _LOG_NETWORK bridge, log_target config
field and --log-target CLI flag, TCP syslog forwarding from service templates.
This commit is contained in:
2026-04-10 00:14:14 -04:00
parent 8d023147cc
commit 25ba3fb56a
11 changed files with 368 additions and 328 deletions

View File

@@ -243,8 +243,7 @@ def deploy(
randomize_services: bool = typer.Option(False, "--randomize-services", help="Assign random services to each decky"),
distro: Optional[str] = typer.Option(None, "--distro", help="Comma-separated distro slugs, e.g. debian,ubuntu22,rocky9"),
randomize_distros: bool = typer.Option(False, "--randomize-distros", help="Assign a random distro to each decky"),
log_target: Optional[str] = typer.Option(None, "--log-target", help="Forward logs to ip:port (e.g. 192.168.1.5:5140)"),
log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Write RFC 5424 syslog to this path inside containers (e.g. /var/log/decnet/decnet.log)"),
log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Host path for the collector to write RFC 5424 logs (e.g. /var/log/decnet/decnet.log)"),
archetype_name: Optional[str] = typer.Option(None, "--archetype", "-a", help="Machine archetype slug (e.g. linux-server, windows-workstation)"),
mutate_interval: Optional[int] = typer.Option(30, "--mutate-interval", help="Automatically rotate services every N minutes"),
dry_run: bool = typer.Option(False, "--dry-run", help="Generate compose file without starting containers"),
@@ -298,7 +297,6 @@ def deploy(
)
)
effective_log_target = log_target or ini.log_target
effective_log_file = log_file
try:
decky_configs = _build_deckies_from_ini(
@@ -362,7 +360,6 @@ def deploy(
distros_explicit=distros_list, randomize_distros=randomize_distros,
archetype=arch, mutate_interval=mutate_interval,
)
effective_log_target = log_target
effective_log_file = log_file
# Handle automatic log file for API
@@ -376,18 +373,11 @@ def deploy(
subnet=subnet_cidr,
gateway=effective_gateway,
deckies=decky_configs,
log_target=effective_log_target,
log_file=effective_log_file,
ipvlan=ipvlan,
mutate_interval=mutate_interval,
)
if effective_log_target and not dry_run:
from decnet.logging.forwarder import probe_log_target
if not probe_log_target(effective_log_target):
console.print(f"[yellow]Warning: log target {effective_log_target} is unreachable. "
"Logs will be lost if it stays down.[/]")
from decnet.deployer import deploy as _deploy
_deploy(config, dry_run=dry_run, no_cache=no_cache)

View File

@@ -6,6 +6,12 @@ Network model:
All service containers for that decky share the base's network namespace
via `network_mode: "service:<base>"`. From the outside, every service on
a given decky appears to come from the same IP — exactly like a real host.
Logging model:
Service containers write RFC 5424 lines to stdout. Docker captures them
via the json-file driver. The host-side collector (decnet.web.collector)
streams those logs and writes them to the host log file for the ingester
and rsyslog to consume. No bind mounts or shared volumes are needed.
"""
from pathlib import Path
@@ -17,38 +23,19 @@ from decnet.network import MACVLAN_NETWORK_NAME
from decnet.os_fingerprint import get_os_sysctls
from decnet.services.registry import get_service
_CONTAINER_LOG_DIR = "/var/log/decnet"
_LOG_NETWORK = "decnet_logs"
def _resolve_log_file(log_file: str) -> tuple[str, str]:
"""
Return (host_dir, container_log_path) for a user-supplied log file path.
The host path is resolved to absolute so Docker can bind-mount it.
All containers share the same host directory, mounted at _CONTAINER_LOG_DIR.
"""
host_path = Path(log_file).resolve()
host_dir = str(host_path.parent)
container_path = f"{_CONTAINER_LOG_DIR}/{host_path.name}"
return host_dir, container_path
_DOCKER_LOGGING = {
"driver": "json-file",
"options": {
"max-size": "10m",
"max-file": "5",
},
}
def generate_compose(config: DecnetConfig) -> dict:
"""Build and return the full docker-compose data structure."""
services: dict = {}
log_host_dir: str | None = None
log_container_path: str | None = None
if config.log_file:
log_host_dir, log_container_path = _resolve_log_file(config.log_file)
# Ensure the host log directory exists and is writable by the container's
# non-root 'decnet' user (DEBT-019). mkdir respects umask, so chmod explicitly.
_log_dir = Path(log_host_dir)
_log_dir.mkdir(parents=True, exist_ok=True)
_log_dir.chmod(0o777)
for decky in config.deckies:
base_key = decky.name # e.g. "decky-01"
@@ -65,8 +52,6 @@ def generate_compose(config: DecnetConfig) -> dict:
}
},
}
if config.log_target:
base["networks"][_LOG_NETWORK] = {}
# Inject TCP/IP stack sysctls to spoof the claimed OS fingerprint.
# Only the base container needs this — service containers inherit the
@@ -80,9 +65,7 @@ def generate_compose(config: DecnetConfig) -> dict:
for svc_name in decky.services:
svc = get_service(svc_name)
svc_cfg = decky.service_config.get(svc_name, {})
fragment = svc.compose_fragment(
decky.name, log_target=config.log_target, service_cfg=svc_cfg
)
fragment = svc.compose_fragment(decky.name, service_cfg=svc_cfg)
# Inject the per-decky base image into build services so containers
# vary by distro and don't all fingerprint as debian:bookworm-slim.
@@ -91,12 +74,6 @@ def generate_compose(config: DecnetConfig) -> dict:
fragment.setdefault("environment", {})
fragment["environment"]["HOSTNAME"] = decky.hostname
if log_host_dir and log_container_path:
fragment["environment"]["DECNET_LOG_FILE"] = log_container_path
fragment.setdefault("volumes", [])
mount = f"{log_host_dir}:{_CONTAINER_LOG_DIR}"
if mount not in fragment["volumes"]:
fragment["volumes"].append(mount)
# Share the base container's network — no own IP needed
fragment["network_mode"] = f"service:{base_key}"
@@ -106,6 +83,9 @@ def generate_compose(config: DecnetConfig) -> dict:
fragment.pop("hostname", None)
fragment.pop("networks", None)
# Rotate Docker logs so disk usage is bounded
fragment["logging"] = _DOCKER_LOGGING
services[f"{decky.name}-{svc_name}"] = fragment
# Network definitions
@@ -114,8 +94,6 @@ def generate_compose(config: DecnetConfig) -> dict:
"external": True, # created by network.py before compose up
}
}
if config.log_target:
networks[_LOG_NETWORK] = {"driver": "bridge", "internal": True}
return {
"version": "3.8",

View File

@@ -7,7 +7,7 @@ import json
from pathlib import Path
from typing import Literal
from pydantic import BaseModel, field_validator
from pydantic import BaseModel, field_validator # field_validator used by DeckyConfig
from decnet.distros import random_hostname as _random_hostname
@@ -49,21 +49,10 @@ class DecnetConfig(BaseModel):
subnet: str
gateway: str
deckies: list[DeckyConfig]
log_target: str | None = None # "ip:port" or None
log_file: str | None = None # path for RFC 5424 syslog file output
log_file: str | None = None # host path where the collector writes the log file
ipvlan: bool = False # use IPvlan L2 instead of MACVLAN (WiFi-friendly)
mutate_interval: int | None = DEFAULT_MUTATE_INTERVAL # global automatic rotation interval in minutes
@field_validator("log_target")
@classmethod
def validate_log_target(cls, v: str | None) -> str | None:
if v is None:
return v
parts = v.rsplit(":", 1)
if len(parts) != 2 or not parts[1].isdigit():
raise ValueError("log_target must be in ip:port format, e.g. 192.168.1.5:5140")
return v
def save_state(config: DecnetConfig, compose_path: Path) -> None:
payload = {

View File

@@ -6,7 +6,6 @@ Format:
net=192.168.1.0/24
gw=192.168.1.1
interface=wlp6s0
log_target=192.168.1.5:5140 # optional
[hostname-1]
ip=192.168.1.82 # optional
@@ -71,7 +70,6 @@ class IniConfig:
subnet: str | None = None
gateway: str | None = None
interface: str | None = None
log_target: str | None = None
mutate_interval: int | None = None
deckies: list[DeckySpec] = field(default_factory=list)
custom_services: list[CustomServiceSpec] = field(default_factory=list)
@@ -117,7 +115,6 @@ def _parse_configparser(cp: configparser.ConfigParser) -> IniConfig:
cfg.subnet = g.get("net")
cfg.gateway = g.get("gw")
cfg.interface = g.get("interface")
cfg.log_target = g.get("log_target") or g.get("log-target")
from decnet.services.registry import all_services
known_services = set(all_services().keys())

View File

@@ -1,23 +1,26 @@
import asyncio
import logging
import os
from contextlib import asynccontextmanager
from typing import Any, AsyncGenerator, Optional
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from decnet.env import DECNET_CORS_ORIGINS, DECNET_DEVELOPER
from decnet.env import DECNET_CORS_ORIGINS, DECNET_DEVELOPER, DECNET_INGEST_LOG_FILE
from decnet.web.dependencies import repo
from decnet.web.collector import log_collector_worker
from decnet.web.ingester import log_ingestion_worker
from decnet.web.router import api_router
log = logging.getLogger(__name__)
ingestion_task: Optional[asyncio.Task[Any]] = None
collector_task: Optional[asyncio.Task[Any]] = None
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
global ingestion_task
global ingestion_task, collector_task
for attempt in range(1, 6):
try:
@@ -28,16 +31,24 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
if attempt == 5:
log.error("DB failed to initialize after 5 attempts — startup may be degraded")
await asyncio.sleep(0.5)
# Start background ingestion task
if ingestion_task is None or ingestion_task.done():
ingestion_task = asyncio.create_task(log_ingestion_worker(repo))
# Start Docker log collector (writes to log file; ingester reads from it)
_log_file = os.environ.get("DECNET_INGEST_LOG_FILE", DECNET_INGEST_LOG_FILE)
if _log_file and (collector_task is None or collector_task.done()):
collector_task = asyncio.create_task(log_collector_worker(_log_file))
else:
log.warning("DECNET_INGEST_LOG_FILE not set — Docker log collection disabled.")
yield
# Shutdown ingestion task
if ingestion_task:
ingestion_task.cancel()
# Shutdown background tasks
for task in (ingestion_task, collector_task):
if task:
task.cancel()
app: FastAPI = FastAPI(

180
decnet/web/collector.py Normal file
View File

@@ -0,0 +1,180 @@
"""
Host-side Docker log collector.
Streams stdout from all running decky service containers via the Docker SDK,
writes RFC 5424 lines to <log_file> and parsed JSON records to <log_file>.json.
The ingester tails the .json file; rsyslog can consume the .log file independently.
"""
import asyncio
import json
import logging
import re
from datetime import datetime
from pathlib import Path
from typing import Any, Optional
logger = logging.getLogger("decnet.web.collector")
# ─── RFC 5424 parser ──────────────────────────────────────────────────────────
_RFC5424_RE = re.compile(
r"^<\d+>1 "
r"(\S+) " # 1: TIMESTAMP
r"(\S+) " # 2: HOSTNAME (decky name)
r"(\S+) " # 3: APP-NAME (service)
r"- " # PROCID always NILVALUE
r"(\S+) " # 4: MSGID (event_type)
r"(.+)$", # 5: SD element + optional MSG
)
_SD_BLOCK_RE = re.compile(r'\[decnet@55555\s+(.*?)\]', re.DOTALL)
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "ip")
def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
"""
Parse an RFC 5424 DECNET log line into a structured dict.
Returns None if the line does not match the expected format.
"""
m = _RFC5424_RE.match(line)
if not m:
return None
ts_raw, decky, service, event_type, sd_rest = m.groups()
fields: dict[str, str] = {}
msg: str = ""
if sd_rest.startswith("-"):
msg = sd_rest[1:].lstrip()
elif sd_rest.startswith("["):
block = _SD_BLOCK_RE.search(sd_rest)
if block:
for k, v in _PARAM_RE.findall(block.group(1)):
fields[k] = v.replace('\\"', '"').replace("\\\\", "\\").replace("\\]", "]")
msg_match = re.search(r'\]\s+(.+)$', sd_rest)
if msg_match:
msg = msg_match.group(1).strip()
else:
msg = sd_rest
attacker_ip = "Unknown"
for fname in _IP_FIELDS:
if fname in fields:
attacker_ip = fields[fname]
break
try:
ts_formatted = datetime.fromisoformat(ts_raw).strftime("%Y-%m-%d %H:%M:%S")
except ValueError:
ts_formatted = ts_raw
return {
"timestamp": ts_formatted,
"decky": decky,
"service": service,
"event_type": event_type,
"attacker_ip": attacker_ip,
"fields": fields,
"msg": msg,
"raw_line": line,
}
# ─── Container helpers ────────────────────────────────────────────────────────
def is_service_container(name: str) -> bool:
"""
Return True for decky service containers (decky-NN-service).
Base containers (decky-NN, which run sleep infinity) return False.
"""
return bool(re.match(r'^decky-\d+-\w', name.lstrip("/")))
# ─── Blocking stream worker (runs in a thread) ────────────────────────────────
def _stream_container(container_id: str, log_path: Path, json_path: Path) -> None:
"""Stream logs from one container and append to the host log files."""
import docker # type: ignore[import]
try:
client = docker.from_env()
container = client.containers.get(container_id)
log_stream = container.logs(stream=True, follow=True, stdout=True, stderr=False)
buf = ""
with (
open(log_path, "a", encoding="utf-8") as lf,
open(json_path, "a", encoding="utf-8") as jf,
):
for chunk in log_stream:
buf += chunk.decode("utf-8", errors="replace")
while "\n" in buf:
line, buf = buf.split("\n", 1)
line = line.rstrip()
if not line:
continue
lf.write(line + "\n")
lf.flush()
parsed = parse_rfc5424(line)
if parsed:
jf.write(json.dumps(parsed) + "\n")
jf.flush()
except Exception as exc:
logger.debug("Log stream ended for container %s: %s", container_id, exc)
# ─── Async collector ──────────────────────────────────────────────────────────
async def log_collector_worker(log_file: str) -> None:
"""
Background task: streams Docker logs from all running decky service
containers, writing RFC 5424 lines to log_file and parsed JSON records
to log_file.json for the ingester to consume.
Watches Docker events to pick up containers started after initial scan.
"""
import docker # type: ignore[import]
log_path = Path(log_file)
json_path = log_path.with_suffix(".json")
log_path.parent.mkdir(parents=True, exist_ok=True)
active: dict[str, asyncio.Task[None]] = {}
loop = asyncio.get_running_loop()
def _spawn(container_id: str, container_name: str) -> None:
if container_id not in active or active[container_id].done():
active[container_id] = asyncio.ensure_future(
asyncio.to_thread(_stream_container, container_id, log_path, json_path),
loop=loop,
)
logger.info("Collecting logs from container: %s", container_name)
try:
client = docker.from_env()
# Collect from already-running containers
for container in client.containers.list():
name = container.name.lstrip("/")
if is_service_container(name):
_spawn(container.id, name)
# Watch for new containers starting
def _watch_events() -> None:
for event in client.events(
decode=True,
filters={"type": "container", "event": "start"},
):
name = event.get("Actor", {}).get("Attributes", {}).get("name", "")
cid = event.get("id", "")
if cid and is_service_container(name):
loop.call_soon_threadsafe(_spawn, cid, name)
await asyncio.to_thread(_watch_events)
except asyncio.CancelledError:
for task in active.values():
task.cancel()
raise
except Exception as exc:
logger.error("Collector error: %s", exc)

View File

@@ -50,7 +50,6 @@ async def api_deploy_deckies(req: DeployIniRequest, current_user: str = Depends(
subnet=subnet_cidr,
gateway=gateway,
deckies=[],
log_target=ini.log_target,
log_file=ingest_log_file,
ipvlan=False,
mutate_interval=ini.mutate_interval or DEFAULT_MUTATE_INTERVAL