feat(1.1): decnet supervise batch group + systemd unit (C5)

Hosts reconcile/enrich/orchestrate/mutate in one process via the
supervision primitive: one import floor, one shared repo/DB pool instead
of 4. Static group registry (membership is architectural, not a knob);
factories lazy-import only the hosted workers. systemd unit Conflicts=
the individual units it replaces and documents the union-of-privileges
cost. Worker code unchanged — any member is extractable by editing _build_specs.
This commit is contained in:
2026-06-17 16:50:09 -04:00
parent 12aaa9d820
commit 3a46864f30
4 changed files with 161 additions and 1 deletions

View File

@@ -37,6 +37,7 @@ from . import (
realism, realism,
reconciler, reconciler,
sniffer, sniffer,
supervise,
swarm, swarm,
swarmctl, swarmctl,
topology, topology,
@@ -61,7 +62,7 @@ for _mod in (
swarm, swarm,
deploy, lifecycle, workers, inventory, deploy, lifecycle, workers, inventory,
web, profiler, orchestrator, realism, reconciler, sniffer, db, web, profiler, orchestrator, realism, reconciler, sniffer, db,
topology, bus, geoip, init, webhook, canary, ttp, topology, bus, geoip, init, webhook, canary, ttp, supervise,
): ):
_mod.register(app) _mod.register(app)

84
decnet/cli/supervise.py Normal file
View File

@@ -0,0 +1,84 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""``decnet supervise <group>`` — host a co-resident group of workers in one
process, paying the import floor (and the DB connection pool) once instead of
once per worker. See ``development/RELEASE-1.1.md``.
Each worker keeps its own restart loop (see :mod:`decnet.supervisor`), so this
trades per-worker systemd granularity for RAM — a worker can always be pulled
back out to its own ``decnet <worker>`` unit by removing it from the group spec
below; nothing about the worker's own code changes.
"""
from __future__ import annotations
import typer
from . import utils as _utils
from .utils import console, log
# Groups are intentionally a small static registry, not config — the membership
# is an architectural decision, not an operator knob.
# The ``supervise`` command validates its argument against this tuple, and
# ``_build_specs`` raises ``ValueError`` for any group it has no branch for, so
# a name added here needs a matching branch there.
_GROUPS = ("batch",)
async def _build_specs(group: str):
    """Assemble the ``[(name, factory), ...]`` list describing *group*.

    Only the workers that belong to the requested group are imported, and the
    imports happen inside this call rather than at module load. The shared
    ``repo`` is initialized once, before any factory is handed out.

    Every factory produces a fresh coroutine on each call so that
    :func:`supervise` can restart a worker. Intervals match the standalone
    units' defaults.

    # ponytail: defaults hardcoded to match the per-worker units; add CLI knobs
    # only if an operator actually needs to retune a consolidated group.

    Raises:
        ValueError: if *group* is not a group this function can build.
    """
    # Guard first: anything other than the one known group is rejected before
    # any worker module is imported.
    if group != "batch":
        raise ValueError(f"unknown supervise group: {group}")

    from decnet.fleet.reconciler_worker import fleet_reconciler_worker
    from decnet.intel.worker import run_intel_loop
    from decnet.mutator import run_watch_loop
    from decnet.orchestrator import orchestrator_worker
    from decnet.web.dependencies import repo

    # One repo for the whole group → one DB pool shared by every batch worker.
    await repo.initialize()

    def _reconcile():
        return fleet_reconciler_worker(repo, interval=30)

    def _enrich():
        return run_intel_loop(repo, poll_interval_secs=60.0, ttl_hours=24)

    def _orchestrate():
        return orchestrator_worker(repo, interval=60, llm_enabled=None)

    def _mutate():
        return run_watch_loop(repo)

    return [
        ("reconcile", _reconcile),
        ("enrich", _enrich),
        ("orchestrate", _orchestrate),
        ("mutate", _mutate),
    ]
def register(app: typer.Typer) -> None:
    """Attach the ``supervise`` command to *app*.

    The command checks the requested group against ``_GROUPS``, optionally
    daemonizes, then builds the group's worker specs with ``_build_specs`` and
    passes them to ``decnet.supervisor.run_group`` under ``asyncio.run``.
    """

    @app.command(name="supervise")
    def supervise_cmd(
        group: str = typer.Argument(
            ..., help=f"Worker group to host. One of: {', '.join(_GROUPS)}"
        ),
        daemon: bool = typer.Option(
            False, "--daemon", "-d", help="Detach to background as a daemon process"
        ),
    ) -> None:
        """Host a co-resident worker group in one process (shared import floor + DB pool)."""
        import asyncio

        from decnet.supervisor import run_group

        if group not in _GROUPS:
            # ``group`` is raw operator input at this point. Interpolating it
            # into a markup string would let a value such as "[/x]" be parsed
            # as a console tag and raise instead of reporting the error, so the
            # message is printed as literal text and coloured via ``style``.
            console.print(
                f"unknown group {group!r}; known groups: {', '.join(_GROUPS)}",
                style="red",
                markup=False,
            )
            raise typer.Exit(2)

        if daemon:
            # Log before detaching so the intent is recorded by the caller's
            # process as well.
            log.info("supervise %s daemonizing", group)
            _utils._daemonize()

        log.info("supervise group=%s starting", group)
        # Safe to use markup here: ``group`` has been validated against _GROUPS.
        console.print(f"[bold cyan]Supervisor starting[/] group={group}")

        async def _run() -> None:
            # Specs are built inside the running loop because _build_specs
            # awaits the shared repo's initialization.
            specs = await _build_specs(group)
            await run_group(specs)

        try:
            asyncio.run(_run())
        except KeyboardInterrupt:
            console.print("\n[yellow]Supervisor stopped.[/]")

View File

@@ -0,0 +1,49 @@
[Unit]
Description=DECNET Batch Supervisor (hosts reconcile + enrich + orchestrate + mutate in one process)
Documentation=https://git.resacachile.cl/anti/DECNET/wiki/Workers#supervisor
After=network-online.target decnet-bus.service
Wants=network-online.target decnet-bus.service
# Replaces the individual decnet-reconciler / decnet-enrich / decnet-orchestrator
# / decnet-mutator units. Do NOT enable those alongside this one.
# Conflicts= must name every replaced unit: starting this unit stops any listed
# unit that is running (and vice versa), so a worker never runs both standalone
# and inside the supervisor.
Conflicts=decnet-reconciler.service decnet-enrich.service decnet-orchestrator.service decnet-mutator.service
[Service]
Type=simple
User={{ user }}
Group={{ group }}
WorkingDirectory={{ install_dir }}
# Leading "-" makes the environment file optional: a missing .env.local is not an error.
EnvironmentFile=-{{ install_dir }}/.env.local
Environment=DECNET_SYSTEM_LOGS=/var/log/decnet/decnet.supervise-batch.log
ExecStart={{ venv_dir }}/bin/decnet supervise batch
# stdout and stderr are appended to the same file DECNET_SYSTEM_LOGS points at.
StandardOutput=append:/var/log/decnet/decnet.supervise-batch.log
StandardError=append:/var/log/decnet/decnet.supervise-batch.log
# CONSOLIDATION COST: this unit holds the UNION of its members' privileges —
# docker socket (reconcile + mutate observe/mutate containers) AND network egress
# (enrich fans out to threat-intel providers). That is a wider blast radius than
# any single worker had. Acceptable for the batch group; weigh it before adding
# a member that needs materially more (e.g. raw sockets).
SupplementaryGroups=docker
# Empty assignments reset both capability sets to empty: the process is
# started with no Linux capabilities.
CapabilityBoundingSet=
AmbientCapabilities=
# Security Hardening
NoNewPrivileges=yes
ProtectSystem=full
ProtectHome=read-only
PrivateTmp=yes
ProtectKernelTunables=yes
ProtectKernelModules=yes
ProtectControlGroups=yes
RestrictSUIDSGID=yes
LockPersonality=yes
# NOTE(review): no "-" prefix here, so a missing /var/lib/decnet makes namespace
# setup fail at start — confirm the path always exists on target hosts.
ReadOnlyPaths=/var/lib/decnet
ReadWritePaths={{ install_dir }} /var/log/decnet
# Restart only after an unclean exit; a clean stop stays stopped.
Restart=on-failure
RestartSec=5
TimeoutStopSec=20
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,26 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""CLI surface for ``decnet supervise`` (DECNET 1.1 consolidation)."""
from __future__ import annotations
from typer.testing import CliRunner
from decnet.cli import app
from decnet.cli.supervise import _GROUPS
runner = CliRunner()
def test_supervise_is_registered():
    """``decnet supervise --help`` resolves and its help text mentions the group."""
    outcome = runner.invoke(app, ["supervise", "--help"])
    assert outcome.exit_code == 0
    help_text = outcome.stdout.lower()
    assert "group" in help_text
def test_unknown_group_exits_2():
    """An unrecognised group name is rejected with exit status 2 and a message."""
    argv = ["supervise", "definitely-not-a-group"]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 2
    printed = outcome.stdout
    assert "unknown group" in printed
def test_batch_group_is_known():
    """The static group registry contains the ``batch`` group."""
    assert any(name == "batch" for name in _GROUPS)