refactor(swarm): enroll bundle switches from exclude list to include list

Exclude lists fail open — anything new at the master's repo root (venvs,
logs, dev notes, .env.local, local DB dumps) silently leaks into every
agent bundle. On this box a stray .311 venv (335 MB) + logs/ (220 MB)
bloated the tarball to ~150 MB and blew test_enroll_bundle timeouts.

Replace _EXCLUDES + _is_excluded with _INCLUDED_ROOT_FILES +
_INCLUDED_DIRS + _EXCLUDED_DECNET_SUBTREES and iterate via os.walk with
in-place dirnames[:] pruning so master-only subtrees (decnet/web,
decnet/mutator, decnet/profiler) and __pycache__ aren't descended into
at all.

Bundle contents are now strictly: pyproject.toml + the decnet/ package
minus the three master-only subtrees. Synthetic entries (INI, certs,
systemd units) unchanged — they were always added inline, not from the
tree walk.

test_enroll_bundle.py: 20/20 pass in 24s (was timing out at 15s/test).
This commit is contained in:
2026-04-23 21:47:47 -04:00
parent ea95a009df
commit 0eb0b32c7a

View File

@@ -18,7 +18,6 @@ the embedded payload. Two URLs, one paste.
from __future__ import annotations from __future__ import annotations
import asyncio import asyncio
import fnmatch
import io import io
import os import os
import pathlib import pathlib
@@ -44,46 +43,36 @@ BUNDLE_TTL = timedelta(minutes=5)
BUNDLE_DIR = pathlib.Path(os.environ.get("DECNET_ENROLL_BUNDLE_DIR", "/tmp/decnet-enroll")) # nosec B108 - short-lived 0600 bundle cache, env-overridable BUNDLE_DIR = pathlib.Path(os.environ.get("DECNET_ENROLL_BUNDLE_DIR", "/tmp/decnet-enroll")) # nosec B108 - short-lived 0600 bundle cache, env-overridable
SWEEP_INTERVAL_SECS = 30 SWEEP_INTERVAL_SECS = 30
# Paths excluded from the bundled tarball. Matches the intent of # Include list — explicit set of paths that ship to the agent. An
# decnet.swarm.tar_tree.DEFAULT_EXCLUDES but narrower — we never want # include list fails closed: anything new on the master (stray .env, dev
# tests, dev scaffolding, the master's DB, or the frontend source tree # venvs, data dumps, editor scratch dirs) cannot leak into the bundle
# shipped to an agent. # just because we forgot to exclude it.
_EXCLUDES: tuple[str, ...] = ( #
".venv", ".venv/*", "**/.venv/*", # What the agent actually needs:
"__pycache__", "**/__pycache__", "**/__pycache__/*", # * pyproject.toml at the repo root, so ``pip install`` works against
".git", ".git/*", # the bundle during enroll_bootstrap.sh.
".pytest_cache", ".pytest_cache/*", # * the ``decnet/`` package, MINUS the master-only subtrees called out
".mypy_cache", ".mypy_cache/*", # by _EXCLUDED_DECNET_SUBTREES — those never import on an agent host.
"*.egg-info", "*.egg-info/*", # Everything else the bootstrap needs (the INI, certs, systemd units) is
# setuptools build/ staging dir — created by `pip install` and leaks a # synthesized in-memory by ``_build_tarball`` below — it never hits the
# nested decnet_web/node_modules/ copy into the bundle otherwise. # filesystem walk.
"build", "build/*", "build/**",
"*.pyc", "*.pyo", # Top-level files shipped verbatim. Relative to the repo root.
"*.db", "*.db-wal", "*.db-shm", "decnet.db*", _INCLUDED_ROOT_FILES: tuple[str, ...] = ("pyproject.toml",)
"*.log",
"tests", "tests/*", # Top-level directories walked into the bundle. Relative to the repo root.
"development", "development/*", _INCLUDED_DIRS: tuple[str, ...] = ("decnet",)
"wiki-checkout", "wiki-checkout/*",
# Frontend is master-only; agents never serve UI. # Subtrees of an included directory that must NOT ship. Paths are
"decnet_web", "decnet_web/*", "decnet_web/**", # relative to the repo root, forward-slash separated.
# Master FastAPI app and everything under decnet/web/ — no agent-side # * ``decnet/web`` — FastAPI master app, unused by agents.
# code imports it. The agent/updater/forwarder/collector/prober/sniffer # * ``decnet/mutator`` — schedules respawns swarm-wide; master-only.
# entrypoints are all under decnet/agent, decnet/updater, decnet/swarm, # * ``decnet/profiler`` — rebuilds profiles against the master DB.
# decnet/collector, decnet/prober, decnet/sniffer. _EXCLUDED_DECNET_SUBTREES: frozenset[str] = frozenset({
"decnet/web", "decnet/web/*", "decnet/web/**", "decnet/web",
# Mutator + Profiler are master-only (mutator schedules respawns across "decnet/mutator",
# the swarm; profiler rebuilds attacker profiles against the master DB). "decnet/profiler",
"decnet/mutator", "decnet/mutator/*", "decnet/mutator/**", })
"decnet/profiler", "decnet/profiler/*", "decnet/profiler/**",
"decnet-state.json",
"master.log", "master.json",
"decnet.tar",
# Dev-host env/config leaks — these bake the master's absolute paths into
# the agent and point log handlers at directories that don't exist on the
# worker VM.
".env", ".env.*", "**/.env", "**/.env.*",
"decnet.ini", "**/decnet.ini",
)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -176,15 +165,49 @@ def _repo_root() -> pathlib.Path:
return pathlib.Path(__file__).resolve().parents[4] return pathlib.Path(__file__).resolve().parents[4]
def _iter_included(root: pathlib.Path) -> "list[tuple[pathlib.Path, str]]":
    """Return ``(full_path, arcname)`` pairs for every file the agent needs.

    The result is built from :data:`_INCLUDED_ROOT_FILES` (looked up directly
    under *root*) plus a walk of each directory in :data:`_INCLUDED_DIRS`.

    The walk is pruned in place: ``__pycache__`` and the subtrees listed in
    :data:`_EXCLUDED_DECNET_SUBTREES` are removed from ``dirnames`` before
    ``os.walk`` descends, so they are never traversed at all.

    Inside an included directory the following are also skipped:

    * compiled bytecode (``*.pyc`` / ``*.pyo``);
    * symlinks and non-regular files (FIFOs, sockets, device nodes);
    * host-local state that was filtered at any depth before the switch to
      an include list: ``.env`` / ``.env.*``, ``decnet.ini``, SQLite
      databases and their sidecars, and ``*.log`` files. The include list
      only gates *top-level* paths, so without this a stray file dropped
      inside the package would still ride along in every bundle.

    Args:
        root: Repository root that the include lists are relative to.

    Returns:
        A list of ``(absolute path, forward-slash archive name)`` tuples,
        sorted by archive name so the tarball ordering is deterministic.
    """
    # Filename-level guards for files living inside an included directory.
    skipped_suffixes = (".pyc", ".pyo", ".db", ".db-wal", ".db-shm", ".log")
    skipped_names = frozenset({".env", "decnet.ini"})
    skipped_prefixes = (".env.", "decnet.db")

    found: list[tuple[pathlib.Path, str]] = []

    # Top-level files. ``is_file()`` follows symlinks, so reject links
    # explicitly: the archive would otherwise carry a link whose target may
    # not exist on the agent. This matches the symlink rule used in the walk.
    for rel in _INCLUDED_ROOT_FILES:
        candidate = root / rel
        if candidate.is_file() and not candidate.is_symlink():
            found.append((candidate, rel))

    # Top-level dirs, pruned.
    for top in _INCLUDED_DIRS:
        start = root / top
        # ``followlinks=False`` only applies to subdirectories; a symlinked
        # start directory would still be walked, so refuse it up front.
        if start.is_symlink() or not start.is_dir():
            continue
        for dirpath, dirnames, filenames in os.walk(start, topdown=True, followlinks=False):
            dir_path = pathlib.Path(dirpath)
            rel_dir = dir_path.relative_to(root).as_posix()
            # Prune excluded subtrees + cache dirs BEFORE descending; the
            # slice assignment mutates the list os.walk itself iterates.
            dirnames[:] = [
                d for d in dirnames
                if d != "__pycache__"
                and f"{rel_dir}/{d}" not in _EXCLUDED_DECNET_SUBTREES
            ]
            for fn in filenames:
                if (
                    fn in skipped_names
                    or fn.endswith(skipped_suffixes)
                    or fn.startswith(skipped_prefixes)
                ):
                    continue
                full = dir_path / fn
                if full.is_symlink() or not full.is_file():
                    continue
                found.append((full, f"{rel_dir}/{fn}"))

    # Deterministic tarball ordering.
    found.sort(key=lambda pair: pair[1])
    return found
def _render_decnet_ini( def _render_decnet_ini(
@@ -231,7 +254,9 @@ def _build_tarball(
use_ipvlan: bool = False, use_ipvlan: bool = False,
) -> bytes: ) -> bytes:
"""Gzipped tarball with: """Gzipped tarball with:
- full repo source (minus excludes) - agent-required source (see :data:`_INCLUDED_DIRS` /
:data:`_INCLUDED_ROOT_FILES`; master-only decnet/ subtrees
pruned)
- etc/decnet/decnet.ini (pre-baked for mode=agent) - etc/decnet/decnet.ini (pre-baked for mode=agent)
- home/.decnet/agent/{ca.crt,worker.crt,worker.key} - home/.decnet/agent/{ca.crt,worker.crt,worker.key}
- home/.decnet/updater/{ca.crt,updater.crt,updater.key} (if updater_issued) - home/.decnet/updater/{ca.crt,updater.crt,updater.key} (if updater_issued)
@@ -240,13 +265,8 @@ def _build_tarball(
root = _repo_root() root = _repo_root()
buf = io.BytesIO() buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w:gz") as tar: with tarfile.open(fileobj=buf, mode="w:gz") as tar:
for path in sorted(root.rglob("*")): for path, arcname in _iter_included(root):
rel = path.relative_to(root).as_posix() tar.add(path, arcname=arcname, recursive=False)
if _is_excluded(rel):
continue
if path.is_symlink() or path.is_dir():
continue
tar.add(path, arcname=rel, recursive=False)
_add_bytes( _add_bytes(
tar, tar,