refactor(swarm): enroll bundle switches from exclude list to include list
Exclude lists fail open — anything new at the master's repo root (venvs, logs, dev notes, .env.local, local DB dumps) silently leaks into every agent bundle. On this box a stray .311 venv (335 MB) + logs/ (220 MB) bloated the tarball to ~150 MB and pushed test_enroll_bundle past its timeouts. Replace _EXCLUDES + _is_excluded with _INCLUDED_ROOT_FILES + _INCLUDED_DIRS + _EXCLUDED_DECNET_SUBTREES, and iterate via os.walk with in-place dirnames[:] pruning so that master-only subtrees (decnet/web, decnet/mutator, decnet/profiler) and __pycache__ are never descended into. Bundle contents from the tree walk are now strictly pyproject.toml plus the decnet/ package, minus the three master-only subtrees, __pycache__ directories, *.pyc/*.pyo files, and symlinks. Synthetic entries (INI, certs, systemd units) are unchanged — they were always added inline, not from the tree walk. test_enroll_bundle.py: 20/20 pass in 24s (was timing out at 15s/test).
This commit is contained in:
@@ -18,7 +18,6 @@ the embedded payload. Two URLs, one paste.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import fnmatch
|
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
@@ -44,46 +43,36 @@ BUNDLE_TTL = timedelta(minutes=5)
|
|||||||
BUNDLE_DIR = pathlib.Path(os.environ.get("DECNET_ENROLL_BUNDLE_DIR", "/tmp/decnet-enroll")) # nosec B108 - short-lived 0600 bundle cache, env-overridable
|
BUNDLE_DIR = pathlib.Path(os.environ.get("DECNET_ENROLL_BUNDLE_DIR", "/tmp/decnet-enroll")) # nosec B108 - short-lived 0600 bundle cache, env-overridable
|
||||||
SWEEP_INTERVAL_SECS = 30
|
SWEEP_INTERVAL_SECS = 30
|
||||||
|
|
||||||
# Paths excluded from the bundled tarball. Matches the intent of
|
# Include list — explicit set of paths that ship to the agent. An
|
||||||
# decnet.swarm.tar_tree.DEFAULT_EXCLUDES but narrower — we never want
|
# include list fails closed: anything new on the master (stray .env, dev
|
||||||
# tests, dev scaffolding, the master's DB, or the frontend source tree
|
# venvs, data dumps, editor scratch dirs) cannot leak into the bundle
|
||||||
# shipped to an agent.
|
# just because we forgot to exclude it.
|
||||||
_EXCLUDES: tuple[str, ...] = (
|
#
|
||||||
".venv", ".venv/*", "**/.venv/*",
|
# What the agent actually needs:
|
||||||
"__pycache__", "**/__pycache__", "**/__pycache__/*",
|
# * pyproject.toml at the repo root, so ``pip install`` works against
|
||||||
".git", ".git/*",
|
# the bundle during enroll_bootstrap.sh.
|
||||||
".pytest_cache", ".pytest_cache/*",
|
# * the ``decnet/`` package, MINUS the master-only subtrees called out
|
||||||
".mypy_cache", ".mypy_cache/*",
|
# by _EXCLUDED_DECNET_SUBTREES — those never import on an agent host.
|
||||||
"*.egg-info", "*.egg-info/*",
|
# Everything else the bootstrap needs (the INI, certs, systemd units) is
|
||||||
# setuptools build/ staging dir — created by `pip install` and leaks a
|
# synthesized in-memory by ``_build_tarball`` below — it never hits the
|
||||||
# nested decnet_web/node_modules/ copy into the bundle otherwise.
|
# filesystem walk.
|
||||||
"build", "build/*", "build/**",
|
|
||||||
"*.pyc", "*.pyo",
|
# Top-level files shipped verbatim. Relative to the repo root.
|
||||||
"*.db", "*.db-wal", "*.db-shm", "decnet.db*",
|
_INCLUDED_ROOT_FILES: tuple[str, ...] = ("pyproject.toml",)
|
||||||
"*.log",
|
|
||||||
"tests", "tests/*",
|
# Top-level directories walked into the bundle. Relative to the repo root.
|
||||||
"development", "development/*",
|
_INCLUDED_DIRS: tuple[str, ...] = ("decnet",)
|
||||||
"wiki-checkout", "wiki-checkout/*",
|
|
||||||
# Frontend is master-only; agents never serve UI.
|
# Subtrees of an included directory that must NOT ship. Paths are
|
||||||
"decnet_web", "decnet_web/*", "decnet_web/**",
|
# relative to the repo root, forward-slash separated.
|
||||||
# Master FastAPI app and everything under decnet/web/ — no agent-side
|
# * ``decnet/web`` — FastAPI master app, unused by agents.
|
||||||
# code imports it. The agent/updater/forwarder/collector/prober/sniffer
|
# * ``decnet/mutator`` — schedules respawns swarm-wide; master-only.
|
||||||
# entrypoints are all under decnet/agent, decnet/updater, decnet/swarm,
|
# * ``decnet/profiler`` — rebuilds profiles against the master DB.
|
||||||
# decnet/collector, decnet/prober, decnet/sniffer.
|
_EXCLUDED_DECNET_SUBTREES: frozenset[str] = frozenset({
|
||||||
"decnet/web", "decnet/web/*", "decnet/web/**",
|
"decnet/web",
|
||||||
# Mutator + Profiler are master-only (mutator schedules respawns across
|
"decnet/mutator",
|
||||||
# the swarm; profiler rebuilds attacker profiles against the master DB).
|
"decnet/profiler",
|
||||||
"decnet/mutator", "decnet/mutator/*", "decnet/mutator/**",
|
})
|
||||||
"decnet/profiler", "decnet/profiler/*", "decnet/profiler/**",
|
|
||||||
"decnet-state.json",
|
|
||||||
"master.log", "master.json",
|
|
||||||
"decnet.tar",
|
|
||||||
# Dev-host env/config leaks — these bake the master's absolute paths into
|
|
||||||
# the agent and point log handlers at directories that don't exist on the
|
|
||||||
# worker VM.
|
|
||||||
".env", ".env.*", "**/.env", "**/.env.*",
|
|
||||||
"decnet.ini", "**/decnet.ini",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -176,15 +165,49 @@ def _repo_root() -> pathlib.Path:
|
|||||||
return pathlib.Path(__file__).resolve().parents[4]
|
return pathlib.Path(__file__).resolve().parents[4]
|
||||||
|
|
||||||
|
|
||||||
def _is_excluded(rel: str) -> bool:
|
def _iter_included(root: pathlib.Path) -> "list[tuple[pathlib.Path, str]]":
|
||||||
parts = pathlib.PurePosixPath(rel).parts
|
"""Return ``(full_path, arcname)`` pairs for every file the agent needs.
|
||||||
for pat in _EXCLUDES:
|
|
||||||
if fnmatch.fnmatch(rel, pat):
|
Walk is pruned in-place: ``__pycache__`` and the master-only subtrees
|
||||||
return True
|
in :data:`_EXCLUDED_DECNET_SUBTREES` are skipped at the directory
|
||||||
for i in range(1, len(parts) + 1):
|
level so we never descend into them (critical on dev boxes where
|
||||||
if fnmatch.fnmatch("/".join(parts[:i]), pat):
|
``decnet/web/`` pulls in a fat frontend tree via package-data).
|
||||||
return True
|
"""
|
||||||
return False
|
found: list[tuple[pathlib.Path, str]] = []
|
||||||
|
|
||||||
|
# Top-level files.
|
||||||
|
for rel in _INCLUDED_ROOT_FILES:
|
||||||
|
p = root / rel
|
||||||
|
if p.is_file():
|
||||||
|
found.append((p, rel))
|
||||||
|
|
||||||
|
# Top-level dirs, pruned.
|
||||||
|
for top in _INCLUDED_DIRS:
|
||||||
|
start = root / top
|
||||||
|
if not start.is_dir():
|
||||||
|
continue
|
||||||
|
for dirpath, dirnames, filenames in os.walk(start, topdown=True, followlinks=False):
|
||||||
|
dir_path = pathlib.Path(dirpath)
|
||||||
|
rel_dir = dir_path.relative_to(root).as_posix()
|
||||||
|
|
||||||
|
# Prune excluded subtrees + cache dirs BEFORE descending.
|
||||||
|
dirnames[:] = [
|
||||||
|
d for d in dirnames
|
||||||
|
if d != "__pycache__"
|
||||||
|
and f"{rel_dir}/{d}" not in _EXCLUDED_DECNET_SUBTREES
|
||||||
|
]
|
||||||
|
|
||||||
|
for fn in filenames:
|
||||||
|
if fn.endswith((".pyc", ".pyo")):
|
||||||
|
continue
|
||||||
|
full = dir_path / fn
|
||||||
|
if full.is_symlink():
|
||||||
|
continue
|
||||||
|
found.append((full, f"{rel_dir}/{fn}"))
|
||||||
|
|
||||||
|
# Deterministic tarball ordering.
|
||||||
|
found.sort(key=lambda t: t[1])
|
||||||
|
return found
|
||||||
|
|
||||||
|
|
||||||
def _render_decnet_ini(
|
def _render_decnet_ini(
|
||||||
@@ -231,7 +254,9 @@ def _build_tarball(
|
|||||||
use_ipvlan: bool = False,
|
use_ipvlan: bool = False,
|
||||||
) -> bytes:
|
) -> bytes:
|
||||||
"""Gzipped tarball with:
|
"""Gzipped tarball with:
|
||||||
- full repo source (minus excludes)
|
- agent-required source (see :data:`_INCLUDED_DIRS` /
|
||||||
|
:data:`_INCLUDED_ROOT_FILES`; master-only decnet/ subtrees
|
||||||
|
pruned)
|
||||||
- etc/decnet/decnet.ini (pre-baked for mode=agent)
|
- etc/decnet/decnet.ini (pre-baked for mode=agent)
|
||||||
- home/.decnet/agent/{ca.crt,worker.crt,worker.key}
|
- home/.decnet/agent/{ca.crt,worker.crt,worker.key}
|
||||||
- home/.decnet/updater/{ca.crt,updater.crt,updater.key} (if updater_issued)
|
- home/.decnet/updater/{ca.crt,updater.crt,updater.key} (if updater_issued)
|
||||||
@@ -240,13 +265,8 @@ def _build_tarball(
|
|||||||
root = _repo_root()
|
root = _repo_root()
|
||||||
buf = io.BytesIO()
|
buf = io.BytesIO()
|
||||||
with tarfile.open(fileobj=buf, mode="w:gz") as tar:
|
with tarfile.open(fileobj=buf, mode="w:gz") as tar:
|
||||||
for path in sorted(root.rglob("*")):
|
for path, arcname in _iter_included(root):
|
||||||
rel = path.relative_to(root).as_posix()
|
tar.add(path, arcname=arcname, recursive=False)
|
||||||
if _is_excluded(rel):
|
|
||||||
continue
|
|
||||||
if path.is_symlink() or path.is_dir():
|
|
||||||
continue
|
|
||||||
tar.add(path, arcname=rel, recursive=False)
|
|
||||||
|
|
||||||
_add_bytes(
|
_add_bytes(
|
||||||
tar,
|
tar,
|
||||||
|
|||||||
Reference in New Issue
Block a user