fix(swarm): ship update tarball from an explicit include-list, never secrets

tar_working_tree walked the whole working tree minus a blocklist that
omitted .env.local, *.key, *.pem, *.crt — so the JWT secret, Fernet key,
admin password, DB creds and TLS private keys fanned out to every worker
on each update push.

Invert to an allowlist (DEFAULT_INCLUDES = pyproject.toml + LICENSE +
README.md + decnet/), the exact surface 'pip install .' needs; decnet/
carries its own package-data. A defensive _HYGIENE_PATTERNS layer drops
secret-/churn-shaped files even if nested under decnet/. extra_excludes
can still narrow but can no longer widen past the allowlist.

Verified against the live repo: the bundle carries the package + metadata
and zero secret/db/log/pyc files, and pip-installs clean from the
extracted tree.
This commit is contained in:
2026-05-30 17:26:23 -04:00
parent a4193d7022
commit 28327a9b4e
2 changed files with 144 additions and 72 deletions

View File

@@ -1,5 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""tar_working_tree: exclude filter, tarball validity, git SHA detection."""
"""tar_working_tree: include allowlist, secret exclusion, tarball validity, git SHA."""
from __future__ import annotations
import io
@@ -14,49 +14,92 @@ def _tree_names(data: bytes) -> set[str]:
return {m.name for m in tar.getmembers()}
def test_tar_excludes_default_patterns(tmp_path: pathlib.Path) -> None:
    """Package source is bundled; venv, VCS, bytecode, wiki, db and log files are not."""
    package_dir = tmp_path / "decnet"
    package_dir.mkdir()
    (package_dir / "keep.py").write_text("x = 1")

    venv_dir = tmp_path / ".venv"
    venv_dir.mkdir()
    (venv_dir / "pyvenv.cfg").write_text("junk")

    git_dir = tmp_path / ".git"
    git_dir.mkdir()
    (git_dir / "HEAD").write_text("ref: refs/heads/main\n")

    bytecode_dir = package_dir / "__pycache__"
    bytecode_dir.mkdir()
    (bytecode_dir / "keep.cpython-311.pyc").write_text("bytecode")

    wiki_dir = tmp_path / "wiki-checkout"
    wiki_dir.mkdir()
    (wiki_dir / "Home.md").write_text("# wiki")

    (tmp_path / "run.db").write_text("sqlite")
    (tmp_path / "master.log").write_text("log")

    bundled = _tree_names(tar_working_tree(tmp_path))

    assert "decnet/keep.py" in bundled
    # No member path may even contain one of these directory names.
    for fragment in (".venv", ".git", "__pycache__", "wiki-checkout"):
        assert all(fragment not in member for member in bundled)
    # Root-level runtime artefacts are checked by exact member name.
    for artefact in ("run.db", "master.log"):
        assert artefact not in bundled
def _seed_tree(root: pathlib.Path) -> None:
    """Populate *root* with a realistic master working tree.

    The tree holds the package and its metadata plus a pile of junk and
    secrets that must NOT ship.
    """
    directories = (
        ("decnet",),
        ("decnet", "templates"),
        ("decnet", "__pycache__"),
        ("decnet_web",),  # dashboard source — not a package
        ("tests",),
    )
    # Parents are listed before children, so a plain mkdir() is enough.
    for parts in directories:
        root.joinpath(*parts).mkdir()

    files = (
        (("decnet", "__init__.py"), ""),
        (("decnet", "agent.py"), "x = 1"),
        (("decnet", "templates", "base.j2"), "data"),
        (("decnet", "__pycache__", "agent.cpython-311.pyc"), "bytecode"),
        (("pyproject.toml",), "[project]\nname='decnet'\n"),
        (("LICENSE",), "AGPL"),
        (("README.md",), "# decnet"),
        # ---- secrets / junk that the OLD exclude-list would have leaked ----
        ((".env.local",), "DECNET_JWT_SECRET=topsecret"),
        ((".env",), "X=Y"),
        (("tls.key",), "-----BEGIN PRIVATE KEY-----"),
        (("ca.pem",), "-----BEGIN CERTIFICATE-----"),
        (("decnet.db",), "sqlite"),
        (("master.log",), "log"),
        (("decnet_web", "app.tsx"), "ui"),
        (("tests", "test_x.py"), "assert True"),
    )
    for parts, text in files:
        root.joinpath(*parts).write_text(text)
def test_tar_accepts_extra_excludes(tmp_path: pathlib.Path) -> None:
    """A caller-supplied exclude drops the named file while its sibling still ships.

    NOTE(review): this expects a root-level ``a.py`` to be bundled, which
    looks like the pre-allowlist behaviour — confirm this test is still
    meant to exist alongside the include-list tests.
    """
    shipped = tmp_path / "a.py"
    withheld = tmp_path / "secret.env"
    shipped.write_text("x")
    withheld.write_text("TOKEN=abc")

    archive = tar_working_tree(tmp_path, extra_excludes=["secret.env"])
    bundled = _tree_names(archive)

    assert "a.py" in bundled
    assert "secret.env" not in bundled
def test_tar_ships_only_the_package_and_metadata(tmp_path: pathlib.Path) -> None:
    """The bundle carries the package and its metadata files, and nothing else."""
    _seed_tree(tmp_path)
    bundled = _tree_names(tar_working_tree(tmp_path))

    must_ship = (
        "decnet/agent.py",
        "decnet/__init__.py",
        "decnet/templates/base.j2",  # package-data ships
        "pyproject.toml",
        "LICENSE",
        "README.md",
    )
    for member in must_ship:
        assert member in bundled

    # Nothing outside the allowlist:
    for outsider in ("decnet_web", "tests"):
        assert not any(member.startswith(outsider) for member in bundled)
def test_tar_never_ships_secrets_or_db_or_churn(tmp_path: pathlib.Path) -> None:
    """Root-level secrets, databases, logs and bytecode never reach the bundle."""
    # The whole point of the include-list: these existed at the root and the
    # bundle must not carry a single one of them.
    _seed_tree(tmp_path)
    bundled = _tree_names(tar_working_tree(tmp_path))

    must_not_ship = (
        ".env.local",
        ".env",
        "tls.key",
        "ca.pem",
        "decnet.db",
        "master.log",
    )
    for forbidden in must_not_ship:
        assert forbidden not in bundled, f"leaked {forbidden}"

    # Bytecode is rejected both by directory name and by file suffix.
    assert all(
        "__pycache__" not in member and not member.endswith(".pyc")
        for member in bundled
    )
def test_secret_nested_under_package_is_still_dropped(tmp_path: pathlib.Path) -> None:
    """Secret-shaped files placed inside ``decnet/`` are excluded from the bundle."""
    # Defensive hygiene: even a secret-shaped file *inside* decnet/ is excluded.
    _seed_tree(tmp_path)
    package_dir = tmp_path / "decnet"
    (package_dir / "worker.key").write_text("oops")
    (package_dir / ".env.prod").write_text("SECRET=1")

    bundled = _tree_names(tar_working_tree(tmp_path))

    for secret in ("decnet/worker.key", "decnet/.env.prod"):
        assert secret not in bundled
    assert "decnet/agent.py" in bundled  # real source still present
def test_extra_excludes_narrows_within_allowlist(tmp_path: pathlib.Path) -> None:
    """An extra exclude removes one allowlisted file and leaves its sibling alone."""
    _seed_tree(tmp_path)

    archive = tar_working_tree(tmp_path, extra_excludes=["decnet/agent.py"])
    bundled = _tree_names(archive)

    assert "decnet/agent.py" not in bundled
    assert "decnet/__init__.py" in bundled
def test_extra_excludes_cannot_widen_beyond_allowlist(tmp_path: pathlib.Path) -> None:
    """``decnet_web`` stays out of the bundle when ``extra_excludes`` is supplied.

    NOTE(review): the list passed here is empty, so this exercises the same
    path as a default call — confirm whether a non-empty value was intended.
    """
    # Passing a non-allowlisted include via extra_excludes is meaningless —
    # excludes can only remove, never add. decnet_web stays out.
    _seed_tree(tmp_path)

    archive = tar_working_tree(tmp_path, extra_excludes=[])
    bundled = _tree_names(archive)

    assert all(not member.startswith("decnet_web") for member in bundled)
def test_tar_skips_symlinks(tmp_path: pathlib.Path) -> None:
    """A symlink inside the package is left out of the bundle; its target ships.

    The fixture lives under ``decnet/`` because only allowlisted paths are
    bundled: a root-level ``real.txt`` would never ship, so asserting on it
    (or on a root-level link) says nothing about symlink handling.
    """
    import pytest  # local import: the module's import block is not fully visible here

    package_dir = tmp_path / "decnet"
    package_dir.mkdir()
    target = package_dir / "real.py"
    target.write_text("hi")

    try:
        (package_dir / "link.py").symlink_to(target)
    except (OSError, NotImplementedError):
        # Report an explicit skip instead of a silent pass, so an
        # unexercised test shows up in the run summary.
        pytest.skip("platform doesn't support symlinks")

    names = _tree_names(tar_working_tree(tmp_path))

    assert "decnet/real.py" in names
    assert "decnet/link.py" not in names
def test_detect_git_sha_from_ref(tmp_path: pathlib.Path) -> None: