fix(swarm): ship update tarball from an explicit include-list, never secrets
tar_working_tree walked the whole working tree minus a blocklist that omitted .env.local, *.key, *.pem, *.crt — so the JWT secret, Fernet key, admin password, DB creds and TLS private keys fanned out to every worker on each update push. Invert to an allowlist (DEFAULT_INCLUDES = pyproject.toml + LICENSE + README.md + decnet/), the exact surface 'pip install .' needs; decnet/ carries its own package-data. A defensive _HYGIENE_PATTERNS layer drops secret-/churn-shaped files even if nested under decnet/. extra_excludes can still narrow but can no longer widen past the allowlist. Verified against the live repo: the bundle carries the package + metadata and zero secret/db/log/pyc files, and pip-installs clean from the extracted tree.
This commit is contained in:
@@ -1,13 +1,20 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
"""Build a gzipped tarball of the master's working tree for pushing to workers.
|
"""Build a gzipped tarball of the installable DECNET package for workers.
|
||||||
|
|
||||||
Always excludes the obvious large / secret / churn paths: ``.venv/``,
|
The tarball is extracted and ``pip install``-ed on each worker, so it ships
|
||||||
``__pycache__/``, ``.git/``, ``wiki-checkout/``, ``*.db*``, ``*.log``. The
|
*only* what that build needs — enumerated by an INCLUDE allowlist, never a
|
||||||
caller can supply additional exclude globs.
|
blocklist. This is the trust-boundary rule: a bundle crossing to another host
|
||||||
|
enumerates what it carries, so a stray ``.env.local``, TLS private key, SQLite
|
||||||
|
DB, or the operator's whole working tree can never be swept in by an exclude
|
||||||
|
list that simply forgot a pattern.
|
||||||
|
|
||||||
Deliberately does NOT invoke git — the tree is what the operator has on
|
``DEFAULT_INCLUDES`` is the package surface (``decnet/`` + packaging metadata);
|
||||||
disk (staged + unstaged + untracked). That's the whole point; the scp
|
``_HYGIENE_PATTERNS`` is a defensive second layer that drops secret-/churn-
|
||||||
workflow we're replacing also shipped the live tree.
|
shaped files even if one somehow lives under an included directory. Callers may
|
||||||
|
pass ``extra_excludes`` to narrow further, but cannot add anything outside the
|
||||||
|
allowlist.
|
||||||
|
|
||||||
|
Deliberately does NOT invoke git — the included dirs are taken from disk as-is.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
@@ -17,22 +24,26 @@ import pathlib
|
|||||||
import tarfile
|
import tarfile
|
||||||
from typing import Iterable, Optional
|
from typing import Iterable, Optional
|
||||||
|
|
||||||
# The ONLY top-level paths shipped to a worker: the importable package plus the
# metadata `pip install .` needs (setuptools build-meta + license-files=LICENSE).
# decnet/ carries its own package-data (templates/, canary/*). Everything else
# in the working tree — secrets, DBs, logs, the dashboard source, tests, build
# artifacts — is excluded by construction.
DEFAULT_INCLUDES = (
    "pyproject.toml",
    "LICENSE",
    "README.md",
    "decnet",
)

# Defensive hygiene applied WITHIN an included path: never ship build churn or
# anything secret-shaped, matched on the basename so it catches any nesting.
_HYGIENE_PATTERNS = (
    # Interpreter build churn.
    "*.pyc", "*.pyo",
    # SQLite databases and their sidecar files. "decnet.db*" is carried over
    # from the previous exclude list so renamed copies/backups of the master
    # DB (e.g. decnet.db.bak) stay out as well.
    "*.db", "*.db-wal", "*.db-shm", "*.db-journal",
    "decnet.db*",
    "*.log",
    # Runtime state files the previous exclude list named explicitly; they
    # must stay filtered even if they end up under an included directory.
    "decnet-state.json",
    "master.json",
    # Environment files and key / certificate material.
    ".env", ".env.*", "*.env",
    "*.key", "*.pem", "*.crt", "*.p12", "*.pfx",
)
|
||||||
|
|
||||||
|
|
||||||
@@ -41,39 +52,57 @@ def _is_excluded(rel: str, patterns: Iterable[str]) -> bool:
|
|||||||
for pat in patterns:
|
for pat in patterns:
|
||||||
if fnmatch.fnmatch(rel, pat):
|
if fnmatch.fnmatch(rel, pat):
|
||||||
return True
|
return True
|
||||||
# Also match the pattern against every leading subpath — this is
|
# Also match the pattern against every leading subpath so a caller can
|
||||||
# what catches nested `.venv/...` without forcing callers to spell
|
# exclude a whole subtree without spelling out every `**/` glob.
|
||||||
# out every `**/` glob.
|
|
||||||
for i in range(1, len(parts) + 1):
|
for i in range(1, len(parts) + 1):
|
||||||
if fnmatch.fnmatch("/".join(parts[:i]), pat):
|
if fnmatch.fnmatch("/".join(parts[:i]), pat):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _hygiene_skip(rel: str) -> bool:
    """True for build-churn / secret-shaped files anywhere in the tree.

    ``rel`` is a POSIX-style path relative to the bundle root. A path is
    skipped when any of its components is ``__pycache__`` or when its basename
    matches one of ``_HYGIENE_PATTERNS``.

    Matching is case-insensitive on every platform: ``fnmatch.fnmatch`` only
    folds case where ``os.path.normcase`` does, so on POSIX a file named
    ``TLS.KEY`` or ``.ENV.prod`` would otherwise slip past the secret filter.
    """
    p = pathlib.PurePosixPath(rel)
    if "__pycache__" in p.parts:
        return True
    # Lower-case both sides and use the case-sensitive matcher so the result
    # does not depend on the host OS.
    name = p.name.lower()
    return any(fnmatch.fnmatchcase(name, pat.lower()) for pat in _HYGIENE_PATTERNS)
|
||||||
|
|
||||||
|
|
||||||
def tar_working_tree(
    root: pathlib.Path,
    extra_excludes: Optional[Iterable[str]] = None,
    includes: Optional[Iterable[str]] = None,
) -> bytes:
    """Return the gzipped tarball of the installable package under ``root``.

    Only paths in ``includes`` (default :data:`DEFAULT_INCLUDES`) are walked;
    ``extra_excludes`` narrows further but can never widen the set. Entries are
    added with paths relative to ``root`` (no leading ``/``, no ``..``). The
    updater rejects unsafe paths on the receiving side.

    Args:
        root: Directory the include entries are resolved against.
        extra_excludes: Additional glob patterns; any admitted path matching
            one of them is dropped.
        includes: Paths relative to ``root`` to ship. Each may name a file or
            a directory (walked recursively). Missing entries and entries
            that are symlinks are skipped.

    Returns:
        The bytes of a gzip-compressed tar archive.

    Raises:
        ValueError: If an ``includes`` entry is empty, absolute, or contains a
            ``..`` component. Such an entry would resolve to ``root`` itself
            or to a location outside it, defeating the allowlist.
    """
    include_roots = list(DEFAULT_INCLUDES if includes is None else includes)
    extra = list(extra_excludes or ())

    # Validate the allowlist before any bytes are written: `root / entry` would
    # otherwise follow `..` or an absolute path straight out of the tree, and
    # an empty entry collapses to `root` (i.e. the whole working tree).
    for entry in include_roots:
        entry_path = pathlib.PurePath(entry)
        if (
            not str(entry).strip()
            or entry_path.is_absolute()
            or ".." in entry_path.parts
        ):
            raise ValueError(f"unsafe include path: {entry!r}")

    buf = io.BytesIO()
    shipped: set[str] = set()  # arcnames already written; guards overlapping includes

    with tarfile.open(fileobj=buf, mode="w:gz") as tar:

        def _admit(path: pathlib.Path) -> None:
            """Add ``path`` unless hygiene, a caller exclude, or a duplicate rejects it."""
            rel = path.relative_to(root).as_posix()
            if rel in shipped or _hygiene_skip(rel) or _is_excluded(rel, extra):
                return
            shipped.add(rel)
            tar.add(path, arcname=rel, recursive=False)

        for entry in include_roots:
            base = root / entry
            if base.is_symlink() or not base.exists():
                continue
            if base.is_file():
                _admit(base)
                continue
            # Sorted so member order does not depend on directory listing order.
            for path in sorted(base.rglob("*")):
                # Skip symlinks (dangling/portability) and dirs (added implicitly).
                if path.is_symlink() or path.is_dir():
                    continue
                _admit(path)

    return buf.getvalue()
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
"""tar_working_tree: exclude filter, tarball validity, git SHA detection."""
|
"""tar_working_tree: include allowlist, secret exclusion, tarball validity, git SHA."""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import io
|
import io
|
||||||
@@ -14,49 +14,92 @@ def _tree_names(data: bytes) -> set[str]:
|
|||||||
return {m.name for m in tar.getmembers()}
|
return {m.name for m in tar.getmembers()}
|
||||||
|
|
||||||
|
|
||||||
def _seed_tree(root: pathlib.Path) -> None:
    """Populate ``root`` with a realistic master working tree.

    The tree holds the package and its packaging metadata, plus a pile of junk
    and secrets that must NOT ship.
    """
    # Parents are listed before their children; mkdir() is non-recursive.
    for directory in (
        "decnet",
        "decnet/templates",
        "decnet/__pycache__",
        "decnet_web",  # dashboard source — not a package
        "tests",
    ):
        (root / directory).mkdir()

    package_and_metadata = {
        "decnet/__init__.py": "",
        "decnet/agent.py": "x = 1",
        "decnet/templates/base.j2": "data",
        "decnet/__pycache__/agent.cpython-311.pyc": "bytecode",
        "pyproject.toml": "[project]\nname='decnet'\n",
        "LICENSE": "AGPL",
        "README.md": "# decnet",
    }
    # ---- secrets / junk that the OLD exclude-list would have leaked ----
    secrets_and_junk = {
        ".env.local": "DECNET_JWT_SECRET=topsecret",
        ".env": "X=Y",
        "tls.key": "-----BEGIN PRIVATE KEY-----",
        "ca.pem": "-----BEGIN CERTIFICATE-----",
        "decnet.db": "sqlite",
        "master.log": "log",
        "decnet_web/app.tsx": "ui",
        "tests/test_x.py": "assert True",
    }
    for rel, content in {**package_and_metadata, **secrets_and_junk}.items():
        (root / rel).write_text(content)
|
||||||
|
|
||||||
|
|
||||||
def test_tar_ships_only_the_package_and_metadata(tmp_path: pathlib.Path) -> None:
    """The default bundle carries the package, its data and metadata — nothing else."""
    _seed_tree(tmp_path)
    shipped = _tree_names(tar_working_tree(tmp_path))

    for required in (
        "decnet/agent.py",
        "decnet/__init__.py",
        "decnet/templates/base.j2",  # package-data ships
        "pyproject.toml",
        "LICENSE",
        "README.md",
    ):
        assert required in shipped

    # Nothing outside the allowlist:
    for outsider in ("decnet_web", "tests"):
        assert not any(member.startswith(outsider) for member in shipped)
|
||||||
|
|
||||||
|
|
||||||
|
def test_tar_never_ships_secrets_or_db_or_churn(tmp_path: pathlib.Path) -> None:
    """Root-level secrets, DBs, logs and bytecode never reach the bundle."""
    # The whole point of the include-list: these existed at the root and the
    # bundle must not carry a single one of them.
    _seed_tree(tmp_path)
    shipped = _tree_names(tar_working_tree(tmp_path))

    root_level_junk = (
        ".env.local",
        ".env",
        "tls.key",
        "ca.pem",
        "decnet.db",
        "master.log",
    )
    for forbidden in root_level_junk:
        assert forbidden not in shipped, f"leaked {forbidden}"

    bytecode = [m for m in shipped if "__pycache__" in m or m.endswith(".pyc")]
    assert not bytecode
|
||||||
|
|
||||||
|
|
||||||
|
def test_secret_nested_under_package_is_still_dropped(tmp_path: pathlib.Path) -> None:
    """Secret-shaped files are filtered even when they sit inside ``decnet/``."""
    # Defensive hygiene: even a secret-shaped file *inside* decnet/ is excluded.
    _seed_tree(tmp_path)
    package_dir = tmp_path / "decnet"
    for filename, content in (("worker.key", "oops"), (".env.prod", "SECRET=1")):
        (package_dir / filename).write_text(content)

    shipped = _tree_names(tar_working_tree(tmp_path))

    assert "decnet/worker.key" not in shipped
    assert "decnet/.env.prod" not in shipped
    assert "decnet/agent.py" in shipped  # real source still present
|
||||||
|
|
||||||
|
|
||||||
|
def test_extra_excludes_narrows_within_allowlist(tmp_path: pathlib.Path) -> None:
    """A caller-supplied exclude removes exactly the allowlisted file it names."""
    _seed_tree(tmp_path)
    bundle = tar_working_tree(tmp_path, extra_excludes=["decnet/agent.py"])
    shipped = _tree_names(bundle)

    assert "decnet/agent.py" not in shipped
    assert "decnet/__init__.py" in shipped
|
||||||
|
|
||||||
|
|
||||||
|
def test_extra_excludes_cannot_widen_beyond_allowlist(tmp_path: pathlib.Path) -> None:
    """Whatever ``extra_excludes`` contains, the bundle is a subset of the default one."""
    # Excludes can only remove, never add. Passing patterns that name
    # non-allowlisted paths (instead of an empty list, which exercises
    # nothing) must leave those paths out and must not disturb the package.
    _seed_tree(tmp_path)
    baseline = _tree_names(tar_working_tree(tmp_path))
    narrowed = _tree_names(
        tar_working_tree(
            tmp_path,
            extra_excludes=["decnet_web", "decnet_web/*", "tests/*", ".env*"],
        )
    )

    assert narrowed <= baseline
    assert not any(n.startswith("decnet_web") for n in narrowed)
    assert not any(n.startswith("tests") for n in narrowed)
    assert ".env.local" not in narrowed
    assert "decnet/agent.py" in narrowed  # allowlisted content is untouched
|
||||||
|
|
||||||
|
|
||||||
def test_tar_skips_symlinks(tmp_path: pathlib.Path) -> None:
    """A symlink inside the package is left out; the regular file it points to ships."""
    package_dir = tmp_path / "decnet"
    package_dir.mkdir()
    target = package_dir / "real.py"
    target.write_text("hi")
    link = package_dir / "link.py"
    try:
        link.symlink_to(target)
    except (OSError, NotImplementedError):
        return  # platform doesn't support symlinks — skip

    shipped = _tree_names(tar_working_tree(tmp_path))
    assert "decnet/real.py" in shipped
    assert "decnet/link.py" not in shipped
|
||||||
|
|
||||||
|
|
||||||
def test_detect_git_sha_from_ref(tmp_path: pathlib.Path) -> None:
|
def test_detect_git_sha_from_ref(tmp_path: pathlib.Path) -> None:
|
||||||
|
|||||||
Reference in New Issue
Block a user