tar_working_tree walked the whole working tree minus a blocklist that omitted .env.local, *.key, *.pem, *.crt — so the JWT secret, Fernet key, admin password, DB creds and TLS private keys fanned out to every worker on each update push. Invert to an allowlist (DEFAULT_INCLUDES = pyproject.toml + LICENSE + README.md + decnet/), the exact surface 'pip install .' needs; decnet/ carries its own package-data. A defensive _HYGIENE_PATTERNS layer drops secret-/churn-shaped files even if nested under decnet/. extra_excludes can still narrow but can no longer widen past the allowlist. Verified against the live repo: the bundle carries the package + metadata and zero secret/db/log/pyc files, and pip-installs clean from the extracted tree.
128 lines
4.6 KiB
Python
128 lines
4.6 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
"""Build a gzipped tarball of the installable DECNET package for workers.
|
|
|
|
The tarball is extracted and ``pip install``-ed on each worker, so it ships
*only* what that build needs — enumerated by an INCLUDE allowlist, never a
blocklist. This is the trust-boundary rule: a bundle crossing to another host
enumerates what it carries, so a stray ``.env.local``, TLS private key, SQLite
DB, or the operator's whole working tree can never be swept in by an exclude
list that simply forgot a pattern.

``DEFAULT_INCLUDES`` is the package surface (``decnet/`` + packaging metadata);
``_HYGIENE_PATTERNS`` is a defensive second layer that drops secret-/churn-
shaped files even if one somehow lives under an included directory. Callers may
pass ``extra_excludes`` to narrow further, but cannot add anything outside the
allowlist.

Deliberately does NOT invoke git — the included dirs are taken from disk as-is.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import fnmatch
|
|
import io
|
|
import pathlib
|
|
import tarfile
|
|
from typing import Iterable, Optional
|
|
|
|
# Allowlist of top-level paths that may be shipped to a worker. It is the
# importable package plus the metadata `pip install .` needs (setuptools
# build-meta + license-files=LICENSE). decnet/ carries its own package-data
# (templates/, canary/*). Anything not named here — secrets, DBs, logs, the
# dashboard source, tests, build artifacts — is left out by construction.
DEFAULT_INCLUDES = ("pyproject.toml", "LICENSE", "README.md", "decnet")
|
|
|
|
# Second, defensive layer applied INSIDE an included path. Each glob is matched
# against the file's basename, so it applies at any nesting depth. Build churn
# and anything secret-shaped is never shipped.
_HYGIENE_PATTERNS = (
    # compiled bytecode
    "*.pyc",
    "*.pyo",
    # database files and their sidecars
    "*.db",
    "*.db-wal",
    "*.db-shm",
    "*.db-journal",
    # logs
    "*.log",
    # environment files
    ".env",
    ".env.*",
    "*.env",
    # keys, certificates and key stores
    "*.key",
    "*.pem",
    "*.crt",
    "*.p12",
    "*.pfx",
)
|
|
|
|
|
|
def _is_excluded(rel: str, patterns: Iterable[str]) -> bool:
    """Return True if ``rel`` or any of its leading subpaths matches a pattern.

    ``rel`` is a POSIX-style relative path. Each pattern is an ``fnmatch``
    glob that is tried against the full path and against every leading
    subpath (``a``, ``a/b``, ``a/b/c``, ...). A caller can therefore exclude a
    whole subtree by naming its directory, without spelling out ``**/`` globs.
    """
    segments = pathlib.PurePosixPath(rel).parts
    # The raw path goes first, followed by each progressively longer prefix.
    candidates = [rel]
    candidates.extend(
        "/".join(segments[:end]) for end in range(1, len(segments) + 1)
    )
    return any(
        fnmatch.fnmatch(candidate, pattern)
        for pattern in patterns
        for candidate in candidates
    )
|
|
|
|
|
|
def _hygiene_skip(rel: str) -> bool:
    """True for build-churn / secret-shaped files anywhere in the tree.

    ``rel`` is a POSIX-style relative path. A path is skipped when any of its
    components is ``__pycache__``, or when its basename matches one of the
    ``_HYGIENE_PATTERNS`` globs.

    The basename comparison is case-insensitive on every platform.
    ``fnmatch.fnmatch`` only folds case where ``os.path.normcase`` does, so on
    POSIX a file named ``SERVER.PEM`` or ``.ENV`` would otherwise slip past
    this filter.
    """
    p = pathlib.PurePosixPath(rel)
    if "__pycache__" in p.parts:
        return True
    name = p.name.lower()
    return any(
        fnmatch.fnmatchcase(name, pat.lower()) for pat in _HYGIENE_PATTERNS
    )
|
|
|
|
|
|
def tar_working_tree(
    root: pathlib.Path,
    extra_excludes: Optional[Iterable[str]] = None,
    includes: Optional[Iterable[str]] = None,
) -> bytes:
    """Return the gzipped tarball of the installable package under ``root``.

    Only paths in ``includes`` (default :data:`DEFAULT_INCLUDES`) are walked;
    ``extra_excludes`` narrows further but can never widen the set. Entries are
    added with paths relative to ``root`` (no leading ``/``, no ``..``). The
    updater rejects unsafe paths on the receiving side.

    Include entries that do not exist, or that are themselves symlinks, are
    skipped silently. Below an included directory, symlinks are skipped and
    directories are not added as entries of their own. Every remaining path
    must pass both ``_hygiene_skip`` and the ``extra_excludes`` patterns.

    A bare string passed as ``includes`` or ``extra_excludes`` is treated as a
    single entry instead of being iterated character by character.

    Raises ``ValueError`` if an existing include entry lies outside ``root`` or
    contains a ``..`` component, since either would put an unsafe path into
    the archive.
    """
    # A str is itself an Iterable[str]; without this guard "*.md" would become
    # the patterns "*", ".", "m", "d", and "*" would exclude every file.
    if isinstance(includes, str):
        includes = [includes]
    if isinstance(extra_excludes, str):
        extra_excludes = [extra_excludes]

    include_roots = list(includes) if includes is not None else list(DEFAULT_INCLUDES)
    extra = list(extra_excludes or ())
    buf = io.BytesIO()

    def _admit(path: pathlib.Path) -> None:
        # Add one file unless the hygiene layer or a caller exclude rejects it.
        rel = path.relative_to(root).as_posix()
        if _hygiene_skip(rel) or _is_excluded(rel, extra):
            return
        tar.add(path, arcname=rel, recursive=False)

    with tarfile.open(fileobj=buf, mode="w:gz") as tar:
        for entry in include_roots:
            base = root / entry
            if not base.exists() or base.is_symlink():
                continue

            # Enforce the arcname contract before anything is written. The
            # check is lexical, so "../x" and "pkg/../../x" are both refused.
            try:
                rel_base = base.relative_to(root)
            except ValueError:
                raise ValueError(
                    f"include {entry!r} is not located under {root}"
                ) from None
            if ".." in rel_base.parts:
                raise ValueError(
                    f"include {entry!r} must not contain '..' components"
                )

            if base.is_file():
                _admit(base)
                continue
            for path in sorted(base.rglob("*")):
                # Skip symlinks (dangling/portability) and dirs (added implicitly).
                if path.is_symlink() or path.is_dir():
                    continue
                _admit(path)

    return buf.getvalue()
|
|
|
|
|
|
def detect_git_sha(root: pathlib.Path) -> str:
    """Best-effort ``HEAD`` sha. Returns ``""`` if it cannot be determined.

    Reads ``root/.git/HEAD`` directly, without invoking git:

    * If ``HEAD`` is missing or unreadable, return ``""``.
    * If ``HEAD`` is not a symbolic ref (detached checkout), return its
      stripped contents.
    * If ``HEAD`` is ``ref: <name>``, return the contents of the loose ref
      file ``.git/<name>`` when it exists. Otherwise look ``<name>`` up in
      ``.git/packed-refs``, where refs live once they have been packed (for
      example by ``git gc``).
    """

    def _read(path: pathlib.Path) -> str:
        # Git stores these files as UTF-8. Undecodable bytes are replaced so a
        # locale mismatch cannot raise out of a best-effort helper.
        try:
            return path.read_text(encoding="utf-8", errors="replace").strip()
        except OSError:
            return ""

    git_dir = root / ".git"
    head = git_dir / "HEAD"
    if not head.is_file():
        return ""

    ref = _read(head)
    if not ref.startswith("ref: "):
        return ref

    ref_name = ref[5:]
    loose = git_dir / ref_name
    if loose.is_file():
        return _read(loose)

    packed = git_dir / "packed-refs"
    if not packed.is_file():
        return ""
    for line in _read(packed).splitlines():
        # "#" lines are the header; "^" lines are peeled tag targets.
        if not line or line.startswith(("#", "^")):
            continue
        sha, _, name = line.partition(" ")
        if name.strip() == ref_name:
            return sha
    return ""
|