Files
DECNET/decnet/swarm/tar_tree.py
anti f2b3393669 chore: relicense to AGPL-3.0-or-later and add SPDX headers
Replaces LICENSE (GPLv3 -> AGPLv3) and prepends
`SPDX-License-Identifier: AGPL-3.0-or-later` to every source file
across decnet/, decnet_web/, tests/, scripts/, and tools/.

Rationale: closes the GPLv3 ASP loophole so any party operating a
modified DECNET as a network service must offer their modified
source. Personal copyright (Samuel Paschuan) + inbound=outbound
contributions make a future unilateral relicense infeasible.

- LICENSE: full AGPL-3.0 text (gnu.org/licenses/agpl-3.0.txt)
- COPYRIGHT: project copyright notice
- tools/add_spdx_headers.py: idempotent header injector
  (shebang- and PEP 263-aware)

Touches 1565 source files (.py, .ts, .tsx, .js, .jsx, .css, .sh).
No behavior change; comments only.
2026-05-22 21:04:16 -04:00

99 lines
3.1 KiB
Python

# SPDX-License-Identifier: AGPL-3.0-or-later
"""Build a gzipped tarball of the master's working tree for pushing to workers.
Always excludes the obvious large / secret / churn paths: ``.venv/``,
``__pycache__/``, ``.git/``, ``wiki-checkout/``, ``*.db*``, ``*.log``. The
caller can supply additional exclude globs.
Deliberately does NOT invoke git — the tree is what the operator has on
disk (staged + unstaged + untracked). That's the whole point; the scp
workflow we're replacing also shipped the live tree.
"""
from __future__ import annotations
import fnmatch
import io
import pathlib
import tarfile
from typing import Iterable, Optional
DEFAULT_EXCLUDES = (
".venv", ".venv/*",
"**/.venv/*",
"__pycache__", "**/__pycache__", "**/__pycache__/*",
".git", ".git/*",
"wiki-checkout", "wiki-checkout/*",
"*.pyc", "*.pyo",
"*.db", "*.db-wal", "*.db-shm",
"*.log",
".pytest_cache", ".pytest_cache/*",
".mypy_cache", ".mypy_cache/*",
".tox", ".tox/*",
"*.egg-info", "*.egg-info/*",
"decnet-state.json",
"master.log", "master.json",
"decnet.db*",
)
def _is_excluded(rel: str, patterns: Iterable[str]) -> bool:
parts = pathlib.PurePosixPath(rel).parts
for pat in patterns:
if fnmatch.fnmatch(rel, pat):
return True
# Also match the pattern against every leading subpath — this is
# what catches nested `.venv/...` without forcing callers to spell
# out every `**/` glob.
for i in range(1, len(parts) + 1):
if fnmatch.fnmatch("/".join(parts[:i]), pat):
return True
return False
def tar_working_tree(
root: pathlib.Path,
extra_excludes: Optional[Iterable[str]] = None,
) -> bytes:
"""Return the gzipped tarball bytes of ``root``.
Entries are added with paths relative to ``root`` (no leading ``/``,
no ``..``). The updater rejects unsafe paths on the receiving side.
"""
patterns = list(DEFAULT_EXCLUDES) + list(extra_excludes or ())
buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w:gz") as tar:
for path in sorted(root.rglob("*")):
rel = path.relative_to(root).as_posix()
if _is_excluded(rel, patterns):
continue
if path.is_symlink():
# Symlinks inside a repo tree are rare and often break
# portability; skip them rather than ship dangling links.
continue
if path.is_dir():
continue
tar.add(path, arcname=rel, recursive=False)
return buf.getvalue()
def detect_git_sha(root: pathlib.Path) -> str:
"""Best-effort ``HEAD`` sha. Returns ``""`` if not a git repo."""
head = root / ".git" / "HEAD"
if not head.is_file():
return ""
try:
ref = head.read_text().strip()
except OSError:
return ""
if ref.startswith("ref: "):
ref_path = root / ".git" / ref[5:]
if ref_path.is_file():
try:
return ref_path.read_text().strip()
except OSError:
return ""
return ""
return ref