Replaces LICENSE (GPLv3 -> AGPLv3) and prepends `SPDX-License-Identifier: AGPL-3.0-or-later` to every source file across decnet/, decnet_web/, tests/, scripts/, and tools/. Rationale: closes the GPLv3 ASP loophole so any party operating a modified DECNET as a network service must offer their modified source. Personal copyright (Samuel Paschuan) + inbound=outbound contributions make a future unilateral relicense infeasible. - LICENSE: full AGPL-3.0 text (gnu.org/licenses/agpl-3.0.txt) - COPYRIGHT: project copyright notice - tools/add_spdx_headers.py: idempotent header injector (shebang- and PEP 263-aware) Touches 1565 source files (.py, .ts, .tsx, .js, .jsx, .css, .sh). No behavior change; comments only.
91 lines
3.1 KiB
Python
91 lines
3.1 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
"""Filesystem store for operator-uploaded canary blobs.
|
|
|
|
Blobs live under ``/var/lib/decnet/canary/blobs/<sha256>`` (override
|
|
via ``DECNET_CANARY_BLOB_DIR``) and are deduplicated by content hash.
|
|
The DB table :class:`decnet.web.db.models.CanaryBlob` mirrors
|
|
metadata; the bytes are read on demand at instrumentation time, so
|
|
the API process never holds large operator uploads in memory longer
|
|
than the request itself.
|
|
|
|
Refcount-aware deletion is enforced at the DB layer (see
|
|
:meth:`decnet.web.db.repository.BaseRepository.delete_canary_blob`);
|
|
this module only provides write/read/unlink primitives keyed by
|
|
sha256.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Tuple
|
|
|
|
|
|
def blob_dir() -> Path:
|
|
"""Return the on-disk root for canary blobs.
|
|
|
|
Honors ``DECNET_CANARY_BLOB_DIR`` so tests can point at a tmp
|
|
path. The directory is created lazily on first write.
|
|
"""
|
|
raw = os.environ.get("DECNET_CANARY_BLOB_DIR", "/var/lib/decnet/canary/blobs")
|
|
return Path(raw)
|
|
|
|
|
|
def _path_for(sha256: str) -> Path:
|
|
# Two-level fan-out (``ab/cd/abcd...``) keeps any one directory
|
|
# from accumulating thousands of entries on busy fleets. Same
|
|
# shape as Git's loose-object store.
|
|
if len(sha256) < 4:
|
|
raise ValueError("sha256 must be at least 4 chars")
|
|
root = blob_dir()
|
|
return root / sha256[:2] / sha256[2:4] / sha256
|
|
|
|
|
|
def write_blob(content: bytes) -> Tuple[str, Path, int]:
|
|
"""Persist ``content`` under its sha256 path.
|
|
|
|
Idempotent: if the target file already exists with the same
|
|
bytes, no rewrite happens. Returns ``(sha256, path,
|
|
size_bytes)``.
|
|
"""
|
|
sha = hashlib.sha256(content).hexdigest()
|
|
target = _path_for(sha)
|
|
target.parent.mkdir(parents=True, exist_ok=True)
|
|
if not target.exists():
|
|
# Atomic-ish: write to a temp sibling and rename. Avoids the
|
|
# half-written-file race a concurrent reader would otherwise
|
|
# see if we wrote in place.
|
|
tmp = target.with_suffix(target.suffix + ".part")
|
|
tmp.write_bytes(content)
|
|
os.replace(tmp, target)
|
|
return sha, target, len(content)
|
|
|
|
|
|
def read_blob(sha256: str) -> bytes:
|
|
"""Read the bytes for a stored blob.
|
|
|
|
Raises :class:`FileNotFoundError` when the on-disk row was unlinked
|
|
out of band (operator pruned ``/var/lib/decnet`` by hand) — the
|
|
caller (instrumenter dispatch) surfaces it as a 410-ish error so
|
|
the operator can re-upload.
|
|
"""
|
|
return _path_for(sha256).read_bytes()
|
|
|
|
|
|
def unlink_blob(sha256: str) -> bool:
|
|
"""Delete the on-disk bytes for ``sha256``.
|
|
|
|
Returns True if a file was removed, False if it was already gone.
|
|
The DB row deletion happens in
|
|
:meth:`SQLModelRepository.delete_canary_blob`; this function is
|
|
a best-effort companion called *after* the DB delete commits so
|
|
a crash between them leaves a recoverable orphan, never a
|
|
dangling DB reference.
|
|
"""
|
|
target = _path_for(sha256)
|
|
try:
|
|
target.unlink()
|
|
except FileNotFoundError:
|
|
return False
|
|
return True
|