diff --git a/decnet/artifacts/__init__.py b/decnet/artifacts/__init__.py new file mode 100644 index 00000000..c37aa012 --- /dev/null +++ b/decnet/artifacts/__init__.py @@ -0,0 +1 @@ +"""Artifact storage helpers shared between the web router and TTP workers.""" diff --git a/decnet/artifacts/paths.py b/decnet/artifacts/paths.py new file mode 100644 index 00000000..4ef5f0b9 --- /dev/null +++ b/decnet/artifacts/paths.py @@ -0,0 +1,86 @@ +""" +Shared on-disk artifact path resolution. + +Honeypot decoys (SSH, SMTP) farm captured payloads into a host-mounted +quarantine tree: + + /var/lib/decnet/artifacts/{decky}/{service}/{stored_as} + +Two callers need to translate ``(decky, stored_as, service)`` into a +concrete ``Path`` rooted under that tree: + +* The web router endpoint ``GET /api/v1/artifacts/{decky}/{stored_as}`` + (``decnet.web.router.artifacts.api_get_artifact``) — admin-gated + download for the dashboard. +* The TTP ``EmailLifter`` (``decnet.ttp.impl.email_lifter``), which + reads the stored ``.eml`` at tag-time so body-aware predicates + (R0047 BEC, R0048 macro) don't need raw body text on the bus. + +Both callers share the same validation rules and the same +defence-in-depth symlink-escape check; this module is the single +implementation. It is auth-agnostic — wrappers layer authentication +where appropriate (the router does ``require_admin``, the lifter does +not). +""" + +from __future__ import annotations + +import os +import re +from pathlib import Path + +# decky names come from the deployer — lowercase alnum plus hyphens. +_DECKY_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,62}$") + +# Services that own an artifacts subdir. Kept explicit so a caller +# can't pivot into arbitrary subpaths via a query string or bus payload. +_ALLOWED_SERVICES = frozenset({"ssh", "smtp"}) + +# stored_as is assembled by the capturing template as: +# ${ts}_${sha:0:12}_${base} +# where ts is ISO-8601 UTC (e.g. 2026-04-18T02:22:56Z), sha is 12 hex chars, +# and base is the original filename's basename. Keep the filename charset +# tight but allow common punctuation dropped files actually use. +_STORED_AS_RE = re.compile( + r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z_[a-f0-9]{12}_[A-Za-z0-9._-]{1,255}$" +) + +# Module-level so tests can monkeypatch. Override via env in production +# (the systemd unit sets this) — the prod path matches the bind mount +# declared in decnet/services/{ssh,smtp}.py. +ARTIFACTS_ROOT = Path( + os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts") +) + + +class ArtifactPathError(ValueError): + """Raised when (decky, stored_as, service) fails validation or escapes + the artifacts root. + + The router catches this and re-raises HTTPException(400). The lifter + catches it and treats the event as having no body available (no-tag). + """ + + +def resolve_artifact_path(decky: str, stored_as: str, service: str) -> Path: + """Validate inputs, resolve the on-disk path, and confirm it stays + inside the artifacts root. + + Raises :class:`ArtifactPathError` on any violation. Does NOT check + that the file exists — callers handle that distinctly (404 for the + router, no-tag for the lifter). + """ + if service not in _ALLOWED_SERVICES: + raise ArtifactPathError("invalid service") + if not _DECKY_RE.fullmatch(decky): + raise ArtifactPathError("invalid decky name") + if not _STORED_AS_RE.fullmatch(stored_as): + raise ArtifactPathError("invalid stored_as") + + root = ARTIFACTS_ROOT.resolve() + candidate = (root / decky / service / stored_as).resolve() + # defence-in-depth: even though the regexes reject `..`, make sure a + # symlink or weird filesystem state can't escape the root. + if root not in candidate.parents and candidate != root: + raise ArtifactPathError("path escapes artifacts root") + return candidate diff --git a/decnet/web/router/artifacts/api_get_artifact.py b/decnet/web/router/artifacts/api_get_artifact.py index 94a0a916..a554df50 100644 --- a/decnet/web/router/artifacts/api_get_artifact.py +++ b/decnet/web/router/artifacts/api_get_artifact.py @@ -8,61 +8,23 @@ The capture event already flows through the normal log pipeline (one RFC 5424 line per capture, see templates/ssh/emit_capture.py), so metadata is served via /logs. This endpoint exists only to retrieve the raw bytes — admin-gated because the payloads are attacker-controlled content. + +Path resolution lives in :mod:`decnet.artifacts.paths` so the TTP +EmailLifter can share the exact same validation when it disk-reaches +``.eml`` files at tag-time (DEBT-047). """ from __future__ import annotations -import os -import re -from pathlib import Path - from fastapi import APIRouter, Depends, HTTPException, Query from fastapi.responses import FileResponse +from decnet.artifacts.paths import ArtifactPathError, resolve_artifact_path from decnet.telemetry import traced as _traced from decnet.web.dependencies import require_admin router = APIRouter() -# Override via env for tests; the prod path matches the bind mount declared in -# decnet/services/ssh.py and decnet/services/smtp.py. -ARTIFACTS_ROOT = Path(os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts")) - -# decky names come from the deployer — lowercase alnum plus hyphens. -_DECKY_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,62}$") - -# Services that own an artifacts subdir. Kept explicit so a caller can't -# pivot into arbitrary subpaths via the query string. -_ALLOWED_SERVICES = {"ssh", "smtp"} - -# stored_as is assembled by the capturing template as: -# ${ts}_${sha:0:12}_${base} -# where ts is ISO-8601 UTC (e.g. 2026-04-18T02:22:56Z), sha is 12 hex chars, -# and base is the original filename's basename. Keep the filename charset -# tight but allow common punctuation dropped files actually use. -_STORED_AS_RE = re.compile( - r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z_[a-f0-9]{12}_[A-Za-z0-9._-]{1,255}$" -) - - -def _resolve_artifact_path(decky: str, stored_as: str, service: str) -> Path: - """Validate inputs, resolve the on-disk path, and confirm it stays inside - the artifacts root. Raises HTTPException(400) on any violation.""" - if service not in _ALLOWED_SERVICES: - raise HTTPException(status_code=400, detail="invalid service") - if not _DECKY_RE.fullmatch(decky): - raise HTTPException(status_code=400, detail="invalid decky name") - if not _STORED_AS_RE.fullmatch(stored_as): - raise HTTPException(status_code=400, detail="invalid stored_as") - - root = ARTIFACTS_ROOT.resolve() - candidate = (root / decky / service / stored_as).resolve() - # defence-in-depth: even though the regexes reject `..`, make sure a - # symlink or weird filesystem state can't escape the root. - if root not in candidate.parents and candidate != root: - raise HTTPException(status_code=400, detail="path escapes artifacts root") - return candidate - @router.get( "/artifacts/{decky}/{stored_as}", @@ -81,7 +43,10 @@ async def get_artifact( service: str = Query("ssh", pattern=r"^[a-z]{1,16}$"), admin: dict = Depends(require_admin), ) -> FileResponse: - path = _resolve_artifact_path(decky, stored_as, service) + try: + path = resolve_artifact_path(decky, stored_as, service) + except ArtifactPathError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc if not path.is_file(): raise HTTPException(status_code=404, detail="artifact not found") return FileResponse( diff --git a/tests/api/artifacts/test_get_artifact.py b/tests/api/artifacts/test_get_artifact.py index ee1201fa..f806ef1e 100644 --- a/tests/api/artifacts/test_get_artifact.py +++ b/tests/api/artifacts/test_get_artifact.py @@ -23,9 +23,10 @@ def artifacts_root(tmp_path, monkeypatch): (root / _DECKY / "ssh").mkdir(parents=True) (root / _DECKY / "ssh" / _VALID_STORED_AS).write_bytes(_PAYLOAD) - # Patch the module-level constant (captured at import time). - from decnet.web.router.artifacts import api_get_artifact - monkeypatch.setattr(api_get_artifact, "ARTIFACTS_ROOT", root) + # Patch the canonical module-level constant. Both the router and + # the EmailLifter resolve through decnet.artifacts.paths. + from decnet.artifacts import paths as artifact_paths + monkeypatch.setattr(artifact_paths, "ARTIFACTS_ROOT", root) return root @@ -137,8 +138,8 @@ async def test_smtp_service_serves_from_smtp_subdir( (root / _DECKY / "smtp").mkdir(parents=True) eml = "2026-04-18T02:22:56Z_abc123def456_msg.eml" (root / _DECKY / "smtp" / eml).write_bytes(b"From: a\r\n\r\nhi") - from decnet.web.router.artifacts import api_get_artifact - monkeypatch.setattr(api_get_artifact, "ARTIFACTS_ROOT", root) + from decnet.artifacts import paths as artifact_paths + monkeypatch.setattr(artifact_paths, "ARTIFACTS_ROOT", root) res = await client.get( f"/api/v1/artifacts/{_DECKY}/{eml}?service=smtp", headers={"Authorization": f"Bearer {auth_token}"}, diff --git a/tests/artifacts/__init__.py b/tests/artifacts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/artifacts/test_paths.py b/tests/artifacts/test_paths.py new file mode 100644 index 00000000..a9d00fac --- /dev/null +++ b/tests/artifacts/test_paths.py @@ -0,0 +1,84 @@ +"""Unit tests for decnet.artifacts.paths.resolve_artifact_path.""" + +from __future__ import annotations + +import os +import pytest + +from decnet.artifacts import paths as artifact_paths +from decnet.artifacts.paths import ArtifactPathError, resolve_artifact_path + + +_DECKY = "test-decky-01" +_VALID_STORED_AS = "2026-04-18T02:22:56Z_abc123def456_payload.bin" + + +@pytest.fixture +def root(tmp_path, monkeypatch): + monkeypatch.setattr(artifact_paths, "ARTIFACTS_ROOT", tmp_path) + return tmp_path + + +def test_valid_ssh_path(root): + p = resolve_artifact_path(_DECKY, _VALID_STORED_AS, "ssh") + assert p == (root / _DECKY / "ssh" / _VALID_STORED_AS).resolve() + + +def test_valid_smtp_path(root): + eml = "2026-04-18T02:22:56Z_abc123def456_msg.eml" + p = resolve_artifact_path(_DECKY, eml, "smtp") + assert p == (root / _DECKY / "smtp" / eml).resolve() + + +@pytest.mark.parametrize("service", ["rdp", "telnet", "", "../etc", "ssh/../smtp"]) +def test_invalid_service(root, service): + with pytest.raises(ArtifactPathError, match="invalid service"): + resolve_artifact_path(_DECKY, _VALID_STORED_AS, service) + + +@pytest.mark.parametrize("decky", [ + "UPPERCASE", "has_underscore", "has.dot", "-leading-hyphen", + "", "a/b", "..", +]) +def test_invalid_decky(root, decky): + with pytest.raises(ArtifactPathError, match="invalid decky name"): + resolve_artifact_path(decky, _VALID_STORED_AS, "ssh") + + +@pytest.mark.parametrize("stored_as", [ + "not-a-timestamp_abc123def456_payload.bin", + "2026-04-18T02:22:56Z_SHORT_payload.bin", + "2026-04-18T02:22:56Z_abc123def456_", + "random-string", + "", + "../../etc/passwd", +]) +def test_invalid_stored_as(root, stored_as): + with pytest.raises(ArtifactPathError, match="invalid stored_as"): + resolve_artifact_path(_DECKY, stored_as, "ssh") + + +def test_symlink_escape_blocked(tmp_path, monkeypatch): + """A symlink inside the artifacts tree pointing outside must not let + resolve_artifact_path return a path outside the root.""" + real_root = tmp_path / "real" + real_root.mkdir() + secret_dir = tmp_path / "outside" + secret_dir.mkdir() + (secret_dir / _VALID_STORED_AS).write_bytes(b"secret") + + decky_dir = real_root / _DECKY + decky_dir.mkdir() + # symlink the entire ssh subdir to the outside location + os.symlink(secret_dir, decky_dir / "ssh") + + monkeypatch.setattr(artifact_paths, "ARTIFACTS_ROOT", real_root) + + with pytest.raises(ArtifactPathError, match="escapes"): + resolve_artifact_path(_DECKY, _VALID_STORED_AS, "ssh") + + +def test_does_not_check_existence(root): + """Helper validates and resolves; existence is the caller's problem.""" + p = resolve_artifact_path(_DECKY, _VALID_STORED_AS, "ssh") + assert not p.exists()