Files
DECNET/decnet/web/router/artifacts/api_get_artifact.py
anti c50448995b feat(smtp): capture full messages + attachments to disk
SMTP template now writes each accepted DATA body as a .eml file into a
bind-mounted per-decky quarantine dir and emits a `message_stored` log
with sha256, size, decoded headers, and an attachment manifest
(filename + sha256 + size + content-type). Attachment hashing uses the
*decoded* payload so operators can match against VT / MalwareBazaar
directly. Body accumulator is capped at SMTP_MAX_BODY_BYTES (default
10 MB, matching the EHLO SIZE advert) so a streaming client can't OOM
the container.

The existing /api/v1/artifacts/{decky}/{stored_as} endpoint now takes
an optional ?service= query param (defaults to ssh for back-compat)
and can serve .eml files out of the smtp subdir. Forensic metadata
rides the normal log pipeline, same as SSH file_captured.
2026-04-22 22:17:50 -04:00

92 lines
3.4 KiB
Python

"""
Artifact download endpoint.
SSH deckies farm attacker file drops into a host-mounted quarantine:
/var/lib/decnet/artifacts/{decky}/ssh/{stored_as}
The capture event already flows through the normal log pipeline (one
RFC 5424 line per capture, see templates/ssh/emit_capture.py), so metadata
is served via /logs. This endpoint exists only to retrieve the raw bytes —
admin-gated because the payloads are attacker-controlled content.
"""
from __future__ import annotations
import os
import re
from pathlib import Path
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import FileResponse
from decnet.telemetry import traced as _traced
from decnet.web.dependencies import require_admin
router = APIRouter()
# Override via env for tests; the prod path matches the bind mount declared in
# decnet/services/ssh.py and decnet/services/smtp.py.
ARTIFACTS_ROOT = Path(os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts"))
# decky names come from the deployer — lowercase alnum plus hyphens.
_DECKY_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,62}$")
# Services that own an artifacts subdir. Kept explicit so a caller can't
# pivot into arbitrary subpaths via the query string.
_ALLOWED_SERVICES = {"ssh", "smtp"}
# stored_as is assembled by the capturing template as:
# ${ts}_${sha:0:12}_${base}
# where ts is ISO-8601 UTC (e.g. 2026-04-18T02:22:56Z), sha is 12 hex chars,
# and base is the original filename's basename. Keep the filename charset
# tight but allow common punctuation dropped files actually use.
_STORED_AS_RE = re.compile(
r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z_[a-f0-9]{12}_[A-Za-z0-9._-]{1,255}$"
)
def _resolve_artifact_path(decky: str, stored_as: str, service: str) -> Path:
"""Validate inputs, resolve the on-disk path, and confirm it stays inside
the artifacts root. Raises HTTPException(400) on any violation."""
if service not in _ALLOWED_SERVICES:
raise HTTPException(status_code=400, detail="invalid service")
if not _DECKY_RE.fullmatch(decky):
raise HTTPException(status_code=400, detail="invalid decky name")
if not _STORED_AS_RE.fullmatch(stored_as):
raise HTTPException(status_code=400, detail="invalid stored_as")
root = ARTIFACTS_ROOT.resolve()
candidate = (root / decky / service / stored_as).resolve()
# defence-in-depth: even though the regexes reject `..`, make sure a
# symlink or weird filesystem state can't escape the root.
if root not in candidate.parents and candidate != root:
raise HTTPException(status_code=400, detail="path escapes artifacts root")
return candidate
@router.get(
"/artifacts/{decky}/{stored_as}",
tags=["Artifacts"],
responses={
400: {"description": "Invalid decky, service, or stored_as parameter"},
401: {"description": "Could not validate credentials"},
403: {"description": "Admin access required"},
404: {"description": "Artifact not found"},
},
)
@_traced("api.get_artifact")
async def get_artifact(
decky: str,
stored_as: str,
service: str = Query("ssh", pattern=r"^[a-z]{1,16}$"),
admin: dict = Depends(require_admin),
) -> FileResponse:
path = _resolve_artifact_path(decky, stored_as, service)
if not path.is_file():
raise HTTPException(status_code=404, detail="artifact not found")
return FileResponse(
path=str(path),
media_type="application/octet-stream",
filename=stored_as,
)