feat(deckies): generic file drops on fleet + MazeNET deckies

Extracts the docker-exec-with-base64-stdin pattern out of canary/planter
and orchestrator/drivers/ssh into a shared decnet.decky_io package.
Both consumers now delegate; the canary planter test still proves the
contract end-to-end.

Adds POST/DELETE /api/v1/deckies/files for arbitrary file drops.
Container resolution is shared with the canary path: when topology_id is
absent the target is the fleet container (<name>-ssh); when it is present,
the request routes through resolve_decky_container, which picks <name>-ssh
if the topology decky exposes ssh and otherwise the topology base
container decnet_t_<id8>_<name>.

Path validation rejects relative paths and '..' traversal at the request
model layer.  Bad base64 → 400; unknown topology → 404; decky not in
topology → 422; docker exec failure → 409.
This commit is contained in:
2026-04-28 22:43:34 -04:00
parent 3fe999d706
commit 0bc4b05c73
19 changed files with 1047 additions and 176 deletions

View File

@@ -63,6 +63,10 @@ from .deploy import (
MutateIntervalRequest,
PurgeResponse,
)
from .decky import (
DeckyFileDeleteRequest,
DeckyFileDropRequest,
)
from .fleet import (
LOCAL_HOST_SENTINEL,
FleetDecky,
@@ -222,6 +226,8 @@ __all__ = [
"PurgeResponse",
# fleet
"LOCAL_HOST_SENTINEL",
"DeckyFileDeleteRequest",
"DeckyFileDropRequest",
"FleetDecky",
# health
"ComponentHealth",

View File

@@ -0,0 +1,61 @@
"""DTOs for cross-cutting decky operations (file drops, etc.).
These don't bind to a single table — fleet deckies and MazeNET
(topology) deckies share the request shape, with ``topology_id``
discriminating. Following ``feedback_models_single_source`` we put
the request/response shapes alongside the rest of the API contracts
under ``decnet.web.db.models``.
"""
from __future__ import annotations
from typing import Optional
from pydantic import BaseModel, Field as PydanticField, field_validator
class DeckyFileDropRequest(BaseModel):
    """Drop arbitrary bytes at an absolute path inside a decky container.

    ``content_b64`` is the base64-encoded payload.  Binary-safe.

    ``mode`` is the permission bits as a plain integer and defaults to
    ``0o644``.  JSON has no octal literals, so API clients send the
    decimal value (``420`` for ``0o644``).  Values outside
    ``0``..``0o7777`` are rejected at validation time instead of being
    handed to the container.

    ``mtime_offset`` is a seconds offset from now applied via
    ``touch -d`` so realistic-aged files don't all stamp at
    wall-clock-now.

    ``topology_id`` discriminates the target: ``None`` means a fleet
    decky, a value means a MazeNET (topology) decky.
    """

    decky_name: str = PydanticField(..., min_length=1)
    topology_id: Optional[str] = None
    path: str = PydanticField(..., min_length=1)
    content_b64: str
    # Permission bits only: 0o7777 covers rwx for user/group/other plus
    # setuid/setgid/sticky.  Anything else is not a valid chmod mode.
    mode: int = PydanticField(0o644, ge=0, le=0o7777)
    mtime_offset: int = 0

    @field_validator("path")
    @classmethod
    def _abs_no_traversal(cls, v: str) -> str:
        """Validate that ``path`` is absolute, NUL-free and has no ``..``.

        Returns the path unchanged when it is acceptable.

        Raises:
            ValueError: if the path does not start with ``/``, contains
                a NUL byte, or has a ``..`` segment.
        """
        if not v.startswith("/"):
            raise ValueError("path must be absolute (start with '/')")
        # A NUL byte can never be part of a POSIX pathname; reject it
        # here so it surfaces as a validation error rather than a
        # failure further down the exec path.
        if "\x00" in v:
            raise ValueError("path must not contain NUL bytes")
        # Defense in depth: even though we run as root inside the
        # container, ``..`` segments obscure the real on-disk location
        # and surprise both operators and the auditor reading the
        # placement_path field later.
        if ".." in v.split("/"):
            raise ValueError("path must not contain '..' segments")
        return v
class DeckyFileDeleteRequest(BaseModel):
    """Best-effort ``rm -f`` of an absolute path inside a decky container.

    ``topology_id`` discriminates the target: ``None`` means a fleet
    decky, a value means a MazeNET (topology) decky.
    """

    decky_name: str = PydanticField(..., min_length=1)
    topology_id: Optional[str] = None
    path: str = PydanticField(..., min_length=1)

    @field_validator("path")
    @classmethod
    def _abs_no_traversal(cls, v: str) -> str:
        """Validate that ``path`` is absolute, NUL-free and has no ``..``.

        Returns the path unchanged when it is acceptable.

        Raises:
            ValueError: if the path does not start with ``/``, contains
                a NUL byte, or has a ``..`` segment.
        """
        if not v.startswith("/"):
            raise ValueError("path must be absolute (start with '/')")
        # A NUL byte can never be part of a POSIX pathname; reject it
        # here so it surfaces as a validation error rather than a
        # failure further down the exec path.
        if "\x00" in v:
            raise ValueError("path must not contain NUL bytes")
        if ".." in v.split("/"):
            raise ValueError("path must not contain '..' segments")
        return v

View File

@@ -50,6 +50,7 @@ from .swarm_mgmt import swarm_mgmt_router
from .system import system_router
from .topology import topology_router
from .canary import canary_router
from .deckies import deckies_router
from .webhooks import webhooks_router
api_router = APIRouter(
@@ -156,6 +157,7 @@ api_router.include_router(topology_router)
# Canary tokens — operator-facing CRUD (worker hosts the
# attacker-facing surface separately via `decnet canary`).
api_router.include_router(canary_router)
api_router.include_router(deckies_router)
# External webhook subscriptions (SIEM/SOAR egress)
api_router.include_router(webhooks_router)

View File

@@ -66,26 +66,20 @@ async def _resolve_topology_target(
) -> str:
"""Validate (topology_id, decky_name) and return the docker container.
404 if the topology doesn't exist; 422 if the named decky isn't in it.
Hoisted into ``decky_io/resolve.py`` in workstream 2 so the file-drop
endpoint can share it; for now it's local to the canary router.
Delegates to :func:`decnet.decky_io.resolve_decky_container` and
translates its ``LookupError`` into HTTP 404/422 — 404 when the
topology itself is missing, 422 when the named decky isn't in it.
"""
from decnet.topology.persistence import hydrate
hydrated = await hydrate(repo, topology_id)
if hydrated is None:
raise HTTPException(status_code=404, detail="topology not found")
for decky in hydrated["deckies"]:
cfg = decky.get("decky_config") or {}
name = cfg.get("name") or decky.get("name")
if name == decky_name:
services = decky.get("services") or []
return planter.resolve_topology_container(
topology_id, decky_name, services,
)
raise HTTPException(
status_code=422,
detail=f"decky {decky_name!r} is not in topology {topology_id!r}",
)
from decnet.decky_io import resolve_decky_container
try:
return await resolve_decky_container(
repo, decky_name, topology_id=topology_id,
)
except LookupError as exc:
msg = str(exc)
if "topology" in msg and "not found" in msg:
raise HTTPException(status_code=404, detail=msg) from exc
raise HTTPException(status_code=422, detail=msg) from exc
def _trigger_row_to_response(row: dict[str, Any]) -> CanaryTriggerResponse:

View File

@@ -0,0 +1,21 @@
"""Cross-cutting decky operation endpoints.
These routes apply to both fleet and MazeNET (topology) deckies; the
MazeNET case is selected by passing ``topology_id`` in the request body.
Compare with:
* :mod:`decnet.web.router.fleet` — fleet-only CRUD (deploy, mutate,
list).
* :mod:`decnet.web.router.topology` — topology-only CRUD.
"""
from __future__ import annotations
from fastapi import APIRouter
from .api_file_drop import router as file_drop_router
# Aggregate router for the deckies package; it declares no prefix of its
# own, so the included sub-router's prefix applies as-is.
deckies_router = APIRouter()
# Mount the file-drop endpoints defined in ``api_file_drop``.
deckies_router.include_router(file_drop_router)
# Only the aggregate router is part of this package's public surface.
__all__ = ["deckies_router"]

View File

@@ -0,0 +1,126 @@
"""POST/DELETE /api/v1/deckies/files — generic file drops on deckies.
Wraps :func:`decnet.decky_io.write_file_to_container` /
:func:`decnet.decky_io.delete_file_from_container` so admins can drop
arbitrary bytes at arbitrary paths inside a running decky container —
fleet OR MazeNET — without going through the canary surface.
Auth: ``require_admin`` everywhere (matches every other write op on
deckies; see :mod:`decnet.web.router.fleet.api_mutate_decky`).
Container resolution mirrors the canary path: ``topology_id`` absent
means fleet (``<decky>-ssh``), present routes through
:func:`decnet.decky_io.resolve_decky_container` for the MazeNET
``<decky>-ssh`` / ``decnet_t_<id8>_<decky>`` distinction.
"""
from __future__ import annotations
import base64
from datetime import datetime, timedelta, timezone
from fastapi import APIRouter, Depends, HTTPException
from decnet.decky_io import (
delete_file_from_container,
resolve_decky_container,
write_file_to_container,
)
from decnet.logging import get_logger
from decnet.web.db.models import (
DeckyFileDeleteRequest,
DeckyFileDropRequest,
MessageResponse,
)
from decnet.web.dependencies import repo, require_admin
# Module logger; the handlers below emit one info line per successful
# drop/delete.
log = get_logger("api.deckies.files")
# Both handlers register on the empty sub-path, so POST and DELETE share
# the ``/deckies/files`` prefix declared here.
router = APIRouter(prefix="/deckies/files", tags=["Deckies"])
async def _resolve_container_or_4xx(
    decky_name: str, topology_id: str | None,
) -> str:
    """Return the docker container name for a decky, or raise a 4xx.

    Delegates to ``resolve_decky_container`` and converts its
    ``LookupError`` into an ``HTTPException``:

    * 404 when a ``topology_id`` was supplied and the error message
      mentions both ``topology`` and ``not found``;
    * 422 for every other lookup failure.

    The original error text is passed through as the response detail.
    """
    try:
        container = await resolve_decky_container(
            repo, decky_name, topology_id=topology_id,
        )
    except LookupError as exc:
        detail = str(exc)
        # The resolver signals both failure modes with the same
        # exception type, so the message text is the only discriminator.
        topology_missing = (
            bool(topology_id)
            and "topology" in detail
            and "not found" in detail
        )
        status = 404 if topology_missing else 422
        raise HTTPException(status_code=status, detail=detail) from exc
    return container
@router.post(
    "",
    response_model=MessageResponse,
    status_code=201,
    responses={
        400: {"description": "Invalid request body (bad base64, etc.)"},
        401: {"description": "Could not validate credentials"},
        403: {"description": "Insufficient permissions"},
        404: {"description": "Topology not found"},
        409: {"description": "docker exec failed (container down or path unwritable)"},
        422: {"description": "Path validation failed or decky not in topology"},
    },
)
async def api_drop_file(
    req: DeckyFileDropRequest,
    admin: dict = Depends(require_admin),
) -> MessageResponse:
    """Write the decoded payload to ``req.path`` inside the target decky.

    Steps, in order: decode ``content_b64`` (400 on failure), resolve the
    container (404/422 on lookup failure), write the file (409 when the
    write helper reports failure), then log the drop and return ``ok``.
    """
    # Strict decoding: ``validate=True`` rejects characters outside the
    # base64 alphabet instead of silently discarding them.
    try:
        payload = base64.b64decode(req.content_b64, validate=True)
    except (ValueError, TypeError) as exc:
        raise HTTPException(
            status_code=400, detail=f"content_b64 is not valid base64: {exc}",
        ) from exc

    target = await _resolve_container_or_4xx(req.decky_name, req.topology_id)

    # A zero offset means "no explicit mtime": pass ``None`` through.
    stamp = None
    if req.mtime_offset:
        stamp = datetime.now(timezone.utc) + timedelta(seconds=req.mtime_offset)

    ok, err = await write_file_to_container(
        target, req.path, payload, mode=req.mode, mtime=stamp,
    )
    if not ok:
        raise HTTPException(status_code=409, detail=err or "docker exec failed")

    actor = admin.get("uuid", "unknown")
    log.info(
        "decky.file.drop decky=%s topology=%s container=%s path=%s bytes=%d by=%s",
        req.decky_name, req.topology_id, target, req.path,
        len(payload), actor,
    )
    return MessageResponse(message="ok")
@router.delete(
    "",
    response_model=MessageResponse,
    responses={
        401: {"description": "Could not validate credentials"},
        403: {"description": "Insufficient permissions"},
        404: {"description": "Topology not found"},
        422: {"description": "Path validation failed or decky not in topology"},
    },
)
async def api_delete_file(
    req: DeckyFileDeleteRequest,
    admin: dict = Depends(require_admin),
) -> MessageResponse:
    """Remove ``req.path`` inside the target decky container.

    Resolves the container (404/422 on lookup failure), asks the delete
    helper to remove the path (409 when it reports failure), then logs
    the deletion and returns ``ok``.
    """
    target = await _resolve_container_or_4xx(req.decky_name, req.topology_id)
    ok, err = await delete_file_from_container(target, req.path)

    if not ok:
        # ``rm -f`` exits 0 even when the file is already absent, so a
        # failure here means the docker exec itself did not succeed.
        # That is not a 404: the caller asked for the file to be gone
        # and the container could not be reached, hence 409.
        raise HTTPException(status_code=409, detail=err or "docker exec failed")

    actor = admin.get("uuid", "unknown")
    log.info(
        "decky.file.delete decky=%s topology=%s container=%s path=%s by=%s",
        req.decky_name, req.topology_id, target, req.path,
        actor,
    )
    return MessageResponse(message="ok")