feat(deckies): generic file drops on fleet + MazeNET deckies

Extracts the docker-exec-with-base64-stdin pattern out of canary/planter
and orchestrator/drivers/ssh into a shared decnet.decky_io package.
Both consumers now delegate; the canary planter test still proves the
contract end-to-end.

Adds POST/DELETE /api/v1/deckies/files for arbitrary file drops.
Container resolution is shared with the canary path: topology_id absent
means fleet (<name>-ssh), present routes through resolve_decky_container
which picks <name>-ssh when the topology decky exposes ssh, else the
topology base container decnet_t_<id8>_<name>.

Path validation rejects relative paths and '..' traversal at the request
model layer.  Bad base64 → 400; unknown topology → 404; decky not in
topology → 422; docker exec failure → 409.
This commit is contained in:
2026-04-28 22:43:34 -04:00
parent 3fe999d706
commit 0bc4b05c73
19 changed files with 1047 additions and 176 deletions

View File

@@ -20,11 +20,8 @@ shape but speaks bytes-via-base64 over the wire.
"""
from __future__ import annotations
import asyncio
import base64
import os
import shlex
import time
from datetime import datetime, timedelta, timezone
from secrets import token_urlsafe
from typing import Any, Iterable, Optional
@@ -34,13 +31,16 @@ from decnet.bus.factory import get_bus
from decnet.canary.base import CanaryArtifact, CanaryContext
from decnet.canary.factory import get_generator
from decnet.canary.paths import default_path_for
from decnet.decky_io import (
delete_file_from_container,
resolve_topology_container,
write_file_to_container,
)
from decnet.logging import get_logger
from decnet.web.db.repository import BaseRepository
log = get_logger("canary.planter")
_DOCKER = "docker"
_TIMEOUT = 8.0
# Container suffix — matches the orchestrator SSH driver's convention
# (``<decky_name>-ssh``). Canary placement always happens through the
# ssh container because every decky has one and it carries the most
@@ -52,77 +52,16 @@ def _container_for(decky_name: str) -> str:
return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
def resolve_topology_container(
topology_id: str, decky_name: str, services: Iterable[str],
) -> str:
"""Container name to docker-exec into for a MazeNET decky.
The ssh service container (when present) wins because it carries the
most realistic filesystem layout — same rationale as the fleet path.
Otherwise we target the base container, whose name is set by
:func:`decnet.topology.compose._container_name`.
"""
if "ssh" in set(services):
return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
return f"decnet_t_{topology_id[:8]}_{decky_name}"
def _dirname(path: str) -> str:
idx = path.rfind("/")
if idx <= 0:
return "/"
return path[:idx]
async def _run(
argv: list[str], *, stdin_bytes: Optional[bytes] = None,
) -> tuple[int, str, str]:
try:
proc = await asyncio.create_subprocess_exec(
*argv,
stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
except FileNotFoundError as exc:
return 127, "", f"argv[0] not found: {exc}"
try:
stdout, stderr = await asyncio.wait_for(
proc.communicate(input=stdin_bytes), timeout=_TIMEOUT,
)
except asyncio.TimeoutError:
try:
proc.kill()
except ProcessLookupError:
pass
return 124, "", "timeout"
return (
proc.returncode if proc.returncode is not None else -1,
stdout.decode("utf-8", "replace"),
stderr.decode("utf-8", "replace"),
)
def _build_plant_command(artifact: CanaryArtifact) -> tuple[str, bytes]:
"""Compose the ``sh -c`` script + stdin payload for one artifact.
Binary safety: we base64-encode on the host and stream the result
over stdin to ``base64 -d`` inside the container, so the bytes
never touch the argv (kernel ARG_MAX would reject anything larger
than ~128KB-2MB depending on the host). Both ``base64`` (coreutils)
and ``touch -d @<unix_ts>`` are present on every Linux base image
we ship, so there's no per-distro branching.
"""
encoded = base64.b64encode(artifact.content)
mtime = int(time.time() + artifact.mtime_offset)
mode_str = oct(artifact.mode)[2:]
parts = [
f"mkdir -p {shlex.quote(_dirname(artifact.path))}",
f"base64 -d > {shlex.quote(artifact.path)}",
f"chmod {mode_str} {shlex.quote(artifact.path)}",
f"touch -d @{mtime} {shlex.quote(artifact.path)}",
]
return " && ".join(parts), encoded
# resolve_topology_container is re-exported from decky_io for back-compat
# with callers (tests, deploy hook) that imported it from this module
# before the decky_io extraction.
__all__ = [
"plant",
"revoke",
"resolve_topology_container",
"seed_baseline",
"seed_baseline_topology",
]
async def _publish(
@@ -173,14 +112,12 @@ async def plant(
await repo.update_canary_token_state(token_uuid, "failed", err)
return False, err
sh_cmd, stdin_payload = _build_plant_command(artifact)
target_container = container or _container_for(decky_name)
# ``-i`` keeps stdin attached so base64 -d inside the container can
# consume the encoded payload streamed from the host.
argv = [_DOCKER, "exec", "-i", target_container, "sh", "-c", sh_cmd]
rc, _stdout, stderr = await _run(argv, stdin_bytes=stdin_payload)
success = rc == 0
error = None if success else (stderr.strip()[:256] or f"rc={rc}")
mtime = datetime.now(timezone.utc) + timedelta(seconds=artifact.mtime_offset)
success, error = await write_file_to_container(
target_container, artifact.path, artifact.content,
mode=artifact.mode, mtime=mtime,
)
if repo is not None:
if success:
@@ -199,8 +136,8 @@ async def plant(
if not success:
log.warning(
"canary.plant failed decky=%s token=%s rc=%d stderr=%r",
decky_name, token_uuid, rc, stderr[:120],
"canary.plant failed decky=%s token=%s container=%s err=%r",
decky_name, token_uuid, target_container, error,
)
return success, error
@@ -221,12 +158,10 @@ async def revoke(
the file is gone after the call (whether we deleted it or it was
already missing); only docker / container-down errors return False.
"""
sh_cmd = f"rm -f {shlex.quote(placement_path)}"
target_container = container or _container_for(decky_name)
argv = [_DOCKER, "exec", target_container, "sh", "-c", sh_cmd]
rc, _stdout, stderr = await _run(argv)
success = rc == 0
error = None if success else (stderr.strip()[:256] or f"rc={rc}")
success, error = await delete_file_from_container(
target_container, placement_path,
)
if repo is not None:
await repo.update_canary_token_state(token_uuid, "revoked", error if not success else None)

View File

@@ -0,0 +1,39 @@
"""Shared primitives for writing/deleting files inside running deckies.
The canary planter and the orchestrator SSH driver both need to drop
bytes into a decky container's filesystem, then sometimes unlink them.
The ARG_MAX-safe ``base64 -d``-via-stdin trick lived in two places
before this module existed.
Public API:
* :func:`write_file_to_container` — write bytes at a path, set mode,
optionally backdate mtime.
* :func:`delete_file_from_container` — best-effort ``rm -f``.
* :func:`resolve_topology_container` — pick the right docker container
for a MazeNET decky based on its services list.
* :func:`resolve_decky_container` — async helper that takes
``(decky_name, topology_id?)``, hydrates the topology when needed,
and returns the docker container name.
Container resolution conventions are documented in
:mod:`decnet.topology.compose`; we mirror them here without taking
a runtime dependency on the compose generator.
"""
from __future__ import annotations
from .resolve import (
resolve_decky_container,
resolve_topology_container,
)
from .write import (
delete_file_from_container,
write_file_to_container,
)
__all__ = [
"delete_file_from_container",
"resolve_decky_container",
"resolve_topology_container",
"write_file_to_container",
]

View File

@@ -0,0 +1,72 @@
"""Decky-name → docker container name resolution.
Two scopes:
* **Fleet**: every fleet decky has a ``ssh`` service container named
``<decky_name>-ssh`` (see :mod:`decnet.services.ssh`). We always
target it because it carries the most realistic filesystem layout.
* **MazeNET (topology)**: same ``<name>-ssh`` convention when the
decky exposes the ssh service; otherwise the decky's base container
named ``decnet_t_<topology_id8>_<decky_name>`` (matches
:func:`decnet.topology.compose._container_name`).
Keeping resolution centralised here means new ``docker exec`` callers
(file drops, future bulk planters, etc.) never need to learn the
naming conventions — they just call :func:`resolve_decky_container`.
"""
from __future__ import annotations
from typing import Any, Iterable, Optional
_SSH_CONTAINER_SUFFIX = "-ssh"
def resolve_topology_container(
topology_id: str, decky_name: str, services: Iterable[str],
) -> str:
"""Container name for a MazeNET decky.
See module docstring for the convention. Pure function — no I/O.
"""
if "ssh" in set(services):
return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
return f"decnet_t_{topology_id[:8]}_{decky_name}"
async def resolve_decky_container(
repo: Any,
decky_name: str,
*,
topology_id: Optional[str] = None,
) -> str:
"""Resolve the docker container name for *decky_name*.
Fleet path (``topology_id is None``): returns ``<decky_name>-ssh``
unconditionally. No DB lookup — the caller is responsible for
knowing the decky exists; if it doesn't, the subsequent
``docker exec`` returns a clear error.
Topology path: hydrates the topology, looks up the decky's services
list, delegates to :func:`resolve_topology_container`.
Raises:
LookupError — when ``topology_id`` is set but the topology or
its named decky doesn't exist. Callers translate this into
404/422 at the API layer.
"""
if topology_id is None:
return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
from decnet.topology.persistence import hydrate
hydrated = await hydrate(repo, topology_id)
if hydrated is None:
raise LookupError(f"topology {topology_id!r} not found")
for decky in hydrated["deckies"]:
cfg = decky.get("decky_config") or {}
name = cfg.get("name") or decky.get("name")
if name == decky_name:
services = decky.get("services") or []
return resolve_topology_container(topology_id, decky_name, services)
raise LookupError(
f"decky {decky_name!r} is not in topology {topology_id!r}"
)

124
decnet/decky_io/write.py Normal file
View File

@@ -0,0 +1,124 @@
"""``docker exec``-driven file write/delete inside a decky container.
The write path streams a base64-encoded payload over stdin to
``base64 -d`` inside the container, so binary content of any size up
to docker's stream limits is safe — interpolating bytes into argv
would trip ARG_MAX (~128 KB on most kernels) for any non-trivial blob.
"""
from __future__ import annotations
import asyncio
import base64
import shlex
from datetime import datetime, timezone
from typing import Optional
from decnet.logging import get_logger
log = get_logger("decky_io.write")
_DOCKER = "docker"
_DEFAULT_TIMEOUT = 8.0
def _dirname(path: str) -> str:
idx = path.rfind("/")
if idx <= 0:
return "/"
return path[:idx]
async def _run(
argv: list[str],
*,
stdin_bytes: Optional[bytes] = None,
timeout: float = _DEFAULT_TIMEOUT,
) -> tuple[int, str, str]:
try:
proc = await asyncio.create_subprocess_exec(
*argv,
stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
except FileNotFoundError as exc:
return 127, "", f"argv[0] not found: {exc}"
try:
stdout, stderr = await asyncio.wait_for(
proc.communicate(input=stdin_bytes), timeout=timeout,
)
except asyncio.TimeoutError:
try:
proc.kill()
except ProcessLookupError:
pass
return 124, "", "timeout"
return (
proc.returncode if proc.returncode is not None else -1,
stdout.decode("utf-8", "replace"),
stderr.decode("utf-8", "replace"),
)
async def write_file_to_container(
container: str,
path: str,
content: bytes,
*,
mode: int = 0o644,
mtime: Optional[datetime] = None,
timeout: float = _DEFAULT_TIMEOUT,
) -> tuple[bool, Optional[str]]:
"""Write *content* to *path* inside *container* via ``docker exec``.
The directory above *path* is created if missing; *mode* is applied
after the write; when *mtime* is provided the file is backdated via
``touch -d`` (UTC ISO 8601).
Returns ``(success, error_or_none)``. ``error`` is the trimmed
docker stderr on rc != 0, or a short "rc=<n>" if stderr was empty.
"""
if not path:
return False, "empty path"
encoded = base64.b64encode(content)
parts = [
f"mkdir -p {shlex.quote(_dirname(path))}",
f"base64 -d > {shlex.quote(path)}",
f"chmod {mode:o} {shlex.quote(path)}",
]
if mtime is not None:
ts = mtime.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
parts.append(f"touch -d {shlex.quote(ts)} {shlex.quote(path)}")
sh_cmd = " && ".join(parts)
argv = [_DOCKER, "exec", "-i", container, "sh", "-c", sh_cmd]
rc, _stdout, stderr = await _run(argv, stdin_bytes=encoded, timeout=timeout)
success = rc == 0
if success:
return True, None
err = stderr.strip()[:256] or f"rc={rc}"
log.warning(
"decky_io.write failed container=%s path=%s rc=%d stderr=%r",
container, path, rc, stderr[:120],
)
return False, err
async def delete_file_from_container(
container: str,
path: str,
*,
timeout: float = _DEFAULT_TIMEOUT,
) -> tuple[bool, Optional[str]]:
"""Best-effort ``rm -f`` of *path* inside *container*.
Returns ``(success, error_or_none)``. ``rm -f`` returns rc=0 even
when the file is already gone, so a True result here means "the
file is not present after this call", regardless of who unlinked it.
"""
sh_cmd = f"rm -f {shlex.quote(path)}"
argv = [_DOCKER, "exec", container, "sh", "-c", sh_cmd]
rc, _stdout, stderr = await _run(argv, timeout=timeout)
if rc == 0:
return True, None
return False, stderr.strip()[:256] or f"rc={rc}"

View File

@@ -18,11 +18,8 @@ or IP can't escape into a shell.
from __future__ import annotations
import asyncio
import shlex
from typing import Any
import base64
from datetime import datetime, timezone
from datetime import datetime
from decnet.logging import get_logger
from decnet.orchestrator.drivers.base import ActivityDriver, ActivityResult
@@ -226,36 +223,24 @@ class SSHDriver(ActivityDriver):
) -> ActivityResult:
"""Write *content* to *path* inside *decky_name*'s ssh container.
Streams base64 via stdin (mirrors :mod:`decnet.canary.planter`'s
ARG_MAX-safe write — see commit c17b9e0). Sets file mode and,
when *mtime* is provided, ``touch -d`` to backdate the file so
it doesn't all stamp at wall-clock-now (the realism failure
this migration is fixing).
Delegates to :func:`decnet.decky_io.write_file_to_container`,
which carries the ARG_MAX-safe base64-via-stdin trick. Sets
file mode and, when *mtime* is provided, ``touch -d`` to
backdate the file (otherwise everything stamps at wall-clock-now
— the realism failure this path was originally fixing).
"""
from decnet.decky_io import write_file_to_container
container = _container_for(decky_name)
b64 = base64.b64encode(content).decode("ascii")
# touch -d accepts ISO 8601; we always emit UTC so the
# container's local TZ doesn't drift the mtime.
if mtime is not None:
ts = mtime.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
touch_cmd = f"touch -d {shlex.quote(ts)} {shlex.quote(path)}"
else:
touch_cmd = f"touch {shlex.quote(path)}"
sh_cmd = (
f"mkdir -p {shlex.quote(_dirname(path))} && "
f"base64 -d > {shlex.quote(path)} && "
f"chmod {mode:o} {shlex.quote(path)} && "
f"{touch_cmd}"
success, error = await write_file_to_container(
container, path, content, mode=mode, mtime=mtime, timeout=_TIMEOUT,
)
argv = [_DOCKER, "exec", "-i", container, "sh", "-c", sh_cmd]
rc, _stdout, stderr = await _run_with_stdin(argv, b64.encode("ascii"))
success = rc == 0
payload: dict[str, Any] = {
"dst_decky": decky_name,
"path": path,
"bytes": len(content),
"rc": rc,
"stderr": stderr.strip()[:256] if not success else None,
"rc": 0 if success else 1,
"stderr": error if not success else None,
}
return ActivityResult(success=success, payload=payload)
@@ -283,11 +268,3 @@ class SSHDriver(ActivityDriver):
)
def _dirname(path: str) -> str:
"""Pure-string dirname. We can't trust ``os.path.dirname`` on the
host to share the destination container's separator semantics, but
deckies are POSIX so a plain ``rfind('/')`` suffices."""
idx = path.rfind("/")
if idx <= 0:
return "/"
return path[:idx]

View File

@@ -63,6 +63,10 @@ from .deploy import (
MutateIntervalRequest,
PurgeResponse,
)
from .decky import (
DeckyFileDeleteRequest,
DeckyFileDropRequest,
)
from .fleet import (
LOCAL_HOST_SENTINEL,
FleetDecky,
@@ -222,6 +226,8 @@ __all__ = [
"PurgeResponse",
# fleet
"LOCAL_HOST_SENTINEL",
"DeckyFileDeleteRequest",
"DeckyFileDropRequest",
"FleetDecky",
# health
"ComponentHealth",

View File

@@ -0,0 +1,61 @@
"""DTOs for cross-cutting decky operations (file drops, etc.).
These don't bind to a single table — fleet deckies and MazeNET
(topology) deckies share the request shape, with ``topology_id``
discriminating. Following ``feedback_models_single_source`` we put
the request/response shapes alongside the rest of the API contracts
under ``decnet.web.db.models``.
"""
from __future__ import annotations
from typing import Optional
from pydantic import BaseModel, Field as PydanticField, field_validator
class DeckyFileDropRequest(BaseModel):
"""Drop arbitrary bytes at an absolute path inside a decky container.
``content_b64`` is the base64-encoded payload. Binary-safe.
``mode`` defaults to ``0o644`` (octal int). ``mtime_offset`` is a
seconds offset from now applied via ``touch -d`` so realistic-aged
files don't all stamp at wall-clock-now.
"""
decky_name: str = PydanticField(..., min_length=1)
topology_id: Optional[str] = None
path: str = PydanticField(..., min_length=1)
content_b64: str
mode: int = 0o644
mtime_offset: int = 0
@field_validator("path")
@classmethod
def _abs_no_traversal(cls, v: str) -> str:
if not v.startswith("/"):
raise ValueError("path must be absolute (start with '/')")
# Defense in depth: even though we run as root inside the
# container, ``..`` segments make the on-disk location depend
# on the cwd at exec-time and surprise both operators and the
# auditor reading the placement_path field later.
for seg in v.split("/"):
if seg == "..":
raise ValueError("path must not contain '..' segments")
return v
class DeckyFileDeleteRequest(BaseModel):
"""Best-effort ``rm -f`` of an absolute path inside a decky container."""
decky_name: str = PydanticField(..., min_length=1)
topology_id: Optional[str] = None
path: str = PydanticField(..., min_length=1)
@field_validator("path")
@classmethod
def _abs_no_traversal(cls, v: str) -> str:
if not v.startswith("/"):
raise ValueError("path must be absolute (start with '/')")
for seg in v.split("/"):
if seg == "..":
raise ValueError("path must not contain '..' segments")
return v

View File

@@ -50,6 +50,7 @@ from .swarm_mgmt import swarm_mgmt_router
from .system import system_router
from .topology import topology_router
from .canary import canary_router
from .deckies import deckies_router
from .webhooks import webhooks_router
api_router = APIRouter(
@@ -156,6 +157,7 @@ api_router.include_router(topology_router)
# Canary tokens — operator-facing CRUD (worker hosts the
# attacker-facing surface separately via `decnet canary`).
api_router.include_router(canary_router)
api_router.include_router(deckies_router)
# External webhook subscriptions (SIEM/SOAR egress)
api_router.include_router(webhooks_router)

View File

@@ -66,26 +66,20 @@ async def _resolve_topology_target(
) -> str:
"""Validate (topology_id, decky_name) and return the docker container.
404 if the topology doesn't exist; 422 if the named decky isn't in it.
Hoisted into ``decky_io/resolve.py`` in workstream 2 so the file-drop
endpoint can share it; for now it's local to the canary router.
Delegates to :func:`decnet.decky_io.resolve_decky_container` and
translates its ``LookupError`` into HTTP 404/422 — 404 when the
topology itself is missing, 422 when the named decky isn't in it.
"""
from decnet.topology.persistence import hydrate
hydrated = await hydrate(repo, topology_id)
if hydrated is None:
raise HTTPException(status_code=404, detail="topology not found")
for decky in hydrated["deckies"]:
cfg = decky.get("decky_config") or {}
name = cfg.get("name") or decky.get("name")
if name == decky_name:
services = decky.get("services") or []
return planter.resolve_topology_container(
topology_id, decky_name, services,
)
raise HTTPException(
status_code=422,
detail=f"decky {decky_name!r} is not in topology {topology_id!r}",
)
from decnet.decky_io import resolve_decky_container
try:
return await resolve_decky_container(
repo, decky_name, topology_id=topology_id,
)
except LookupError as exc:
msg = str(exc)
if "topology" in msg and "not found" in msg:
raise HTTPException(status_code=404, detail=msg) from exc
raise HTTPException(status_code=422, detail=msg) from exc
def _trigger_row_to_response(row: dict[str, Any]) -> CanaryTriggerResponse:

View File

@@ -0,0 +1,21 @@
"""Cross-cutting decky operation endpoints.
These routes apply to both fleet and MazeNET (topology) deckies; the
MazeNET case is selected by passing ``topology_id`` in the request body.
Compare with:
* :mod:`decnet.web.router.fleet` — fleet-only CRUD (deploy, mutate,
list).
* :mod:`decnet.web.router.topology` — topology-only CRUD.
"""
from __future__ import annotations
from fastapi import APIRouter
from .api_file_drop import router as file_drop_router
deckies_router = APIRouter()
deckies_router.include_router(file_drop_router)
__all__ = ["deckies_router"]

View File

@@ -0,0 +1,126 @@
"""POST/DELETE /api/v1/deckies/files — generic file drops on deckies.
Wraps :func:`decnet.decky_io.write_file_to_container` /
:func:`decnet.decky_io.delete_file_from_container` so admins can drop
arbitrary bytes at arbitrary paths inside a running decky container —
fleet OR MazeNET — without going through the canary surface.
Auth: ``require_admin`` everywhere (matches every other write op on
deckies; see :mod:`decnet.web.router.fleet.api_mutate_decky`).
Container resolution mirrors the canary path: ``topology_id`` absent
means fleet (``<decky>-ssh``), present routes through
:func:`decnet.decky_io.resolve_decky_container` for the MazeNET
``<decky>-ssh`` / ``decnet_t_<id8>_<decky>`` distinction.
"""
from __future__ import annotations
import base64
from datetime import datetime, timedelta, timezone
from fastapi import APIRouter, Depends, HTTPException
from decnet.decky_io import (
delete_file_from_container,
resolve_decky_container,
write_file_to_container,
)
from decnet.logging import get_logger
from decnet.web.db.models import (
DeckyFileDeleteRequest,
DeckyFileDropRequest,
MessageResponse,
)
from decnet.web.dependencies import repo, require_admin
log = get_logger("api.deckies.files")
router = APIRouter(prefix="/deckies/files", tags=["Deckies"])
async def _resolve_container_or_4xx(
decky_name: str, topology_id: str | None,
) -> str:
"""Resolve to a docker container, mapping LookupError → 404/422."""
try:
return await resolve_decky_container(
repo, decky_name, topology_id=topology_id,
)
except LookupError as exc:
msg = str(exc)
if topology_id and "topology" in msg and "not found" in msg:
raise HTTPException(status_code=404, detail=msg) from exc
raise HTTPException(status_code=422, detail=msg) from exc
@router.post(
"",
response_model=MessageResponse,
status_code=201,
responses={
400: {"description": "Invalid request body (bad base64, etc.)"},
401: {"description": "Could not validate credentials"},
403: {"description": "Insufficient permissions"},
404: {"description": "Topology not found"},
409: {"description": "docker exec failed (container down or path unwritable)"},
422: {"description": "Path validation failed or decky not in topology"},
},
)
async def api_drop_file(
req: DeckyFileDropRequest,
admin: dict = Depends(require_admin),
) -> MessageResponse:
try:
content = base64.b64decode(req.content_b64, validate=True)
except (ValueError, TypeError) as exc:
raise HTTPException(
status_code=400, detail=f"content_b64 is not valid base64: {exc}",
) from exc
container = await _resolve_container_or_4xx(req.decky_name, req.topology_id)
mtime = (
datetime.now(timezone.utc) + timedelta(seconds=req.mtime_offset)
if req.mtime_offset
else None
)
success, error = await write_file_to_container(
container, req.path, content, mode=req.mode, mtime=mtime,
)
if not success:
raise HTTPException(status_code=409, detail=error or "docker exec failed")
log.info(
"decky.file.drop decky=%s topology=%s container=%s path=%s bytes=%d by=%s",
req.decky_name, req.topology_id, container, req.path,
len(content), admin.get("uuid", "unknown"),
)
return MessageResponse(message="ok")
@router.delete(
"",
response_model=MessageResponse,
responses={
401: {"description": "Could not validate credentials"},
403: {"description": "Insufficient permissions"},
404: {"description": "Topology not found"},
422: {"description": "Path validation failed or decky not in topology"},
},
)
async def api_delete_file(
req: DeckyFileDeleteRequest,
admin: dict = Depends(require_admin),
) -> MessageResponse:
container = await _resolve_container_or_4xx(req.decky_name, req.topology_id)
success, error = await delete_file_from_container(container, req.path)
# ``rm -f`` returns 0 even when the file is already gone, so a
# False here means the docker exec itself failed. Don't 404 — the
# caller asked us to ensure absence and we couldn't reach the
# container. Surface it as 409.
if not success:
raise HTTPException(status_code=409, detail=error or "docker exec failed")
log.info(
"decky.file.delete decky=%s topology=%s container=%s path=%s by=%s",
req.decky_name, req.topology_id, container, req.path,
admin.get("uuid", "unknown"),
)
return MessageResponse(message="ok")

View File

View File

@@ -0,0 +1,252 @@
"""End-to-end coverage for /api/v1/deckies/files via the live FastAPI app.
The docker subprocess is stubbed; everything else (DB, repo, auth)
runs for real.
"""
from __future__ import annotations
import asyncio
import base64
from unittest.mock import patch
import httpx
import pytest
_BASE = "/api/v1/deckies/files"
class _FakeProc:
def __init__(self, rc: int = 0, stderr: bytes = b"") -> None:
self.returncode = rc
self._stderr = stderr
async def communicate(self, input: bytes | None = None) -> tuple[bytes, bytes]:
return b"", self._stderr
def kill(self) -> None: # pragma: no cover
pass
def _patch_subprocess_capture(rc: int = 0, stderr: bytes = b""):
captured: list[list[str]] = []
async def _fake(*argv, **kw):
captured.append(list(argv))
return _FakeProc(rc, stderr)
return patch.object(asyncio, "create_subprocess_exec", _fake), captured
def _hdr(token: str) -> dict[str, str]:
return {"Authorization": f"Bearer {token}"}
def _hydrate_returning(deckies: list[dict]):
async def _fake(_repo, _topo_id):
return {
"topology": {"id": _topo_id},
"lans": [], "edges": [], "deckies": deckies,
}
return _fake
# ---------------- POST: drop file -----------------------------------------
@pytest.mark.asyncio
async def test_drop_file_on_fleet_decky_uses_ssh_container(
client: httpx.AsyncClient, auth_token: str
) -> None:
patcher, captured = _patch_subprocess_capture()
body_b64 = base64.b64encode(b"hello world").decode()
with patcher:
res = await client.post(
_BASE,
json={
"decky_name": "web1",
"path": "/root/note.txt",
"content_b64": body_b64,
},
headers=_hdr(auth_token),
)
assert res.status_code == 201, res.text
# docker exec -i web1-ssh sh -c <script>
assert captured and captured[0][3] == "web1-ssh"
@pytest.mark.asyncio
async def test_drop_file_on_topology_decky_with_ssh_service(
client: httpx.AsyncClient, auth_token: str, monkeypatch
) -> None:
monkeypatch.setattr(
"decnet.topology.persistence.hydrate",
_hydrate_returning([{
"uuid": "u1", "name": "web1",
"decky_config": {"name": "web1"},
"services": ["ssh", "http"],
}]),
)
patcher, captured = _patch_subprocess_capture()
with patcher:
res = await client.post(
_BASE,
json={
"decky_name": "web1",
"topology_id": "abcdef0123456789",
"path": "/etc/synthetic.conf",
"content_b64": base64.b64encode(b"x").decode(),
},
headers=_hdr(auth_token),
)
assert res.status_code == 201, res.text
assert captured[0][3] == "web1-ssh"
@pytest.mark.asyncio
async def test_drop_file_on_topology_decky_without_ssh_uses_base_container(
client: httpx.AsyncClient, auth_token: str, monkeypatch
) -> None:
monkeypatch.setattr(
"decnet.topology.persistence.hydrate",
_hydrate_returning([{
"uuid": "u1", "name": "router",
"decky_config": {"name": "router"},
"services": ["dns"],
}]),
)
patcher, captured = _patch_subprocess_capture()
with patcher:
res = await client.post(
_BASE,
json={
"decky_name": "router",
"topology_id": "fedcba9876543210",
"path": "/etc/synthetic.conf",
"content_b64": base64.b64encode(b"x").decode(),
},
headers=_hdr(auth_token),
)
assert res.status_code == 201, res.text
assert captured[0][3] == "decnet_t_fedcba98_router"
@pytest.mark.asyncio
async def test_drop_file_404_when_topology_unknown(
client: httpx.AsyncClient, auth_token: str, monkeypatch
) -> None:
async def _none(_repo, _topo_id):
return None
monkeypatch.setattr("decnet.topology.persistence.hydrate", _none)
res = await client.post(
_BASE,
json={
"decky_name": "web1", "topology_id": "ghost",
"path": "/etc/x.conf",
"content_b64": base64.b64encode(b"x").decode(),
},
headers=_hdr(auth_token),
)
assert res.status_code == 404
@pytest.mark.asyncio
async def test_drop_file_422_for_relative_path(
client: httpx.AsyncClient, auth_token: str
) -> None:
res = await client.post(
_BASE,
json={
"decky_name": "web1",
"path": "etc/x.conf",
"content_b64": base64.b64encode(b"x").decode(),
},
headers=_hdr(auth_token),
)
assert res.status_code == 422
@pytest.mark.asyncio
async def test_drop_file_422_for_traversal(
client: httpx.AsyncClient, auth_token: str
) -> None:
res = await client.post(
_BASE,
json={
"decky_name": "web1",
"path": "/etc/../root/.ssh/authorized_keys",
"content_b64": base64.b64encode(b"x").decode(),
},
headers=_hdr(auth_token),
)
assert res.status_code == 422
@pytest.mark.asyncio
async def test_drop_file_400_on_bad_base64(
client: httpx.AsyncClient, auth_token: str
) -> None:
res = await client.post(
_BASE,
json={
"decky_name": "web1",
"path": "/etc/x.conf",
"content_b64": "%%%not-base64%%%",
},
headers=_hdr(auth_token),
)
assert res.status_code == 400
@pytest.mark.asyncio
async def test_drop_file_409_when_docker_exec_fails(
client: httpx.AsyncClient, auth_token: str
) -> None:
patcher, _captured = _patch_subprocess_capture(
rc=1, stderr=b"container not running",
)
with patcher:
res = await client.post(
_BASE,
json={
"decky_name": "web1",
"path": "/etc/x.conf",
"content_b64": base64.b64encode(b"x").decode(),
},
headers=_hdr(auth_token),
)
assert res.status_code == 409
# ---------------- DELETE --------------------------------------------------
@pytest.mark.asyncio
async def test_delete_file_round_trip(
client: httpx.AsyncClient, auth_token: str
) -> None:
patcher, captured = _patch_subprocess_capture()
with patcher:
res = await client.request(
"DELETE", _BASE,
json={"decky_name": "web1", "path": "/etc/x.conf"},
headers=_hdr(auth_token),
)
assert res.status_code == 200, res.text
# docker exec web1-ssh sh -c "rm -f /etc/x.conf"
assert captured[0][2] == "web1-ssh"
assert "rm -f /etc/x.conf" in captured[0][5]
# ---------------- auth ----------------------------------------------------
@pytest.mark.asyncio
async def test_unauthenticated_drop_rejected(
client: httpx.AsyncClient,
) -> None:
res = await client.post(_BASE, json={
"decky_name": "web1", "path": "/x",
"content_b64": base64.b64encode(b"x").decode(),
})
assert res.status_code in (401, 403)

View File

@@ -115,9 +115,9 @@ async def test_plant_argv_and_base64_round_trip(repo: SQLiteRepository, fake_bus
assert stdin_seen[0] == base64.b64encode(art.content)
assert "base64 -d > /home/admin/.aws/credentials" in script
assert base64.b64encode(art.content).decode() not in script
# touch -d @<mtime> with negative offset → an int strictly less than now.
m = re.search(r"touch -d @(\d+) ", script)
assert m and int(m.group(1)) > 0
# touch -d 'YYYY-MM-DD HH:MM:SS UTC' — backdated via mtime_offset.
m = re.search(r"touch -d '(\d{4}-\d{2}-\d{2}) ", script)
assert m
# State transitioned to planted.
row = await repo.get_canary_token("tok-1")
assert row["state"] == "planted" and row["last_error"] is None

View File

View File

@@ -0,0 +1,93 @@
"""Unit coverage for decnet.decky_io.resolve — container-name helpers."""
from __future__ import annotations
import pytest
from decnet.decky_io import (
resolve_decky_container,
resolve_topology_container,
)
def test_resolve_topology_container_prefers_ssh_service() -> None:
assert resolve_topology_container(
"abc123def456", "web1", services=["ssh", "http"],
) == "web1-ssh"
def test_resolve_topology_container_falls_back_to_base_when_no_ssh() -> None:
assert resolve_topology_container(
"abc123def456789", "router", services=["dns"],
) == "decnet_t_abc123de_router"
@pytest.mark.asyncio
async def test_resolve_decky_container_fleet_path_returns_ssh_suffix() -> None:
# Fleet path needs no I/O — repo can be anything.
container = await resolve_decky_container(None, "web1")
assert container == "web1-ssh"
@pytest.mark.asyncio
async def test_resolve_decky_container_topology_path_uses_services_list(
monkeypatch,
) -> None:
async def _fake_hydrate(_repo, _topo_id):
return {
"topology": {"id": _topo_id},
"lans": [],
"deckies": [
{
"uuid": "u1", "name": "web1",
"decky_config": {"name": "web1"},
"services": ["ssh"],
},
{
"uuid": "u2", "name": "router",
"decky_config": {"name": "router"},
"services": ["dns"],
},
],
"edges": [],
}
monkeypatch.setattr(
"decnet.topology.persistence.hydrate", _fake_hydrate,
)
assert await resolve_decky_container(
None, "web1", topology_id="abcdef0123456789",
) == "web1-ssh"
assert await resolve_decky_container(
None, "router", topology_id="abcdef0123456789",
) == "decnet_t_abcdef01_router"
@pytest.mark.asyncio
async def test_resolve_decky_container_raises_when_topology_missing(
monkeypatch,
) -> None:
async def _none(_repo, _topo_id):
return None
monkeypatch.setattr("decnet.topology.persistence.hydrate", _none)
with pytest.raises(LookupError, match="topology .* not found"):
await resolve_decky_container(None, "x", topology_id="ghost")
@pytest.mark.asyncio
async def test_resolve_decky_container_raises_when_decky_not_in_topology(
monkeypatch,
) -> None:
async def _fake(_repo, _topo_id):
return {
"topology": {"id": _topo_id},
"lans": [], "edges": [],
"deckies": [{
"uuid": "u1", "name": "other",
"decky_config": {"name": "other"},
"services": [],
}],
}
monkeypatch.setattr("decnet.topology.persistence.hydrate", _fake)
with pytest.raises(LookupError, match="not in topology"):
await resolve_decky_container(
None, "missing", topology_id="abcdef0123456789",
)

View File

@@ -0,0 +1,140 @@
"""Unit coverage for decnet.decky_io.write — the docker-exec wrapper.
Mirrors the canary planter's subprocess-mock pattern: we patch
:func:`asyncio.create_subprocess_exec` so the tests don't require a
docker daemon, then assert argv shape, stdin payload, and the
``mtime`` / ``mode`` knobs land in the rendered ``sh -c`` script.
"""
from __future__ import annotations
import asyncio
import base64
import re
from datetime import datetime, timezone
from unittest.mock import patch
import pytest
from decnet.decky_io import (
delete_file_from_container,
write_file_to_container,
)
class _FakeProc:
def __init__(self, rc: int = 0, stderr: bytes = b"") -> None:
self.returncode = rc
self._stderr = stderr
async def communicate(self, input: bytes | None = None) -> tuple[bytes, bytes]:
return b"", self._stderr
def kill(self) -> None: # pragma: no cover
pass
def _patch_subprocess(rc: int = 0, stderr: bytes = b""):
captured: list[list[str]] = []
stdin_seen: list[bytes | None] = []
async def _fake(*argv, **kw):
captured.append(list(argv))
proc = _FakeProc(rc, stderr)
orig = proc.communicate
async def communicate(input: bytes | None = None) -> tuple[bytes, bytes]:
stdin_seen.append(input)
return await orig(None)
proc.communicate = communicate # type: ignore[assignment]
return proc
return patch.object(asyncio, "create_subprocess_exec", _fake), captured, stdin_seen
@pytest.mark.asyncio
async def test_write_file_emits_correct_docker_argv_and_sh_script() -> None:
patcher, captured, stdin_seen = _patch_subprocess(rc=0)
with patcher:
success, error = await write_file_to_container(
"web1-ssh", "/etc/secrets.json", b'{"key":"value"}',
mode=0o600,
)
assert success is True and error is None
argv = captured[0]
assert argv[:4] == ["docker", "exec", "-i", "web1-ssh"]
assert argv[4:6] == ["sh", "-c"]
script = argv[6]
# Composed in fixed order: mkdir -p, base64 -d > path, chmod, [touch].
assert "mkdir -p /etc" in script
assert "base64 -d > /etc/secrets.json" in script
assert "chmod 600 /etc/secrets.json" in script
# Without explicit mtime, no touch -d is emitted.
assert "touch -d" not in script
# Stdin carries the base64 payload — never the argv (ARG_MAX safety).
assert stdin_seen[0] == base64.b64encode(b'{"key":"value"}')
@pytest.mark.asyncio
async def test_write_file_round_trips_arbitrary_binary() -> None:
patcher, _captured, stdin_seen = _patch_subprocess(rc=0)
payload = bytes(range(256)) * 8 # 2 KB of every byte value
with patcher:
success, _err = await write_file_to_container(
"web1-ssh", "/tmp/bin.dat", payload,
)
assert success is True
assert base64.b64decode(stdin_seen[0]) == payload
@pytest.mark.asyncio
async def test_write_file_backdates_mtime_via_iso_touch() -> None:
patcher, captured, _stdin = _patch_subprocess(rc=0)
mtime = datetime(2026, 4, 20, 11, 30, 0, tzinfo=timezone.utc)
with patcher:
await write_file_to_container(
"web1-ssh", "/etc/x.conf", b"hello", mtime=mtime,
)
script = captured[0][6]
assert "touch -d '2026-04-20 11:30:00 UTC' /etc/x.conf" in script
@pytest.mark.asyncio
async def test_write_file_returns_failure_with_stderr_on_nonzero_rc() -> None:
patcher, _captured, _stdin = _patch_subprocess(rc=125, stderr=b"container down")
with patcher:
success, error = await write_file_to_container(
"web1-ssh", "/etc/x.conf", b"y",
)
assert success is False
assert error and "container down" in error
@pytest.mark.asyncio
async def test_write_file_rejects_empty_path() -> None:
success, error = await write_file_to_container(
"web1-ssh", "", b"y",
)
assert success is False and error == "empty path"
@pytest.mark.asyncio
async def test_delete_file_emits_rm_minus_f() -> None:
patcher, captured, _stdin = _patch_subprocess(rc=0)
with patcher:
success, _err = await delete_file_from_container(
"web1-ssh", "/etc/secrets.json",
)
assert success is True
argv = captured[0]
assert argv[:3] == ["docker", "exec", "web1-ssh"]
assert "rm -f /etc/secrets.json" in argv[5]
@pytest.mark.asyncio
async def test_delete_file_returns_failure_on_docker_error() -> None:
patcher, _captured, _stdin = _patch_subprocess(rc=1, stderr=b"oops")
with patcher:
success, error = await delete_file_from_container(
"web1-ssh", "/etc/x.conf",
)
assert success is False and error == "oops"

View File

@@ -58,15 +58,30 @@ async def test_traffic_failure_when_banner_missing(monkeypatch):
async def test_file_action_invokes_docker_exec_on_dst(monkeypatch):
captured: list[tuple[list[str], bytes | None]] = []
async def fake_run_with_stdin(argv, stdin_bytes):
captured.append((argv, stdin_bytes))
return 0, "", ""
class _FakeProc:
returncode = 0
async def communicate(self, input=None):
return b"", b""
def kill(self): # pragma: no cover
pass
async def fake_create(*argv, **kw):
captured.append((list(argv), None))
proc = _FakeProc()
orig = proc.communicate
async def communicate(input=None):
captured[-1] = (captured[-1][0], input)
return await orig(None)
proc.communicate = communicate
return proc
# plant_file streams base64 content via stdin to avoid ARG_MAX
# (mirrors decnet.canary.planter; see commit c17b9e0). The test
# patches _run_with_stdin instead of _run because that's the
# codepath FileAction now exercises.
monkeypatch.setattr(ssh_driver, "_run_with_stdin", fake_run_with_stdin)
# (mirrors decnet.canary.planter; see commit c17b9e0). The driver
# now delegates to decky_io.write_file_to_container, which calls
# asyncio.create_subprocess_exec — patch that.
import asyncio as _asyncio
monkeypatch.setattr(_asyncio, "create_subprocess_exec", fake_create)
drv = ssh_driver.SSHDriver()
action = FileAction(
dst_uuid="u2", dst_name="decky-02",
@@ -107,13 +122,21 @@ async def test_run_handles_missing_docker_binary(monkeypatch):
@pytest.mark.asyncio
async def test_plant_file_applies_mtime_via_touch_d(monkeypatch):
from datetime import datetime, timezone
captured: list[tuple[list[str], bytes | None]] = []
captured: list[list[str]] = []
async def fake_run_with_stdin(argv, stdin_bytes):
captured.append((argv, stdin_bytes))
return 0, "", ""
class _FakeProc:
returncode = 0
async def communicate(self, input=None):
return b"", b""
def kill(self): # pragma: no cover
pass
monkeypatch.setattr(ssh_driver, "_run_with_stdin", fake_run_with_stdin)
async def fake_create(*argv, **kw):
captured.append(list(argv))
return _FakeProc()
import asyncio as _asyncio
monkeypatch.setattr(_asyncio, "create_subprocess_exec", fake_create)
drv = ssh_driver.SSHDriver()
mtime = datetime(2026, 4, 20, 11, 30, 0, tzinfo=timezone.utc)
result = await drv.plant_file(
@@ -121,7 +144,7 @@ async def test_plant_file_applies_mtime_via_touch_d(monkeypatch):
mode=0o644, mtime=mtime,
)
assert result.success is True
sh_cmd = captured[0][0][6]
sh_cmd = captured[0][6]
# Backdated mtime appears in the touch -d argument.
assert "touch -d '2026-04-20 11:30:00 UTC'" in sh_cmd
assert "chmod 644" in sh_cmd

View File

@@ -73,12 +73,14 @@ async def test_one_tick_records_event_and_publishes(repo, fake_bus, monkeypatch)
monkeypatch.setattr(ssh_driver, "_run", fake_run)
async def fake_run_with_stdin(argv, stdin_bytes):
# plant_file takes the base64-streaming path; treat any docker
# exec write as a successful no-op for the integration test.
return 0, "", ""
# plant_file delegates to decky_io.write_file_to_container; treat
# any docker exec write as a successful no-op for the integration
# test.
async def fake_write_file(*a, **kw):
return True, None
monkeypatch.setattr(ssh_driver, "_run_with_stdin", fake_run_with_stdin)
import decnet.decky_io.write as _decky_io_write
monkeypatch.setattr(_decky_io_write, "write_file_to_container", fake_write_file)
received: list = []
@@ -140,12 +142,14 @@ async def test_one_tick_picks_fleet_deckies(repo, fake_bus, monkeypatch):
monkeypatch.setattr(ssh_driver, "_run", fake_run)
async def fake_run_with_stdin(argv, stdin_bytes):
# plant_file takes the base64-streaming path; treat any docker
# exec write as a successful no-op for the integration test.
return 0, "", ""
# plant_file delegates to decky_io.write_file_to_container; treat
# any docker exec write as a successful no-op for the integration
# test.
async def fake_write_file(*a, **kw):
return True, None
monkeypatch.setattr(ssh_driver, "_run_with_stdin", fake_run_with_stdin)
import decnet.decky_io.write as _decky_io_write
monkeypatch.setattr(_decky_io_write, "write_file_to_container", fake_write_file)
await orch_worker._one_tick(repo, fake_bus)
@@ -282,12 +286,14 @@ async def test_tick_is_noop_when_no_running_deckies(repo, fake_bus, monkeypatch)
monkeypatch.setattr(ssh_driver, "_run", fake_run)
async def fake_run_with_stdin(argv, stdin_bytes):
# plant_file takes the base64-streaming path; treat any docker
# exec write as a successful no-op for the integration test.
return 0, "", ""
# plant_file delegates to decky_io.write_file_to_container; treat
# any docker exec write as a successful no-op for the integration
# test.
async def fake_write_file(*a, **kw):
return True, None
monkeypatch.setattr(ssh_driver, "_run_with_stdin", fake_run_with_stdin)
import decnet.decky_io.write as _decky_io_write
monkeypatch.setattr(_decky_io_write, "write_file_to_container", fake_write_file)
await orch_worker._one_tick(repo, fake_bus)
assert called is False