feat(deckies): generic file drops on fleet + MazeNET deckies

Extracts the docker-exec-with-base64-stdin pattern out of canary/planter
and orchestrator/drivers/ssh into a shared decnet.decky_io package.
Both consumers now delegate; the canary planter test still proves the
contract end-to-end.

Adds POST/DELETE /api/v1/deckies/files for arbitrary file drops.
Container resolution is shared with the canary path through
resolve_decky_container: when topology_id is absent the decky is a fleet
decky and the container is <name>-ssh; when it is present the topology is
hydrated and the container is <name>-ssh if the topology decky exposes
ssh, otherwise the topology base container decnet_t_<id8>_<name>.

Path validation rejects relative paths and '..' traversal at the request
model layer.  Bad base64 → 400; unknown topology → 404; decky not in
topology → 422; docker exec failure → 409.
This commit is contained in:
2026-04-28 22:43:34 -04:00
parent 3fe999d706
commit 0bc4b05c73
19 changed files with 1047 additions and 176 deletions

View File

@@ -0,0 +1,39 @@
"""Shared primitives for writing/deleting files inside running deckies.
The canary planter and the orchestrator SSH driver both need to drop
bytes into a decky container's filesystem, then sometimes unlink them.
The ARG_MAX-safe ``base64 -d``-via-stdin trick lived in two places
before this module existed.
Public API:
* :func:`write_file_to_container` — write bytes at a path, set mode,
optionally backdate mtime.
* :func:`delete_file_from_container` — best-effort ``rm -f``.
* :func:`resolve_topology_container` — pick the right docker container
for a MazeNET decky based on its services list.
* :func:`resolve_decky_container` — async helper that takes
``(decky_name, topology_id?)``, hydrates the topology when needed,
and returns the docker container name.
Container resolution conventions are documented in
:mod:`decnet.topology.compose`; we mirror them here without taking
a runtime dependency on the compose generator.
"""
from __future__ import annotations
from .resolve import (
resolve_decky_container,
resolve_topology_container,
)
from .write import (
delete_file_from_container,
write_file_to_container,
)
__all__ = [
"delete_file_from_container",
"resolve_decky_container",
"resolve_topology_container",
"write_file_to_container",
]

View File

@@ -0,0 +1,72 @@
"""Decky-name → docker container name resolution.
Two scopes:
* **Fleet**: every fleet decky has a ``ssh`` service container named
``<decky_name>-ssh`` (see :mod:`decnet.services.ssh`). We always
target it because it carries the most realistic filesystem layout.
* **MazeNET (topology)**: same ``<name>-ssh`` convention when the
decky exposes the ssh service; otherwise the decky's base container
named ``decnet_t_<topology_id8>_<decky_name>`` (matches
:func:`decnet.topology.compose._container_name`).
Keeping resolution centralised here means new ``docker exec`` callers
(file drops, future bulk planters, etc.) never need to learn the
naming conventions — they just call :func:`resolve_decky_container`.
"""
from __future__ import annotations
from typing import Any, Iterable, Optional
# Suffix appended to a decky name to form its ssh service container name.
_SSH_CONTAINER_SUFFIX = "-ssh"


def resolve_topology_container(
    topology_id: str, decky_name: str, services: Iterable[str],
) -> str:
    """Return the docker container name for a MazeNET (topology) decky.

    No I/O is performed; the result depends only on the arguments.

    Args:
        topology_id: Topology identifier; only its first 8 characters
            end up in the base-container name.
        decky_name: Name of the decky inside the topology.
        services: Service names exposed by the decky.

    Returns:
        ``<decky_name>-ssh`` when ``"ssh"`` is one of *services*,
        otherwise ``decnet_t_<topology_id[:8]>_<decky_name>``.
    """
    exposes_ssh = "ssh" in frozenset(services)
    if not exposes_ssh:
        # No ssh service container: fall back to the decky's base container.
        id_prefix = topology_id[:8]
        return f"decnet_t_{id_prefix}_{decky_name}"
    return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
async def resolve_decky_container(
    repo: Any,
    decky_name: str,
    *,
    topology_id: Optional[str] = None,
) -> str:
    """Map *decky_name* to the docker container that should be exec'd into.

    Fleet deckies (``topology_id is None``) always resolve to
    ``<decky_name>-ssh``; nothing is looked up, so a non-existent decky
    is not detected here.

    Topology deckies are resolved by hydrating the topology through
    *repo*, finding the entry whose name matches *decky_name*, and
    handing that entry's services to :func:`resolve_topology_container`.

    Args:
        repo: Repository handle passed straight through to ``hydrate``.
        decky_name: Name of the decky to resolve.
        topology_id: Topology the decky belongs to, or ``None`` for a
            fleet decky.

    Returns:
        The docker container name.

    Raises:
        LookupError: *topology_id* was given but either the topology
            could not be hydrated or it contains no decky with that name.
    """
    if topology_id is None:
        return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"

    # Imported at call time rather than module import time
    # (presumably to avoid an import cycle — TODO confirm).
    from decnet.topology.persistence import hydrate

    topology = await hydrate(repo, topology_id)
    if topology is None:
        raise LookupError(f"topology {topology_id!r} not found")

    def _entry_name(entry: Any) -> Any:
        # The name under ``decky_config`` wins; the top-level ``name``
        # key is the fallback.
        config = entry.get("decky_config") or {}
        return config.get("name") or entry.get("name")

    match = next(
        (
            entry
            for entry in topology["deckies"]
            if _entry_name(entry) == decky_name
        ),
        None,
    )
    if match is None:
        raise LookupError(
            f"decky {decky_name!r} is not in topology {topology_id!r}"
        )

    # NOTE(review): services are read from the top-level entry only, not
    # from ``decky_config`` — verify against the hydrate() schema.
    exposed = match.get("services") or []
    return resolve_topology_container(topology_id, decky_name, exposed)

124
decnet/decky_io/write.py Normal file
View File

@@ -0,0 +1,124 @@
"""``docker exec``-driven file write/delete inside a decky container.
The write path streams a base64-encoded payload over stdin to
``base64 -d`` inside the container, so binary content of any size up
to docker's stream limits is safe — interpolating bytes into argv
would trip the kernel's argument-size limits (Linux caps a single
argument at 128 KiB, ``MAX_ARG_STRLEN``) for any non-trivial blob.
"""
from __future__ import annotations
import asyncio
import base64
import shlex
from datetime import datetime, timezone
from typing import Optional
from decnet.logging import get_logger
# Module logger; used to report failed writes.
log = get_logger("decky_io.write")
# Executable placed at argv[0] of every ``docker exec`` invocation below.
_DOCKER = "docker"
# Default per-command deadline, in seconds (handed to ``asyncio.wait_for``).
_DEFAULT_TIMEOUT = 8.0
def _dirname(path: str) -> str:
    """Return the parent directory of a ``/``-separated *path*.

    Everything before the last ``/`` is returned.  When there is no
    ``/`` at all, or the only ``/`` is the leading one, the result is
    ``"/"``.
    """
    parent, _sep, _leaf = path.rpartition("/")
    # An empty head means "no slash" or "slash at index 0": both map to root.
    return parent or "/"
async def _run(
    argv: list[str],
    *,
    stdin_bytes: Optional[bytes] = None,
    timeout: float = _DEFAULT_TIMEOUT,
) -> tuple[int, str, str]:
    """Run *argv* as a subprocess (no shell) and capture its output.

    Args:
        argv: Program followed by its arguments.
        stdin_bytes: Bytes to feed to the child's stdin.  When ``None``
            no stdin pipe is opened for the child.
        timeout: Seconds the child is given to finish.

    Returns:
        ``(returncode, stdout, stderr)`` with both streams decoded as
        UTF-8, undecodable bytes replaced.  Two results are synthesised
        rather than coming from the child:

        * ``(127, "", "argv[0] not found: ...")`` when the executable
          cannot be found;
        * ``(124, "", "timeout")`` when the deadline expires, after the
          child has been killed and reaped.
    """
    try:
        proc = await asyncio.create_subprocess_exec(
            *argv,
            stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except FileNotFoundError as exc:
        return 127, "", f"argv[0] not found: {exc}"

    try:
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(input=stdin_bytes), timeout=timeout,
        )
    except asyncio.TimeoutError:
        try:
            proc.kill()
        except ProcessLookupError:
            # The child exited on its own between the timeout and the kill.
            pass
        # Reap the child: without awaiting its exit the killed process
        # lingers as a zombie and its transport is never cleaned up.
        await proc.wait()
        return 124, "", "timeout"

    return (
        proc.returncode if proc.returncode is not None else -1,
        stdout.decode("utf-8", "replace"),
        stderr.decode("utf-8", "replace"),
    )
async def write_file_to_container(
    container: str,
    path: str,
    content: bytes,
    *,
    mode: int = 0o644,
    mtime: Optional[datetime] = None,
    timeout: float = _DEFAULT_TIMEOUT,
) -> tuple[bool, Optional[str]]:
    """Drop *content* at *path* inside *container* using ``docker exec``.

    A single ``sh -c`` pipeline runs inside the container: create the
    parent directory, decode the base64 payload from stdin into *path*,
    ``chmod`` it to *mode*, and — only when *mtime* is given — backdate
    it with ``touch -d``.  The steps are joined with ``&&`` so the first
    failure stops the chain.

    Args:
        container: Docker container name to exec into.
        path: Destination path inside the container.
        content: Raw bytes to write.
        mode: Permission bits, rendered in octal for ``chmod``.
        mtime: Optional modification time; converted to UTC and passed
            to ``touch -d`` as ``YYYY-MM-DD HH:MM:SS UTC``.
        timeout: Seconds allowed for the whole ``docker exec``.

    Returns:
        ``(True, None)`` on success.  Otherwise ``(False, error)`` where
        *error* is ``"empty path"``, the stripped stderr truncated to
        256 characters, or ``"rc=<n>"`` when stderr was empty.
    """
    if not path:
        return False, "empty path"

    payload = base64.b64encode(content)
    target = shlex.quote(path)

    steps = [
        f"mkdir -p {shlex.quote(_dirname(path))}",
        f"base64 -d > {target}",
        f"chmod {mode:o} {target}",
    ]
    if mtime is not None:
        stamp = mtime.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
        steps.append(f"touch -d {shlex.quote(stamp)} {target}")

    # ``-i`` keeps stdin attached so the payload reaches ``base64 -d``.
    rc, _out, stderr = await _run(
        [_DOCKER, "exec", "-i", container, "sh", "-c", " && ".join(steps)],
        stdin_bytes=payload,
        timeout=timeout,
    )
    if rc == 0:
        return True, None

    log.warning(
        "decky_io.write failed container=%s path=%s rc=%d stderr=%r",
        container, path, rc, stderr[:120],
    )
    return False, stderr.strip()[:256] or f"rc={rc}"
async def delete_file_from_container(
    container: str,
    path: str,
    *,
    timeout: float = _DEFAULT_TIMEOUT,
) -> tuple[bool, Optional[str]]:
    """Best-effort ``rm -f`` of *path* inside *container*.

    Args:
        container: Docker container name to exec into.
        path: Path inside the container to remove.
        timeout: Seconds allowed for the ``docker exec``.

    Returns:
        ``(success, error_or_none)``.  ``rm -f`` returns rc=0 even when
        the file is already gone, so ``True`` means "the file is not
        present after this call", regardless of who unlinked it.  On
        failure *error* is ``"empty path"``, the stripped stderr
        truncated to 256 characters, or ``"rc=<n>"`` when stderr was
        empty.
    """
    # Same guard as write_file_to_container: an empty path is a caller
    # bug, so reject it up front instead of spending a docker round trip.
    if not path:
        return False, "empty path"

    sh_cmd = f"rm -f {shlex.quote(path)}"
    argv = [_DOCKER, "exec", container, "sh", "-c", sh_cmd]
    rc, _stdout, stderr = await _run(argv, timeout=timeout)
    if rc == 0:
        return True, None

    # Log failures the same way the write path does so both operations
    # are visible to operators.
    log.warning(
        "decky_io.delete failed container=%s path=%s rc=%d stderr=%r",
        container, path, rc, stderr[:120],
    )
    return False, stderr.strip()[:256] or f"rc={rc}"