feat(deckies): generic file drops on fleet + MazeNET deckies
Extracts the docker-exec-with-base64-stdin pattern out of canary/planter and orchestrator/drivers/ssh into a shared decnet.decky_io package. Both consumers now delegate; the canary planter test still proves the contract end-to-end. Adds POST/DELETE /api/v1/deckies/files for arbitrary file drops. Container resolution is shared with the canary path: topology_id absent means fleet (<name>-ssh), present routes through resolve_decky_container which picks <name>-ssh when the topology decky exposes ssh, else the topology base container decnet_t_<id8>_<name>. Path validation rejects relative paths and '..' traversal at the request model layer. Bad base64 → 400; unknown topology → 404; decky not in topology → 422; docker exec failure → 409.
This commit is contained in:
39
decnet/decky_io/__init__.py
Normal file
39
decnet/decky_io/__init__.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""Shared primitives for writing/deleting files inside running deckies.
|
||||
|
||||
The canary planter and the orchestrator SSH driver both need to drop
|
||||
bytes into a decky container's filesystem, then sometimes unlink them.
|
||||
The ARG_MAX-safe ``base64 -d``-via-stdin trick lived in two places
|
||||
before this module existed.
|
||||
|
||||
Public API:
|
||||
|
||||
* :func:`write_file_to_container` — write bytes at a path, set mode,
|
||||
optionally backdate mtime.
|
||||
* :func:`delete_file_from_container` — best-effort ``rm -f``.
|
||||
* :func:`resolve_topology_container` — pick the right docker container
|
||||
for a MazeNET decky based on its services list.
|
||||
* :func:`resolve_decky_container` — async helper that takes
|
||||
``(decky_name, topology_id?)``, hydrates the topology when needed,
|
||||
and returns the docker container name.
|
||||
|
||||
Container resolution conventions are documented in
|
||||
:mod:`decnet.topology.compose`; we mirror them here without taking
|
||||
a runtime dependency on the compose generator.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from .resolve import (
|
||||
resolve_decky_container,
|
||||
resolve_topology_container,
|
||||
)
|
||||
from .write import (
|
||||
delete_file_from_container,
|
||||
write_file_to_container,
|
||||
)
|
||||
|
||||
# Public API of the package: exactly the names re-exported above from
# ``.resolve`` and ``.write``.
__all__ = [
    "delete_file_from_container",
    "resolve_decky_container",
    "resolve_topology_container",
    "write_file_to_container",
]
|
||||
72
decnet/decky_io/resolve.py
Normal file
72
decnet/decky_io/resolve.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""Decky-name → docker container name resolution.
|
||||
|
||||
Two scopes:
|
||||
|
||||
* **Fleet**: every fleet decky has a ``ssh`` service container named
|
||||
``<decky_name>-ssh`` (see :mod:`decnet.services.ssh`). We always
|
||||
target it because it carries the most realistic filesystem layout.
|
||||
* **MazeNET (topology)**: same ``<name>-ssh`` convention when the
|
||||
decky exposes the ssh service; otherwise the decky's base container
|
||||
named ``decnet_t_<topology_id8>_<decky_name>`` (matches
|
||||
:func:`decnet.topology.compose._container_name`).
|
||||
|
||||
Keeping resolution centralised here means new ``docker exec`` callers
|
||||
(file drops, future bulk planters, etc.) never need to learn the
|
||||
naming conventions — they just call :func:`resolve_decky_container`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Iterable, Optional
|
||||
|
||||
# Appended to a decky name to form the name of its ssh service container.
_SSH_CONTAINER_SUFFIX = "-ssh"
|
||||
|
||||
|
||||
def resolve_topology_container(
    topology_id: str, decky_name: str, services: Iterable[str],
) -> str:
    """Return the docker container name for a MazeNET (topology) decky.

    Pure string computation; nothing is looked up or executed.

    Args:
        topology_id: Topology identifier; only its first 8 characters
            end up in the base container name.
        decky_name: Name of the decky inside the topology.
        services: Service names the decky exposes.

    Returns:
        ``<decky_name>-ssh`` when ``"ssh"`` is among *services*,
        otherwise ``decnet_t_<topology_id[:8]>_<decky_name>``.
    """
    exposed = {*services}
    if "ssh" not in exposed:
        # No ssh service container: fall back to the decky's base container.
        return f"decnet_t_{topology_id[:8]}_{decky_name}"
    return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
|
||||
|
||||
|
||||
async def resolve_decky_container(
    repo: Any,
    decky_name: str,
    *,
    topology_id: Optional[str] = None,
) -> str:
    """Map *decky_name* to the docker container that should be exec'd into.

    Without a *topology_id* the decky is treated as a fleet decky and the
    answer is always ``<decky_name>-ssh``; *repo* is not touched and the
    decky's existence is not checked.

    With a *topology_id* the topology is loaded through
    ``decnet.topology.persistence.hydrate`` and its ``"deckies"`` entries
    are scanned for one whose name (``decky_config["name"]`` if set, else
    the entry's own ``"name"``) equals *decky_name*. The match's
    ``"services"`` list is handed to :func:`resolve_topology_container`.

    Args:
        repo: Passed through unchanged to ``hydrate``.
        decky_name: Decky to resolve.
        topology_id: Topology scope, or ``None`` for the fleet.

    Returns:
        The docker container name.

    Raises:
        LookupError: ``hydrate`` returned ``None`` for *topology_id*, or
            no decky in the topology carries *decky_name*.
    """
    if topology_id is not None:
        # Imported lazily so the fleet path never loads the topology layer.
        from decnet.topology.persistence import hydrate

        topology = await hydrate(repo, topology_id)
        if topology is None:
            raise LookupError(f"topology {topology_id!r} not found")

        for entry in topology["deckies"]:
            entry_name = (
                (entry.get("decky_config") or {}).get("name")
                or entry.get("name")
            )
            if entry_name != decky_name:
                continue
            return resolve_topology_container(
                topology_id, decky_name, entry.get("services") or [],
            )

        raise LookupError(
            f"decky {decky_name!r} is not in topology {topology_id!r}"
        )

    return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
|
||||
124
decnet/decky_io/write.py
Normal file
124
decnet/decky_io/write.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""``docker exec``-driven file write/delete inside a decky container.
|
||||
|
||||
The write path streams a base64-encoded payload over stdin to
|
||||
``base64 -d`` inside the container, so binary content of any size up
|
||||
to docker's stream limits is safe — interpolating bytes into argv
would trip the kernel's argument-size limits (on Linux a single argument
string is capped at 128 KiB) for any non-trivial blob.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import shlex
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
from decnet.logging import get_logger
|
||||
|
||||
# Module logger; failed container writes are reported through it.
log = get_logger("decky_io.write")
|
||||
|
||||
# Executable used as argv[0] for every ``exec`` command built in this module.
_DOCKER = "docker"
|
||||
# Default per-command timeout in seconds, used when the caller passes none.
_DEFAULT_TIMEOUT = 8.0
|
||||
|
||||
|
||||
def _dirname(path: str) -> str:
|
||||
idx = path.rfind("/")
|
||||
if idx <= 0:
|
||||
return "/"
|
||||
return path[:idx]
|
||||
|
||||
|
||||
async def _run(
    argv: list[str],
    *,
    stdin_bytes: Optional[bytes] = None,
    timeout: float = _DEFAULT_TIMEOUT,
) -> tuple[int, str, str]:
    """Run *argv* as a subprocess and capture its output.

    Args:
        argv: Full command line; ``argv[0]`` is the executable.
        stdin_bytes: Bytes fed to the child's stdin. When ``None`` no
            stdin pipe is created.
        timeout: Seconds to wait for the child to finish.

    Returns:
        ``(returncode, stdout, stderr)`` with both streams decoded as
        UTF-8 (undecodable bytes replaced). Two synthetic results mirror
        shell conventions:

        * ``(127, "", "argv[0] not found: ...")`` when the executable
          cannot be found;
        * ``(124, "", "timeout")`` when the child outlives *timeout*.
    """
    try:
        proc = await asyncio.create_subprocess_exec(
            *argv,
            stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except FileNotFoundError as exc:
        return 127, "", f"argv[0] not found: {exc}"
    try:
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(input=stdin_bytes), timeout=timeout,
        )
    except asyncio.TimeoutError:
        try:
            proc.kill()
        except ProcessLookupError:
            # The child exited between the timeout firing and the kill.
            pass
        # Reap the child: without this the killed process lingers as a
        # zombie and its transport is only cleaned up at garbage collection.
        await proc.wait()
        return 124, "", "timeout"
    return (
        proc.returncode if proc.returncode is not None else -1,
        stdout.decode("utf-8", "replace"),
        stderr.decode("utf-8", "replace"),
    )
|
||||
|
||||
|
||||
async def write_file_to_container(
    container: str,
    path: str,
    content: bytes,
    *,
    mode: int = 0o644,
    mtime: Optional[datetime] = None,
    timeout: float = _DEFAULT_TIMEOUT,
) -> tuple[bool, Optional[str]]:
    """Drop *content* at *path* inside *container* using ``docker exec``.

    A single ``sh -c`` pipeline runs inside the container, each step
    chained with ``&&``:

    1. ``mkdir -p`` on the parent directory of *path*;
    2. ``base64 -d > path``, with the base64-encoded payload on stdin;
    3. ``chmod`` to *mode* (rendered in octal);
    4. only when *mtime* is given: ``touch -d`` with the timestamp
       converted to UTC and rendered as ``YYYY-MM-DD HH:MM:SS UTC``.

    Args:
        container: Docker container name to exec into.
        path: Destination path inside the container; shell-quoted.
        content: Raw bytes to write.
        mode: Permission bits applied after the write.
        mtime: Optional modification time to stamp on the file.
        timeout: Seconds allowed for the whole exec.

    Returns:
        ``(True, None)`` on success. On failure ``(False, message)``,
        where *message* is ``"empty path"`` for an empty *path*, else the
        stripped stderr cut to 256 characters, else ``"rc=<n>"`` when
        stderr is empty.
    """
    if not path:
        return False, "empty path"

    payload = base64.b64encode(content)
    target = shlex.quote(path)

    steps = [
        f"mkdir -p {shlex.quote(_dirname(path))}",
        f"base64 -d > {target}",
        f"chmod {mode:o} {target}",
    ]
    if mtime is not None:
        stamp = mtime.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
        steps.append(f"touch -d {shlex.quote(stamp)} {target}")

    # ``-i`` keeps stdin attached so the payload reaches ``base64 -d``.
    rc, _, stderr = await _run(
        [_DOCKER, "exec", "-i", container, "sh", "-c", " && ".join(steps)],
        stdin_bytes=payload,
        timeout=timeout,
    )
    if rc != 0:
        log.warning(
            "decky_io.write failed container=%s path=%s rc=%d stderr=%r",
            container, path, rc, stderr[:120],
        )
        return False, stderr.strip()[:256] or f"rc={rc}"
    return True, None
|
||||
|
||||
|
||||
async def delete_file_from_container(
    container: str,
    path: str,
    *,
    timeout: float = _DEFAULT_TIMEOUT,
) -> tuple[bool, Optional[str]]:
    """Remove *path* inside *container* with ``rm -f`` (best effort).

    Because ``rm -f`` exits 0 for a file that is already gone, a
    successful result means "the file is absent now", not "this call
    removed it".

    Args:
        container: Docker container name to exec into.
        path: Path to unlink inside the container; shell-quoted.
        timeout: Seconds allowed for the exec.

    Returns:
        ``(True, None)`` when the command exits 0; otherwise
        ``(False, message)`` where *message* is the stripped stderr cut
        to 256 characters, or ``"rc=<n>"`` when stderr is empty.
    """
    rc, _, stderr = await _run(
        [_DOCKER, "exec", container, "sh", "-c", f"rm -f {shlex.quote(path)}"],
        timeout=timeout,
    )
    if rc != 0:
        return False, stderr.strip()[:256] or f"rc={rc}"
    return True, None
|
||||
Reference in New Issue
Block a user