merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
74
decnet/orchestrator/drivers/__init__.py
Normal file
74
decnet/orchestrator/drivers/__init__.py
Normal file
@@ -0,0 +1,74 @@
|
||||
"""Activity drivers for the orchestrator.
|
||||
|
||||
Concrete drivers register dispatch in :func:`get_driver_for`. Same
|
||||
lazy-import pattern as :mod:`decnet.canary.factory`: the import-time
|
||||
cost of :mod:`decnet.orchestrator.drivers` stays low for callers that
|
||||
only need :class:`ActivityResult` / :class:`ActivityDriver`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.orchestrator.drivers.base import (
|
||||
ActivityDriver,
|
||||
ActivityResult,
|
||||
Driver,
|
||||
)
|
||||
from decnet.orchestrator.scheduler import Action, FileAction, TrafficAction
|
||||
|
||||
__all__ = [
|
||||
"ActivityDriver",
|
||||
"ActivityResult",
|
||||
"Driver",
|
||||
"SSHDriver",
|
||||
"get_driver_for",
|
||||
]
|
||||
|
||||
|
||||
def __getattr__(name: str):  # pragma: no cover - import passthrough
    """Resolve concrete driver classes on first attribute access.

    Module-level ``__getattr__`` hook: the concrete driver modules are
    imported only when ``SSHDriver`` or ``EmailDriver`` is actually
    requested, so consumers that only need the ABC don't pull in the
    docker-exec / email-driver code.

    Raises:
        AttributeError: for any other attribute name.
    """
    # Reject unknown names up front so the lazy imports below only run
    # for the two drivers this hook serves.
    if name not in ("SSHDriver", "EmailDriver"):
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    if name == "EmailDriver":
        from decnet.orchestrator.drivers.email import EmailDriver as driver_cls
    else:
        from decnet.orchestrator.drivers.ssh import SSHDriver as driver_cls
    return driver_cls
|
||||
|
||||
|
||||
def get_driver_for(action: Action) -> ActivityDriver:
    """Return the concrete driver that handles *action*.

    Stage 4 of the realism migration adds this seam so the orchestrator
    worker can dispatch by action type without an isinstance chain in
    ``_one_tick``.  Stage 5 wires the worker to call this function
    instead of holding a single ``SSHDriver`` instance.

    Dispatch table:

    * :class:`TrafficAction` / :class:`FileAction` / :class:`EditAction`
      → :class:`SSHDriver`
    * :class:`EmailAction` → ``EmailDriver``

    A fresh driver instance is constructed on every call.

    Raises:
        TypeError: when no driver is registered for the action's type.
    """
    # Lazy imports keep the side-effecting docker-exec / email-driver
    # modules out of every importer's graph.
    from decnet.orchestrator.drivers.ssh import SSHDriver
    # EditAction is handled by SSHDriver.run; the ssh driver module
    # imports it from the same scheduler module, so it is available
    # whenever the SSHDriver import above succeeded.
    from decnet.orchestrator.scheduler import EditAction

    if isinstance(action, (TrafficAction, FileAction, EditAction)):
        return SSHDriver()
    # Importing EmailAction inside the function avoids a cycle with
    # realism.llm at module load time.
    try:
        from decnet.orchestrator.emailgen.scheduler import EmailAction
    except ImportError:  # pragma: no cover - scheduler always exists
        EmailAction = None  # type: ignore[assignment]
    if EmailAction is not None and isinstance(action, EmailAction):
        from decnet.orchestrator.drivers.email import EmailDriver
        return EmailDriver()
    raise TypeError(
        f"no driver registered for action type {type(action).__name__}"
    )
|
||||
92
decnet/orchestrator/drivers/base.py
Normal file
92
decnet/orchestrator/drivers/base.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""Driver ABC for orchestrator actions.
|
||||
|
||||
Each concrete driver (SSH, Email, future HTTP/SMB/MySQL) maps one
|
||||
:class:`~decnet.orchestrator.scheduler.Action` shape to a side effect
|
||||
on a target decky and returns an :class:`ActivityResult` the
|
||||
orchestrator persists.
|
||||
|
||||
The ABC lives here, the dispatch factory in
|
||||
:mod:`decnet.orchestrator.drivers` ``__init__``, and the impls in
|
||||
sibling modules — same pattern as :mod:`decnet.canary.factory`,
|
||||
:mod:`decnet.web.db.factory`, and :mod:`decnet.bus.factory`.
|
||||
|
||||
Why ABC and not :class:`Protocol`: drivers also expose lower-level
|
||||
helpers (``plant_file``, ``read_file``) that the planner-driven
|
||||
realism path will call directly without going through ``run``.
|
||||
Inheritance pins the contract for those helpers; a structural
|
||||
protocol would let a typo silently produce a half-implemented driver.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from decnet.orchestrator.scheduler import Action
|
||||
|
||||
|
||||
@dataclass
class ActivityResult:
    """What a single driver invocation produced.

    The worker writes ``payload`` to the ``OrchestratorEvent.payload``
    column and uses it as the bus event body, so it is the per-action
    JSON envelope.
    """

    # Whether the driver reports the action as having succeeded.
    success: bool
    # Per-action envelope; defaults to a fresh empty dict per instance.
    payload: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
class ActivityDriver(ABC):
    """Abstract base for every concrete orchestrator driver.

    :meth:`run` is the only abstract member: a subclass must provide it
    to be instantiable.  :meth:`plant_file` and :meth:`read_file` are
    optional file helpers used by the realism edit-in-place path; the
    defaults here raise :class:`NotImplementedError`, which is the
    expected behaviour for drivers that never touch files (e.g. a
    future pure-traffic driver), and the planner avoids picking
    ``EditAction`` for those.
    """

    @abstractmethod
    async def run(self, action: Action) -> ActivityResult:
        """Execute the action against its target decky."""

    async def plant_file(
        self,
        decky_name: str,
        path: str,
        content: bytes,
        *,
        mode: int = 0o600,
        mtime: datetime | None = None,
    ) -> ActivityResult:
        """Write *content* to *path* inside *decky_name*.

        Drivers with a write transport (docker exec, ssh, etc.)
        override this.  *content* is bytes so binary artifacts
        (DOCX/PDF) survive the wire.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        driver_name = type(self).__name__
        raise NotImplementedError(f"{driver_name} does not support plant_file")

    async def read_file(self, decky_name: str, path: str) -> bytes:
        """Read *path* from inside *decky_name*.

        Needed by the realism edit-in-place flow (stage 3b of the
        realism migration): read the previous body, let the realism
        engine produce the next iteration, write it back.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        driver_name = type(self).__name__
        raise NotImplementedError(f"{driver_name} does not support read_file")


# Back-compat alias: existing imports of ``Driver`` keep working while
# consumers move to ``ActivityDriver``.  To be removed once the realism
# migration is complete.
Driver = ActivityDriver
|
||||
290
decnet/orchestrator/drivers/email.py
Normal file
290
decnet/orchestrator/drivers/email.py
Normal file
@@ -0,0 +1,290 @@
|
||||
"""Email driver — pluggable-LLM EML generation + decky-side delivery.
|
||||
|
||||
One :class:`EmailAction` becomes one EML written into the mail decky's
|
||||
configured emailgen spool directory (``/var/spool/decnet-emails/`` by
|
||||
default). The IMAP/POP3 service templates read that spool at request
|
||||
time so attackers see the generated mail in their MUA.
|
||||
|
||||
The LLM call goes through :mod:`decnet.realism.llm` — backend-agnostic
|
||||
by construction so swapping Ollama for the Anthropic API, vLLM, or
|
||||
llama.cpp is a config change, not a driver rewrite.
|
||||
Output is parsed-and-repaired into a valid EML using
|
||||
:mod:`email.mime.*`; the worker then ``docker exec``\\s a ``tee`` to
|
||||
drop the file inside the target container, followed by a
|
||||
``touch -d <Date>`` so the file's mtime matches the email's RFC 2822
|
||||
``Date:`` header.
|
||||
|
||||
Per CLAUDE.md "no shell strings": every subprocess invocation uses an
|
||||
argv list, never ``shell=True``. EML payloads are piped via ``stdin``,
|
||||
not interpolated into argv.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import shlex
|
||||
from datetime import datetime, timezone
|
||||
from email.mime.text import MIMEText
|
||||
from email.utils import formatdate
|
||||
from typing import Any, Optional
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.orchestrator.drivers.base import ActivityDriver, ActivityResult
|
||||
from decnet.orchestrator.emailgen.scheduler import EmailAction
|
||||
from decnet.orchestrator.emailgen.threads import new_message_id
|
||||
from decnet.realism.llm import LLMBackend, LLMTimeout, get_llm
|
||||
from decnet.realism.prompts.email import PromptInputs, build as build_prompt
|
||||
|
||||
log = get_logger("orchestrator.email")
|
||||
|
||||
_DOCKER = "docker"
# docker-exec wall-clock cap for the per-EML write.
_DOCKER_TIMEOUT = 8.0
# Container suffixes for the IMAP / POP3 services on a mail decky.
_IMAP_CONTAINER_SUFFIX = "-imap"
_POP3_CONTAINER_SUFFIX = "-pop3"
# Spool path inside the container.  Match the IMAP template's stubbed
# IMAP_EMAIL_SEED location once wiring lands; shipping the constant now
# lets that integration land independently.
_SPOOL_DIR = "/var/spool/decnet-emails"


async def _run_capture(
    argv: list[str],
    *,
    stdin_data: Optional[bytes] = None,
    timeout: float = _DOCKER_TIMEOUT,
) -> tuple[int, str, str]:
    """Spawn *argv*, optionally feeding *stdin_data*, and capture output.

    Returns ``(rc, stdout, stderr)`` with both streams decoded as UTF-8
    (undecodable bytes replaced).  Spawn and timeout failures are
    reported through the tuple rather than raised:

    * ``127`` — the executable was not found.
    * ``126`` — the executable could not be started for another
      OS-level reason (e.g. permission denied).
    * ``124`` — *timeout* seconds elapsed; the child is killed and
      reaped before returning.
    """
    try:
        proc = await asyncio.create_subprocess_exec(
            *argv,
            stdin=asyncio.subprocess.PIPE if stdin_data is not None else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except FileNotFoundError as exc:
        return 127, "", f"argv[0] not found: {exc}"
    except OSError as exc:
        # PermissionError and friends: honour the "report, don't raise"
        # contract instead of letting them escape into the worker.
        return 126, "", f"argv[0] could not be executed: {exc}"
    try:
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(stdin_data), timeout=timeout,
        )
    except asyncio.TimeoutError:
        try:
            proc.kill()
        except ProcessLookupError:
            pass
        # Reap the killed child so it does not linger as a zombie and
        # its pipe transports get closed.  Bounded, so a child that
        # refuses to die cannot stall the caller.
        try:
            await asyncio.wait_for(proc.wait(), timeout=1.0)
        except asyncio.TimeoutError:
            pass
        return 124, "", "timeout"
    return (
        proc.returncode if proc.returncode is not None else -1,
        stdout.decode("utf-8", "replace"),
        stderr.decode("utf-8", "replace"),
    )
|
||||
|
||||
|
||||
def _container_for(decky_name: str, services: list[str]) -> str:
    """Return the mail container name for *decky_name*.

    The IMAP container is chosen when ``"imap"`` is listed in
    *services*; in every other case the POP3 container name is
    returned.  Names follow the ``<decky_name>-<service>`` convention
    from the service templates.
    """
    suffix = (
        _IMAP_CONTAINER_SUFFIX if "imap" in services else _POP3_CONTAINER_SUFFIX
    )
    return f"{decky_name}{suffix}"
|
||||
|
||||
|
||||
def _parse_subject_and_body(ollama_output: str) -> tuple[str, str]:
    """Split raw LLM output into ``(subject, body)``.

    The prompt requests ``Subject: <subject>\\n\\n<body>``.  A leading
    markdown code fence (and a matching trailing one) is stripped
    first.  If the first remaining line is not a ``Subject:`` header
    (matched case-insensitively), the whole text becomes the body under
    a generic subject.  An empty subject value also falls back to the
    generic subject.  Never raises.
    """
    fallback_subject = "Business Communication"

    cleaned = ollama_output.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (only when there is a newline to
        # cut at), then a closing fence if one is present.
        _fence, newline, remainder = cleaned.partition("\n")
        if newline:
            cleaned = remainder
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        cleaned = cleaned.strip()

    rows = cleaned.splitlines()
    if not rows or not rows[0].lower().startswith("subject:"):
        return fallback_subject, cleaned

    header, *rest = rows
    subject = header.split(":", 1)[1].strip() or fallback_subject
    # Swallow the (possibly absent) blank separator line after Subject.
    if rest and not rest[0].strip():
        del rest[0]
    return subject, "\n".join(rest).strip()
|
||||
|
||||
|
||||
def _build_eml(
    *,
    sender_name: str,
    sender_email: str,
    recipient_name: str,
    recipient_email: str,
    subject: str,
    body: str,
    message_id: str,
    in_reply_to: Optional[str],
    references: str,
    ts: datetime,
) -> bytes:
    """Assemble a valid plain-text RFC 2822 EML and return its bytes.

    ``From`` / ``To`` are rendered with :func:`email.utils.formataddr`
    so display names containing commas, quotes or non-ASCII characters
    are quoted / RFC 2047-encoded while the address itself stays
    intact.  ``In-Reply-To`` and ``References`` are only emitted when
    the corresponding argument is truthy.  ``Date`` is *ts* rendered by
    :func:`email.utils.formatdate` with ``localtime=False``.
    """
    # Local import: the module-level import from email.utils only
    # brings in ``formatdate``.
    from email.utils import formataddr

    def _mailbox(display_name: str, address: str) -> str:
        """Render one ``Name <addr>`` mailbox with proper quoting."""
        try:
            return formataddr((display_name, address))
        except UnicodeError:
            # formataddr rejects non-ASCII addresses; keep the legacy
            # rendering rather than failing the whole message.
            return f"{display_name} <{address}>"

    msg = MIMEText(body, "plain", "utf-8")
    msg["From"] = _mailbox(sender_name, sender_email)
    msg["To"] = _mailbox(recipient_name, recipient_email)
    msg["Subject"] = subject
    msg["Date"] = formatdate(ts.timestamp(), localtime=False)
    msg["Message-ID"] = message_id
    if in_reply_to:
        msg["In-Reply-To"] = in_reply_to
    if references:
        msg["References"] = references
    # No explicit MIME-Version assignment: the MIMEText constructor
    # already sets ``MIME-Version: 1.0`` and item assignment on a
    # message appends rather than replaces, so a second one would
    # produce a duplicate header.
    return msg.as_bytes()
|
||||
|
||||
|
||||
class EmailDriver(ActivityDriver):
    """Driver that turns one :class:`EmailAction` into a spooled EML.

    The LLM backend is resolved once in ``__init__`` (or injected, for
    tests) and nothing else is carried between calls.  The driver is
    unaware of the bus and the DB: it hands an :class:`ActivityResult`
    back and the worker pipes it onward.
    """

    def __init__(
        self,
        *,
        llm: Optional[LLMBackend] = None,
        model: Optional[str] = None,
        spool_dir: str = _SPOOL_DIR,
    ) -> None:
        # An injected *llm* wins, so tests can pass a FakeBackend
        # without env-var trickery.  Otherwise *model* is forwarded to
        # the factory, which lets the worker honour ``--model`` from
        # the CLI without backends knowing about CLI flags.
        if llm is None:
            llm = get_llm(model=model)
        self._llm = llm
        self.spool_dir = spool_dir

    @property
    def model(self) -> str:
        """Model name reported by the backend (telemetry / logging)."""
        return self._llm.model

    async def run(self, action: EmailAction) -> ActivityResult:
        """Generate and deliver the email described by *action*."""
        return await self._run_email(action)

    async def _run_email(self, action: EmailAction) -> ActivityResult:
        """Prompt the LLM, build the EML, and write it into the spool.

        Returns a failed result with ``stage="llm"`` when the backend
        times out or yields no usable text; otherwise the result
        reflects the exit status of the ``docker exec`` delivery
        (``stage`` is ``"delivered"`` on success, ``"delivery"`` on
        failure).
        """
        inputs = PromptInputs(
            sender=action.sender,
            recipient=action.recipient,
            context_hint=action.context_hint,
            parent_subject=action.subject_hint,
            parent_excerpt=action.parent_excerpt,
        )
        prompt, mannerisms_used = build_prompt(inputs)

        try:
            llm_result = await self._llm.generate(prompt)
        except LLMTimeout as exc:
            log.warning("emailgen llm timeout model=%s: %s", self._llm.model, exc)
            timeout_payload = {
                "stage": "llm",
                "error": "timeout",
                "model": self._llm.model,
                "thread_id": action.thread_id,
            }
            return ActivityResult(success=False, payload=timeout_payload)

        gen_ms = llm_result.latency_ms
        usable = llm_result.success and llm_result.text.strip()
        if not usable:
            log.warning(
                "emailgen llm produced no usable output model=%s extra=%r",
                self._llm.model, llm_result.extra,
            )
            failure_payload = {
                "stage": "llm",
                "model": self._llm.model,
                "generation_ms": gen_ms,
                "thread_id": action.thread_id,
            }
            # Only the backend's rc / stderr diagnostics are forwarded.
            failure_payload.update(
                (key, value)
                for key, value in llm_result.extra.items()
                if key in ("rc", "stderr")
            )
            return ActivityResult(success=False, payload=failure_payload)

        subject, body = _parse_subject_and_body(llm_result.text)
        sender_domain = action.sender.email.split("@", 1)[1]
        message_id = new_message_id(sender_domain)
        ts = datetime.now(timezone.utc)
        eml_bytes = _build_eml(
            sender_name=action.sender.name,
            sender_email=action.sender.email,
            recipient_name=action.recipient.name,
            recipient_email=action.recipient.email,
            subject=subject,
            body=body,
            message_id=message_id,
            in_reply_to=action.parent_message_id,
            references=action.references,
            ts=ts,
        )

        # Spool layout: <spool>/<thread_id>/<message-id-derived>.eml.
        # One sub-directory per thread keeps `ls` in the spool readable
        # for operators inspecting the running decoy.
        eml_filename = message_id.strip("<>").replace("@", "_at_") + ".eml"
        eml_dir = f"{self.spool_dir.rstrip('/')}/{action.thread_id}"
        eml_path = f"{eml_dir}/{eml_filename}"
        container = _container_for(
            action.mail_decky_name, list(action.mail_decky_services),
        )

        # The file's mtime/atime are stamped to the EML's Date: header
        # so an attacker `ls -lt`'ing the spool doesn't see every file
        # clustered inside the worker's tick window.  GNU ``touch -d``
        # takes the same formatdate() string written into the header,
        # so nothing needs parsing on the container side.
        eml_date_header = formatdate(ts.timestamp(), localtime=False)
        quoted_path = shlex.quote(eml_path)
        sh_cmd = " && ".join(
            (
                f"mkdir -p {shlex.quote(eml_dir)}",
                f"tee {quoted_path} >/dev/null",
                f"touch -d {shlex.quote(eml_date_header)} {quoted_path}",
            )
        )
        # The EML itself travels over stdin, never through argv.
        argv = [_DOCKER, "exec", "-i", container, "sh", "-c", sh_cmd]
        rc2, _stdout2, stderr2 = await _run_capture(
            argv, stdin_data=eml_bytes, timeout=_DOCKER_TIMEOUT,
        )

        success = rc2 == 0
        payload: dict[str, Any] = {
            "stage": "delivered" if success else "delivery",
            "model": self.model,
            "generation_ms": gen_ms,
            "bytes": len(eml_bytes),
            "thread_id": action.thread_id,
            "message_id": message_id,
            "subject": subject,
            "language": action.sender.language or "en",
            "mannerisms_used": mannerisms_used,
            "is_reply": action.is_reply,
            "container": container,
            "eml_path": eml_path,
            "rc": rc2,
            "stderr": None if success else stderr2.strip()[:256],
        }
        if not success:
            log.warning(
                "emailgen delivery failed container=%s rc=%d stderr=%r",
                container, rc2, stderr2[:200],
            )
        return ActivityResult(success=success, payload=payload)
|
||||
293
decnet/orchestrator/drivers/ssh.py
Normal file
293
decnet/orchestrator/drivers/ssh.py
Normal file
@@ -0,0 +1,293 @@
|
||||
"""MVP SSH-flavoured driver.
|
||||
|
||||
Two action shapes:
|
||||
|
||||
* :class:`~decnet.orchestrator.scheduler.TrafficAction` — exec a tiny
|
||||
Python one-liner *inside the source decky's ssh container* that opens
|
||||
TCP/22 against the destination decky's IP and reads the SSH banner.
|
||||
This generates real on-the-wire SSH-protocol traffic between the two
|
||||
containers (sshd announces the banner on connect), without us having
|
||||
to ship credentials anywhere.
|
||||
* :class:`~decnet.orchestrator.scheduler.FileAction` — drop / refresh a
|
||||
file inside the destination decky's ssh container via ``docker exec``.
|
||||
|
||||
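Both shell out on the host via :func:`asyncio.create_subprocess_exec`
with argv lists — never a host shell string — so an
attacker-controllable decky name or IP can't escape into a host shell.
File writes do run ``sh -c`` *inside* the container; every path and
timestamp interpolated there goes through :func:`shlex.quote`.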
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import shlex
|
||||
from typing import Any
|
||||
|
||||
import base64
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.orchestrator.drivers.base import ActivityDriver, ActivityResult
|
||||
from decnet.orchestrator.scheduler import (
|
||||
Action,
|
||||
EditAction,
|
||||
FileAction,
|
||||
TrafficAction,
|
||||
)
|
||||
|
||||
log = get_logger("orchestrator.ssh")
|
||||
|
||||
_DOCKER = "docker"
# Wall-clock cap applied to every spawned command.  The orchestrator
# runs serially (one action per tick), so a wedged docker exec must not
# be allowed to stall the whole worker.
_TIMEOUT = 8.0

# Container naming convention: services/*.py emit container_name as
# ``<decky_name>-<service>``.  The MVP only drives the ssh service.
_SSH_CONTAINER_SUFFIX = "-ssh"


def _container_for(decky_name: str) -> str:
    """Return the name of *decky_name*'s ssh service container."""
    return "{}{}".format(decky_name, _SSH_CONTAINER_SUFFIX)
|
||||
|
||||
|
||||
async def _run(argv: list[str]) -> tuple[int, str, str]:
    """Run *argv* without stdin and return ``(rc, stdout, stderr)``.

    Thin wrapper over :func:`_run_with_stdin` with no stdin payload.
    A timeout comes back as ``(124, "", "timeout")`` and a missing
    executable as rc ``127``: for the orchestrator, success or failure
    is a payload attribute, not an exception.
    """
    outcome = await _run_with_stdin(argv, None)
    return outcome
|
||||
|
||||
|
||||
async def _run_with_stdin(
    argv: list[str], stdin_bytes: bytes | None,
) -> tuple[int, str, str]:
    """Spawn *argv*, optionally feeding *stdin_bytes*, and capture rc+output.

    Used by :meth:`SSHDriver.plant_file` to stream base64 payloads via
    stdin (avoids ARG_MAX on large blobs — same fix as the canary
    planter in commit c17b9e0).

    Returns ``(rc, stdout, stderr)`` with both streams decoded as UTF-8
    (undecodable bytes replaced).  Spawn and timeout failures are
    reported through the tuple rather than raised:

    * ``127`` — the executable was not found.
    * ``126`` — the executable could not be started for another
      OS-level reason (e.g. permission denied).
    * ``124`` — the ``_TIMEOUT`` wall-clock cap expired; the child is
      killed and reaped before returning.
    """
    try:
        proc = await asyncio.create_subprocess_exec(
            *argv,
            stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except FileNotFoundError as exc:
        return 127, "", f"argv[0] not found: {exc}"
    except OSError as exc:
        # PermissionError and friends: keep failures in the payload
        # instead of letting them escape into the worker loop.
        return 126, "", f"argv[0] could not be executed: {exc}"
    try:
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(stdin_bytes), timeout=_TIMEOUT,
        )
    except asyncio.TimeoutError:
        try:
            proc.kill()
        except ProcessLookupError:
            pass
        # Reap the killed child so it does not linger as a zombie and
        # its pipe transports get closed.  Bounded, so a child that
        # refuses to die cannot stall the serial worker.
        try:
            await asyncio.wait_for(proc.wait(), timeout=1.0)
        except asyncio.TimeoutError:
            pass
        return 124, "", "timeout"
    return (
        proc.returncode if proc.returncode is not None else -1,
        stdout.decode("utf-8", "replace"),
        stderr.decode("utf-8", "replace"),
    )
|
||||
|
||||
|
||||
# Python one-liner run inside the source container: connect to TCP/22
# on the address given as argv[1] (3 s socket timeout), read up to 128
# bytes of banner and echo them on stdout.  Kept inline so the driver
# has zero filesystem dependencies on the host side; the *container*
# needs python3 (ssh service template ships it).
_PROBE_PY = ";".join(
    (
        "import socket,sys",
        "s=socket.socket()",
        "s.settimeout(3)",
        "s.connect((sys.argv[1], 22))",
        "b=s.recv(128)",
        "s.close()",
        "sys.stdout.write(b.decode('latin1','replace'))",
    )
)
|
||||
|
||||
|
||||
class SSHDriver(ActivityDriver):
    """Concrete :class:`ActivityDriver` for SSH-flavoured actions.

    Handles :class:`TrafficAction`, :class:`FileAction` and
    :class:`EditAction`.  Every side effect is a ``docker exec``
    against the decky's ssh service container.
    """

    async def run(self, action: Action) -> ActivityResult:
        """Dispatch *action* to the handler for its type.

        Raises:
            TypeError: if *action* is none of the supported shapes.
        """
        if isinstance(action, TrafficAction):
            return await self._run_traffic(action)
        if isinstance(action, FileAction):
            return await self._run_file(action)
        if isinstance(action, EditAction):
            return await self._run_edit(action)
        raise TypeError(f"unsupported action type: {type(action)!r}")

    async def _run_traffic(self, action: TrafficAction) -> ActivityResult:
        """Probe the destination's SSH banner from the source container.

        Succeeds only when the probe exits 0 *and* the captured output
        starts with ``SSH-``.
        """
        container = _container_for(action.src_name)
        # dst_ip is passed as a separate argv element, never spliced
        # into the probe source.
        argv = [
            _DOCKER, "exec", container,
            "python3", "-c", _PROBE_PY, action.dst_ip,
        ]
        rc, stdout, stderr = await _run(argv)
        success = rc == 0 and stdout.startswith("SSH-")
        payload: dict[str, Any] = {
            "src_decky": action.src_name,
            "dst_decky": action.dst_name,
            "dst_ip": action.dst_ip,
            "dst_port": 22,
            "rc": rc,
            "banner": stdout.strip()[:128] if success else None,
            "stderr": stderr.strip()[:256] if not success else None,
        }
        if not success:
            log.debug(
                "orchestrator.ssh.traffic failed src=%s dst=%s rc=%d stderr=%r",
                action.src_name, action.dst_name, rc, stderr[:120],
            )
        return ActivityResult(success=success, payload=payload)

    async def _run_edit(self, action: EditAction) -> ActivityResult:
        """Mutate an existing synthetic file in place.

        The realism planner already loaded the previous body from the
        ``synthetic_files`` row, so we don't re-fetch via ``read_file``;
        the body the planner saw is the body we mutate.  This avoids a
        TOCTOU window where the file changed between pick and apply
        (the realism worker is the only writer in the MVP, but the
        contract should still be tight).

        Returns a failed result (nothing is written) when the content
        class is unknown or does not support edits.
        """
        # Imported lazily, at call time, rather than at module load.
        from decnet.realism.bodies import next_iteration as _next_iteration
        from decnet.realism.taxonomy import ContentClass

        try:
            cls = ContentClass(action.content_class)
        except ValueError:
            return ActivityResult(
                success=False,
                payload={
                    "dst_decky": action.dst_name,
                    "path": action.path,
                    "error": f"unknown content_class: {action.content_class!r}",
                },
            )
        try:
            new_body = _next_iteration(
                cls, action.persona, action.previous_body,
            )
        except KeyError:
            return ActivityResult(
                success=False,
                payload={
                    "dst_decky": action.dst_name,
                    "path": action.path,
                    "error": (
                        f"content_class={cls!s} does not support edits"
                    ),
                },
            )
        encoded = new_body.encode("utf-8")
        result = await self.plant_file(
            action.dst_name,
            action.path,
            encoded,
            mode=0o644,
            mtime=action.mtime,
        )
        # Carry edit-specific metadata through to the orchestrator
        # event payload so the worker's synthetic_files bump (and the
        # dashboard's lineage view) sees what actually landed.
        if result.success:
            result.payload["new_body"] = new_body
            result.payload["new_body_bytes"] = len(encoded)
            result.payload["synthetic_file_uuid"] = action.synthetic_file_uuid
        return result

    async def _run_file(self, action: FileAction) -> ActivityResult:
        """Plant the file described by *action* with mode ``0o644``."""
        # FileAction.content_bytes wins when set — canary artifacts
        # (DOCX/PDF/honeydoc binaries) need their bytes preserved
        # exactly.  Otherwise the str content is UTF-8 encoded (the
        # inert-realism path).  mtime carries through from the realism
        # planner so the file doesn't stamp at wall-clock-now.
        body = action.content_bytes
        if body is None:
            body = action.content.encode("utf-8")
        return await self.plant_file(
            action.dst_name,
            action.path,
            body,
            mode=0o644,
            mtime=action.mtime,
        )

    async def plant_file(
        self,
        decky_name: str,
        path: str,
        content: bytes,
        *,
        mode: int = 0o600,
        mtime: datetime | None = None,
    ) -> ActivityResult:
        """Write *content* to *path* inside *decky_name*'s ssh container.

        Streams base64 via stdin (mirrors :mod:`decnet.canary.planter`'s
        ARG_MAX-safe write — see commit c17b9e0), creates the parent
        directory, sets the file mode and, when *mtime* is provided,
        ``touch -d``\\s the file so it doesn't stamp at wall-clock-now
        (the realism failure this migration is fixing).

        The result's ``success`` mirrors the exit status of the
        container-side command chain; ``stderr`` is only populated on
        failure.
        """
        container = _container_for(decky_name)
        quoted_path = shlex.quote(path)
        # The timestamp is always rendered in UTC so the container's
        # local TZ doesn't drift the mtime.
        if mtime is not None:
            ts = mtime.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
            touch_cmd = f"touch -d {shlex.quote(ts)} {quoted_path}"
        else:
            touch_cmd = f"touch {quoted_path}"
        # Runs inside the container; every interpolated value is
        # shlex-quoted and the payload itself arrives on stdin.
        sh_cmd = (
            f"mkdir -p {shlex.quote(_dirname(path))} && "
            f"base64 -d > {quoted_path} && "
            f"chmod {mode:o} {quoted_path} && "
            f"{touch_cmd}"
        )
        argv = [_DOCKER, "exec", "-i", container, "sh", "-c", sh_cmd]
        rc, _stdout, stderr = await _run_with_stdin(
            argv, base64.b64encode(content),
        )
        success = rc == 0
        payload: dict[str, Any] = {
            "dst_decky": decky_name,
            "path": path,
            "bytes": len(content),
            "rc": rc,
            "stderr": stderr.strip()[:256] if not success else None,
        }
        return ActivityResult(success=success, payload=payload)

    async def read_file(self, decky_name: str, path: str) -> bytes:
        """Read *path* from inside *decky_name*'s ssh container.

        Used by the realism edit-in-place flow: the driver fetches
        the previous body, the realism engine produces the next
        iteration, the driver re-plants it via :meth:`plant_file`.

        The file is base64-encoded inside the container and decoded
        here.  The command runner decodes stdout as UTF-8 with
        replacement, so reading raw bytes through it would silently
        corrupt binary artifacts; base64 output is plain ASCII and
        survives that step intact.  ``base64`` is the same container
        tool :meth:`plant_file` already relies on.

        Raises:
            FileNotFoundError: when the container reports that *path*
                does not exist (stderr contains ``No such file``).
            RuntimeError: on any other failure, carrying the docker
                stderr, or when the returned data is not valid base64.
        """
        container = _container_for(decky_name)
        argv = [_DOCKER, "exec", container, "base64", path]
        rc, stdout, stderr = await _run(argv)
        if rc == 0:
            try:
                # Line-wrapping newlines in the base64 output are
                # discarded by the non-validating decoder.
                return base64.b64decode(stdout)
            except ValueError as exc:
                raise RuntimeError(
                    f"docker exec base64 returned undecodable data for {path}"
                ) from exc
        if "no such file" in stderr.lower():
            raise FileNotFoundError(f"{path} not present in {decky_name}")
        raise RuntimeError(
            f"docker exec base64 failed rc={rc} stderr={stderr.strip()[:256]!r}"
        )
|
||||
|
||||
|
||||
def _dirname(path: str) -> str:
    """Return the parent directory of a POSIX *path* by string slicing.

    Host-side ``os.path.dirname`` is deliberately avoided because the
    host's separator semantics need not match the destination
    container's; deckies are POSIX, so cutting at the last ``/`` is
    enough.  When nothing precedes the last ``/`` (or there is no
    ``/`` at all) the result is ``"/"``.
    """
    parent, _sep, _leaf = path.rpartition("/")
    return parent if parent else "/"
|
||||
Reference in New Issue
Block a user