feat(emailgen): Ollama-driven fake email worker for IMAP/POP3 deckies
Second orchestrator worker (decnet emailgen) that drips persona-driven, threaded, multi-language fake emails into running mail deckies. Personas live on Topology.email_personas; topology-wide language_default falls through to any persona that doesn't pin its own. Em-dashes are suppressed at the prompt layer by default and only lifted for personas explicitly marked uses_llms_heavily — em-dashes are an LLM tell and a flat corpus of em-dashed mail is a giveaway. EML delivery writes into /var/spool/decnet-emails/<thread>/<msg>.eml on the mail decky via docker exec; wiring the IMAP/POP3 templates to read from that spool (replacing the hardcoded _BAIT_EMAILS) is the next step.
This commit is contained in:
272
decnet/orchestrator/drivers/email.py
Normal file
272
decnet/orchestrator/drivers/email.py
Normal file
@@ -0,0 +1,272 @@
|
||||
"""Email driver — Ollama-backed EML generation + decky-side delivery.
|
||||
|
||||
One :class:`EmailAction` becomes one EML written into the mail decky's
|
||||
configured emailgen spool directory (``/var/spool/decnet-emails/`` by
|
||||
default). An integration follow-up wires the IMAP/POP3 service templates
|
||||
to read EMLs from that spool at request time so attackers see the
|
||||
generated mail in their MUA.
|
||||
|
||||
The Ollama call shells out via ``ollama run <model>`` — the prototype at
|
||||
``DECNET-EMAILs/main.py`` proved the round-trip works. Output is
|
||||
parsed-and-repaired into a valid EML using :mod:`email.mime.*`; the
|
||||
worker then ``docker exec``\\s a ``tee`` to drop the file inside the
|
||||
target container.
|
||||
|
||||
Per CLAUDE.md "no shell strings": every subprocess invocation uses an
|
||||
argv list, never ``shell=True``. Ollama prompts and EML payloads are
|
||||
piped via ``stdin``, not interpolated into argv.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import shlex
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from email.mime.text import MIMEText
|
||||
from email.utils import formatdate
|
||||
from typing import Any, Optional
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.orchestrator.drivers.base import ActivityResult
|
||||
from decnet.orchestrator.emailgen.prompt import PromptInputs, build as build_prompt
|
||||
from decnet.orchestrator.emailgen.scheduler import EmailAction
|
||||
from decnet.orchestrator.emailgen.threads import new_message_id
|
||||
|
||||
log = get_logger("orchestrator.email")
|
||||
|
||||
_DOCKER = "docker"
|
||||
_OLLAMA = "ollama"
|
||||
# Wall-clock cap for the LLM call. Big enough for a 4070 running
|
||||
# llama3.1; small enough that a stuck Ollama server doesn't wedge the
|
||||
# emailgen tick.
|
||||
_DEFAULT_OLLAMA_TIMEOUT = float(os.environ.get("DECNET_EMAILGEN_TIMEOUT", "60"))
|
||||
_DEFAULT_MODEL = os.environ.get("DECNET_EMAILGEN_MODEL", "llama3.1")
|
||||
# docker-exec wall-clock cap for the per-EML write.
|
||||
_DOCKER_TIMEOUT = 8.0
|
||||
# Container suffix for the IMAP service on a mail decky.
|
||||
_IMAP_CONTAINER_SUFFIX = "-imap"
|
||||
_POP3_CONTAINER_SUFFIX = "-pop3"
|
||||
# Spool path inside the container. Match the IMAP template's stubbed
|
||||
# IMAP_EMAIL_SEED location once wiring lands; shipping the constant now
|
||||
# lets that integration land independently.
|
||||
_SPOOL_DIR = "/var/spool/decnet-emails"
|
||||
|
||||
|
||||
async def _run_capture(
|
||||
argv: list[str],
|
||||
*,
|
||||
stdin_data: Optional[bytes] = None,
|
||||
timeout: float = _DOCKER_TIMEOUT,
|
||||
) -> tuple[int, str, str]:
|
||||
"""Spawn *argv*, optionally feeding *stdin_data*. Never raises."""
|
||||
try:
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*argv,
|
||||
stdin=asyncio.subprocess.PIPE if stdin_data is not None else None,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
except FileNotFoundError as exc:
|
||||
return 127, "", f"argv[0] not found: {exc}"
|
||||
try:
|
||||
stdout, stderr = await asyncio.wait_for(
|
||||
proc.communicate(stdin_data), timeout=timeout,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
try:
|
||||
proc.kill()
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
return 124, "", "timeout"
|
||||
return (
|
||||
proc.returncode if proc.returncode is not None else -1,
|
||||
stdout.decode("utf-8", "replace"),
|
||||
stderr.decode("utf-8", "replace"),
|
||||
)
|
||||
|
||||
|
||||
def _container_for(decky_name: str, services: list[str]) -> str:
|
||||
"""Pick the IMAP container if present, else POP3. Names follow the
|
||||
``<decky_name>-<service>`` convention from the service templates."""
|
||||
if "imap" in services:
|
||||
return f"{decky_name}{_IMAP_CONTAINER_SUFFIX}"
|
||||
return f"{decky_name}{_POP3_CONTAINER_SUFFIX}"
|
||||
|
||||
|
||||
def _parse_subject_and_body(ollama_output: str) -> tuple[str, str]:
|
||||
"""Split LLM output into (subject, body).
|
||||
|
||||
The prompt asks for ``Subject: <subject>\\n\\n<body>``. When the
|
||||
model misbehaves (e.g. wraps in markdown fences or skips the
|
||||
Subject line), fall back to a generic subject and treat the whole
|
||||
output as body. Never raises.
|
||||
"""
|
||||
text = ollama_output.strip()
|
||||
# Strip code fences if the model wrapped output.
|
||||
if text.startswith("```"):
|
||||
nl = text.find("\n")
|
||||
if nl > 0:
|
||||
text = text[nl + 1:]
|
||||
if text.endswith("```"):
|
||||
text = text[: -3]
|
||||
text = text.strip()
|
||||
lines = text.splitlines()
|
||||
if lines and lines[0].lower().startswith("subject:"):
|
||||
subject = lines[0].split(":", 1)[1].strip()
|
||||
# Drop the (possibly empty) blank line after Subject.
|
||||
body_lines = lines[1:]
|
||||
if body_lines and not body_lines[0].strip():
|
||||
body_lines = body_lines[1:]
|
||||
body = "\n".join(body_lines).strip()
|
||||
if not subject:
|
||||
subject = "Business Communication"
|
||||
return subject, body
|
||||
return "Business Communication", text
|
||||
|
||||
|
||||
def _build_eml(
|
||||
*,
|
||||
sender_name: str,
|
||||
sender_email: str,
|
||||
recipient_name: str,
|
||||
recipient_email: str,
|
||||
subject: str,
|
||||
body: str,
|
||||
message_id: str,
|
||||
in_reply_to: Optional[str],
|
||||
references: str,
|
||||
ts: datetime,
|
||||
) -> bytes:
|
||||
"""Assemble a valid plain-text RFC 2822 EML."""
|
||||
msg = MIMEText(body, "plain", "utf-8")
|
||||
msg["From"] = f"{sender_name} <{sender_email}>"
|
||||
msg["To"] = f"{recipient_name} <{recipient_email}>"
|
||||
msg["Subject"] = subject
|
||||
msg["Date"] = formatdate(ts.timestamp(), localtime=False)
|
||||
msg["Message-ID"] = message_id
|
||||
if in_reply_to:
|
||||
msg["In-Reply-To"] = in_reply_to
|
||||
if references:
|
||||
msg["References"] = references
|
||||
msg["MIME-Version"] = "1.0"
|
||||
return msg.as_bytes()
|
||||
|
||||
|
||||
class EmailDriver:
|
||||
"""Concrete driver for :class:`EmailAction`.
|
||||
|
||||
Stateless across calls — Ollama model + timeout are constructor
|
||||
args, not per-call. The driver does *not* know about the bus or
|
||||
DB; it returns an :class:`ActivityResult` that the worker pipes
|
||||
onward.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
model: str = _DEFAULT_MODEL,
|
||||
ollama_timeout: float = _DEFAULT_OLLAMA_TIMEOUT,
|
||||
spool_dir: str = _SPOOL_DIR,
|
||||
) -> None:
|
||||
self.model = model
|
||||
self.ollama_timeout = ollama_timeout
|
||||
self.spool_dir = spool_dir
|
||||
|
||||
async def run(self, action: EmailAction) -> ActivityResult:
|
||||
# Look up the mail-decky container name + services. The driver
|
||||
# receives a denormalised view via the action — the worker
|
||||
# populates it from the same list the scheduler used.
|
||||
return await self._run_email(action)
|
||||
|
||||
async def _run_email(self, action: EmailAction) -> ActivityResult:
|
||||
t0 = time.monotonic()
|
||||
prompt, mannerisms_used = build_prompt(
|
||||
PromptInputs(
|
||||
sender=action.sender,
|
||||
recipient=action.recipient,
|
||||
context_hint=action.context_hint,
|
||||
parent_subject=action.subject_hint,
|
||||
parent_excerpt=action.parent_excerpt,
|
||||
)
|
||||
)
|
||||
rc, stdout, stderr = await _run_capture(
|
||||
[_OLLAMA, "run", self.model],
|
||||
stdin_data=prompt.encode("utf-8"),
|
||||
timeout=self.ollama_timeout,
|
||||
)
|
||||
gen_ms = int((time.monotonic() - t0) * 1000)
|
||||
if rc != 0 or not stdout.strip():
|
||||
log.warning(
|
||||
"emailgen ollama failed rc=%d stderr=%r model=%s",
|
||||
rc, stderr[:200], self.model,
|
||||
)
|
||||
return ActivityResult(
|
||||
success=False,
|
||||
payload={
|
||||
"stage": "ollama",
|
||||
"rc": rc,
|
||||
"stderr": stderr.strip()[:256],
|
||||
"generation_ms": gen_ms,
|
||||
"model": self.model,
|
||||
"thread_id": action.thread_id,
|
||||
},
|
||||
)
|
||||
|
||||
subject, body = _parse_subject_and_body(stdout)
|
||||
message_id = new_message_id(action.sender.email.split("@", 1)[1])
|
||||
ts = datetime.now(timezone.utc)
|
||||
eml_bytes = _build_eml(
|
||||
sender_name=action.sender.name,
|
||||
sender_email=action.sender.email,
|
||||
recipient_name=action.recipient.name,
|
||||
recipient_email=action.recipient.email,
|
||||
subject=subject,
|
||||
body=body,
|
||||
message_id=message_id,
|
||||
in_reply_to=action.parent_message_id,
|
||||
references=action.references,
|
||||
ts=ts,
|
||||
)
|
||||
|
||||
# Drop the EML into the mail decky's spool dir over docker exec.
|
||||
# File path: <spool>/<thread_id>/<uuid-from-message-id>.eml.
|
||||
# Per-thread sub-directory keeps `ls` in the spool readable by
|
||||
# operators inspecting the running decoy.
|
||||
eml_filename = message_id.strip("<>").replace("@", "_at_") + ".eml"
|
||||
eml_dir = f"{self.spool_dir.rstrip('/')}/{action.thread_id}"
|
||||
eml_path = f"{eml_dir}/{eml_filename}"
|
||||
container = _container_for(
|
||||
action.mail_decky_name, list(action.mail_decky_services),
|
||||
)
|
||||
sh_cmd = (
|
||||
f"mkdir -p {shlex.quote(eml_dir)} && "
|
||||
f"tee {shlex.quote(eml_path)} >/dev/null"
|
||||
)
|
||||
argv = [_DOCKER, "exec", "-i", container, "sh", "-c", sh_cmd]
|
||||
rc2, _stdout2, stderr2 = await _run_capture(
|
||||
argv, stdin_data=eml_bytes, timeout=_DOCKER_TIMEOUT,
|
||||
)
|
||||
success = rc2 == 0
|
||||
payload: dict[str, Any] = {
|
||||
"stage": "delivered" if success else "delivery",
|
||||
"model": self.model,
|
||||
"generation_ms": gen_ms,
|
||||
"bytes": len(eml_bytes),
|
||||
"thread_id": action.thread_id,
|
||||
"message_id": message_id,
|
||||
"subject": subject,
|
||||
"language": action.sender.language or "en",
|
||||
"mannerisms_used": mannerisms_used,
|
||||
"is_reply": action.is_reply,
|
||||
"container": container,
|
||||
"eml_path": eml_path,
|
||||
"rc": rc2,
|
||||
"stderr": stderr2.strip()[:256] if not success else None,
|
||||
}
|
||||
if not success:
|
||||
log.warning(
|
||||
"emailgen delivery failed container=%s rc=%d stderr=%r",
|
||||
container, rc2, stderr2[:200],
|
||||
)
|
||||
return ActivityResult(success=success, payload=payload)
|
||||
Reference in New Issue
Block a user