refactor(emailgen): pluggable LLM backend (base/factory/impl)

Lift the Ollama subprocess shell-out out of EmailDriver and into a proper provider subpackage shape: decnet/orchestrator/emailgen/llm/ base.py — LLMBackend Protocol + LLMResult + LLMTimeout factory.py — get_llm() reads DECNET_EMAILGEN_LLM impl/ollama.py — current 'ollama run' subprocess path impl/fake.py — canned-output backend used by tests Driver now takes an LLMBackend on construction (or inherits the factory default). Tests inject FakeBackend instead of monkeypatching the subprocess layer, which is cleaner and ~10x faster. Swapping Ollama for the Anthropic API / vLLM / llama.cpp is now a third branch in factory.py; no driver rewrite needed. Mirrors the convention used by decnet.web.db.factory + decnet.bus.factory per the provider-subpackages-from-day-one rule in memory.
2026-04-26 22:43:36 -04:00
parent 4badc75fb2
commit 6d520eaa6f
10 changed files with 546 additions and 79 deletions
--- a/decnet/orchestrator/drivers/email.py
+++ b/decnet/orchestrator/drivers/email.py
@@ -1,27 +1,27 @@
-"""Email driver — Ollama-backed EML generation + decky-side delivery.
+"""Email driver — pluggable-LLM EML generation + decky-side delivery.

 One :class:`EmailAction` becomes one EML written into the mail decky's
 configured emailgen spool directory (``/var/spool/decnet-emails/`` by
-default).  An integration follow-up wires the IMAP/POP3 service templates
-to read EMLs from that spool at request time so attackers see the
-generated mail in their MUA.
+default).  The IMAP/POP3 service templates read that spool at request
+time so attackers see the generated mail in their MUA.

-The Ollama call shells out via ``ollama run <model>`` — the prototype at
-``DECNET-EMAILs/main.py`` proved the round-trip works.  Output is
-parsed-and-repaired into a valid EML using :mod:`email.mime.*`; the
-worker then ``docker exec``\\s a ``tee`` to drop the file inside the
-target container.
+The LLM call goes through :mod:`decnet.orchestrator.emailgen.llm` —
+backend-agnostic by construction so swapping Ollama for the Anthropic
+API, vLLM, or llama.cpp is a config change, not a driver rewrite.
+Output is parsed-and-repaired into a valid EML using
+:mod:`email.mime.*`; the worker then ``docker exec``\\s a ``tee`` to
+drop the file inside the target container, followed by a
+``touch -d <Date>`` so the file's mtime matches the email's RFC 2822
+``Date:`` header.

 Per CLAUDE.md "no shell strings": every subprocess invocation uses an
-argv list, never ``shell=True``.  Ollama prompts and EML payloads are
-piped via ``stdin``, not interpolated into argv.
+argv list, never ``shell=True``.  EML payloads are piped via ``stdin``,
+not interpolated into argv.
 """
 from __future__ import annotations

 import asyncio
-import os
 import shlex
-import time
 from datetime import datetime, timezone
 from email.mime.text import MIMEText
 from email.utils import formatdate
@@ -29,6 +29,7 @@ from typing import Any, Optional

 from decnet.logging import get_logger
 from decnet.orchestrator.drivers.base import ActivityResult
+from decnet.orchestrator.emailgen.llm import LLMBackend, LLMTimeout, get_llm
 from decnet.orchestrator.emailgen.prompt import PromptInputs, build as build_prompt
 from decnet.orchestrator.emailgen.scheduler import EmailAction
 from decnet.orchestrator.emailgen.threads import new_message_id
@@ -36,12 +37,6 @@ from decnet.orchestrator.emailgen.threads import new_message_id
 log = get_logger("orchestrator.email")

 _DOCKER = "docker"
-_OLLAMA = "ollama"
-# Wall-clock cap for the LLM call.  Big enough for a 4070 running
-# llama3.1; small enough that a stuck Ollama server doesn't wedge the
-# emailgen tick.
-_DEFAULT_OLLAMA_TIMEOUT = float(os.environ.get("DECNET_EMAILGEN_TIMEOUT", "60"))
-_DEFAULT_MODEL = os.environ.get("DECNET_EMAILGEN_MODEL", "llama3.1")
 # docker-exec wall-clock cap for the per-EML write.
 _DOCKER_TIMEOUT = 8.0
 # Container suffix for the IMAP service on a mail decky.
@@ -156,31 +151,35 @@ def _build_eml(
 class EmailDriver:
    """Concrete driver for :class:`EmailAction`.

-    Stateless across calls — Ollama model + timeout are constructor
-    args, not per-call.  The driver does *not* know about the bus or
-    DB; it returns an :class:`ActivityResult` that the worker pipes
-    onward.
+    Stateless across calls — the LLM backend is constructed once at
+    init time (or injected for tests).  The driver itself does *not*
+    know about the bus or DB; it returns an :class:`ActivityResult`
+    that the worker pipes onward.
    """

    def __init__(
        self,
        *,
-        model: str = _DEFAULT_MODEL,
-        ollama_timeout: float = _DEFAULT_OLLAMA_TIMEOUT,
+        llm: Optional[LLMBackend] = None,
+        model: Optional[str] = None,
        spool_dir: str = _SPOOL_DIR,
    ) -> None:
-        self.model = model
-        self.ollama_timeout = ollama_timeout
+        # *llm* takes precedence so tests can inject a FakeBackend
+        # without env-var trickery.  *model* lets the worker honour
+        # ``--model`` from the CLI without each backend needing to know
+        # about CLI flags.
+        self._llm = llm if llm is not None else get_llm(model=model)
        self.spool_dir = spool_dir

+    @property
+    def model(self) -> str:
+        """Convenience accessor for telemetry / logging."""
+        return self._llm.model
+
    async def run(self, action: EmailAction) -> ActivityResult:
-        # Look up the mail-decky container name + services.  The driver
-        # receives a denormalised view via the action — the worker
-        # populates it from the same list the scheduler used.
        return await self._run_email(action)

    async def _run_email(self, action: EmailAction) -> ActivityResult:
-        t0 = time.monotonic()
        prompt, mannerisms_used = build_prompt(
            PromptInputs(
                sender=action.sender,
@@ -190,30 +189,41 @@ class EmailDriver:
                parent_excerpt=action.parent_excerpt,
            )
        )
-        rc, stdout, stderr = await _run_capture(
-            [_OLLAMA, "run", self.model],
-            stdin_data=prompt.encode("utf-8"),
-            timeout=self.ollama_timeout,
-        )
-        gen_ms = int((time.monotonic() - t0) * 1000)
-        if rc != 0 or not stdout.strip():
-            log.warning(
-                "emailgen ollama failed rc=%d stderr=%r model=%s",
-                rc, stderr[:200], self.model,
-            )
+        try:
+            llm_result = await self._llm.generate(prompt)
+        except LLMTimeout as exc:
+            log.warning("emailgen llm timeout model=%s: %s", self._llm.model, exc)
            return ActivityResult(
                success=False,
                payload={
-                    "stage": "ollama",
-                    "rc": rc,
-                    "stderr": stderr.strip()[:256],
-                    "generation_ms": gen_ms,
-                    "model": self.model,
+                    "stage": "llm",
+                    "error": "timeout",
+                    "model": self._llm.model,
                    "thread_id": action.thread_id,
                },
            )

-        subject, body = _parse_subject_and_body(stdout)
+        gen_ms = llm_result.latency_ms
+        if not llm_result.success or not llm_result.text.strip():
+            log.warning(
+                "emailgen llm produced no usable output model=%s extra=%r",
+                self._llm.model, llm_result.extra,
+            )
+            return ActivityResult(
+                success=False,
+                payload={
+                    "stage": "llm",
+                    "model": self._llm.model,
+                    "generation_ms": gen_ms,
+                    "thread_id": action.thread_id,
+                    **{
+                        k: v for k, v in llm_result.extra.items()
+                        if k in ("rc", "stderr")
+                    },
+                },
+            )
+
+        subject, body = _parse_subject_and_body(llm_result.text)
        message_id = new_message_id(action.sender.email.split("@", 1)[1])
        ts = datetime.now(timezone.utc)
        eml_bytes = _build_eml(