refactor(emailgen): pluggable LLM backend (base/factory/impl)

Lift the Ollama subprocess shell-out out of EmailDriver and into a
proper provider subpackage shape:

    decnet/orchestrator/emailgen/llm/
      base.py         — LLMBackend Protocol + LLMResult + LLMTimeout
      factory.py      — get_llm() reads DECNET_EMAILGEN_LLM
      impl/ollama.py  — current 'ollama run' subprocess path
      impl/fake.py    — canned-output backend used by tests

The driver now takes an LLMBackend on construction (or inherits the
factory default). Tests inject FakeBackend instead of monkeypatching the
subprocess layer, which is cleaner and ~10x faster.

Swapping Ollama for the Anthropic API / vLLM / llama.cpp is now a third
branch in factory.py; no driver rewrite is needed.

This mirrors the convention used by decnet.web.db.factory and
decnet.bus.factory, per the provider-subpackages-from-day-one rule in
memory.
2026-04-26 22:43:36 -04:00
parent 4badc75fb2
commit 6d520eaa6f
10 changed files with 546 additions and 79 deletions
--- a/decnet/orchestrator/emailgen/llm/impl/__init__.py
+++ b/decnet/orchestrator/emailgen/llm/impl/__init__.py
@@ -0,0 +1,6 @@
+"""Concrete LLM-backend implementations.
+
+Importers go through :func:`decnet.orchestrator.emailgen.llm.get_llm`,
+not these modules directly — same convention as
+:mod:`decnet.web.db.sqlite` and :mod:`decnet.bus.unix_client`.
+"""
--- a/decnet/orchestrator/emailgen/llm/impl/fake.py
+++ b/decnet/orchestrator/emailgen/llm/impl/fake.py
@@ -0,0 +1,50 @@
+"""In-process fake backend for tests.
+
+Returns a canned ``Subject:\\n\\nbody`` string so the driver path can be
+exercised without an Ollama install.  Configurable via ``DECNET_EMAILGEN_FAKE_OUTPUT``
+(env) or the ``output`` constructor arg — the env-var path lets
+integration tests run the worker end-to-end with deterministic output.
+"""
+from __future__ import annotations
+
+import os
+import time
+from typing import Optional
+
+from decnet.orchestrator.emailgen.llm.base import LLMBackend, LLMResult
+
+
+# Sample email handed back by FakeBackend when neither the ``output``
+# argument nor ``DECNET_EMAILGEN_FAKE_OUTPUT`` supplies a replacement.
+# Built line by line so the subject / blank line / body layout is visible.
+_DEFAULT_OUTPUT = "\n".join(
+    [
+        "Subject: Quick update",
+        "",
+        "Hi,",
+        "",
+        "Following up on the topic.",
+        "",
+        "Best regards,",
+        "Fake Persona",
+        "",  # keeps the trailing newline
+    ]
+)
+
+
+class FakeBackend(LLMBackend):
+    """Deterministic :class:`LLMBackend` that never leaves the process.
+
+    ``output`` fixes the text returned on success.  When it is ``None``
+    the ``DECNET_EMAILGEN_FAKE_OUTPUT`` environment variable is read once,
+    at construction time, falling back to the built-in sample email.
+    ``success=False`` makes every call report failure with empty text.
+    """
+
+    def __init__(
+        self,
+        *,
+        model: str = "fake-model",
+        timeout: float = 1.0,
+        output: Optional[str] = None,
+        success: bool = True,
+    ) -> None:
+        self.model = model
+        # Stored but never read by generate().
+        self.timeout = timeout
+        if output is None:
+            output = os.environ.get("DECNET_EMAILGEN_FAKE_OUTPUT", _DEFAULT_OUTPUT)
+        self._output = output
+        self._success = success
+
+    async def generate(self, prompt: str) -> LLMResult:    # noqa: ARG002
+        """Return the canned result; ``prompt`` is ignored.
+
+        ``extra["rc"]`` is 0 on success and 1 on failure.
+        """
+        started = time.monotonic()
+        if self._success:
+            body, rc = self._output, 0
+        else:
+            body, rc = "", 1
+        elapsed_ms = int((time.monotonic() - started) * 1000)
+        return LLMResult(
+            success=self._success,
+            text=body,
+            model=self.model,
+            latency_ms=elapsed_ms,
+            extra={"rc": rc},
+        )
--- a/decnet/orchestrator/emailgen/llm/impl/ollama.py
+++ b/decnet/orchestrator/emailgen/llm/impl/ollama.py
@@ -0,0 +1,107 @@
+"""Ollama subprocess backend.
+
+Shells out to ``ollama run <model>`` with the prompt fed via stdin.
+Mirrors what the original prototype at ``DECNET-EMAILs/main.py`` did,
+but lifted out of the driver so the rest of emailgen never imports a
+specific transport.
+
+Why subprocess and not the Ollama HTTP API:
+* No new dependency (``ollama`` Python lib is optional).
+* Works on hosts where Ollama is bound to a unix socket, an unusual TCP
+  port, or behind a remote-mount layer — ``ollama run`` resolves all that.
+* Same path the operator uses by hand (``ollama run llama3.1``); easier
+  to debug discrepancies between worker output and a console session.
+
+Cost: per-call process spawn (~50ms on a warm box).  Acceptable for
+emailgen's tick rate (one email every 5 minutes by default).  When that
+cost matters, swap to an HTTP-API backend; the seam is in
+:mod:`decnet.orchestrator.emailgen.llm.factory`.
+"""
+from __future__ import annotations
+
+import asyncio
+import os
+import time
+from typing import Optional
+
+from decnet.logging import get_logger
+from decnet.orchestrator.emailgen.llm.base import (
+    LLMBackend,
+    LLMResult,
+    LLMTimeout,
+)
+
+log = get_logger("orchestrator.emailgen.llm")
+
+# Program name used as argv[0] when spawning the child process.
+_OLLAMA = "ollama"
+
+# Defaults are read from the environment once, at import time; the
+# OllamaBackend constructor arguments override them per instance.
+# The timeout is in seconds.
+_DEFAULT_MODEL = os.getenv("DECNET_EMAILGEN_MODEL", "llama3.1")
+_DEFAULT_TIMEOUT = float(os.getenv("DECNET_EMAILGEN_TIMEOUT", "60"))
+
+
+class OllamaBackend(LLMBackend):
+    """Concrete :class:`LLMBackend` that shells out to ``ollama run``.
+
+    Every :meth:`generate` call spawns one ``ollama run <model>`` child,
+    writes the prompt to its stdin and collects its stdout and stderr.
+    """
+
+    def __init__(
+        self,
+        *,
+        model: Optional[str] = None,
+        timeout: Optional[float] = None,
+    ) -> None:
+        """Configure the backend.
+
+        Args:
+            model: Model name passed to ``ollama run``.  ``None`` or an
+                empty string selects the module default.
+            timeout: Seconds allowed for one generation.  ``None`` selects
+                the module default; ``0`` is honoured as given.
+        """
+        self.model = model or _DEFAULT_MODEL
+        self.timeout = timeout if timeout is not None else _DEFAULT_TIMEOUT
+
+    @staticmethod
+    async def _kill_and_reap(proc: asyncio.subprocess.Process) -> None:
+        """Kill ``proc`` and wait until it has actually exited."""
+        try:
+            proc.kill()
+        except ProcessLookupError:
+            # The child exited on its own between the failure and the kill.
+            pass
+        # Without this wait the caller would return while the child and
+        # its pipe transports are still pending cleanup.
+        await proc.wait()
+
+    async def generate(self, prompt: str) -> LLMResult:
+        """Run the model once on ``prompt``.
+
+        Returns:
+            An :class:`LLMResult`.  ``success`` is ``False`` when the
+            ``ollama`` executable cannot be found (``rc`` 127), when the
+            child exits non-zero, or when its stdout is empty or only
+            whitespace; ``extra`` then carries ``rc`` and ``stderr``.
+
+        Raises:
+            LLMTimeout: The child did not finish within ``self.timeout``
+                seconds.  The child is killed and waited for before the
+                exception is raised.
+        """
+        t0 = time.monotonic()
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                _OLLAMA, "run", self.model,
+                stdin=asyncio.subprocess.PIPE,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+            )
+        except FileNotFoundError as exc:
+            latency_ms = int((time.monotonic() - t0) * 1000)
+            return LLMResult(
+                success=False,
+                text="",
+                model=self.model,
+                latency_ms=latency_ms,
+                # 127 is the shell's "command not found" exit status.
+                extra={"rc": 127, "stderr": f"argv[0] not found: {exc}"},
+            )
+        try:
+            stdout, stderr = await asyncio.wait_for(
+                proc.communicate(prompt.encode("utf-8")),
+                timeout=self.timeout,
+            )
+        except asyncio.TimeoutError as exc:
+            await self._kill_and_reap(proc)
+            raise LLMTimeout(
+                f"ollama run {self.model} exceeded {self.timeout}s"
+            ) from exc
+        except asyncio.CancelledError:
+            # The caller gave up on us (e.g. worker shutdown): do not
+            # leave an orphaned ``ollama run`` generating in the background.
+            await self._kill_and_reap(proc)
+            raise
+
+        latency_ms = int((time.monotonic() - t0) * 1000)
+        rc = proc.returncode if proc.returncode is not None else -1
+        text = stdout.decode("utf-8", "replace")
+        stderr_s = stderr.decode("utf-8", "replace")
+        # A zero exit with no visible output is still treated as a failure.
+        if rc != 0 or not text.strip():
+            log.warning(
+                "ollama backend non-zero / empty rc=%d model=%s stderr=%r",
+                rc, self.model, stderr_s[:200],
+            )
+            return LLMResult(
+                success=False,
+                text=text,
+                model=self.model,
+                latency_ms=latency_ms,
+                extra={"rc": rc, "stderr": stderr_s.strip()[:256]},
+            )
+        return LLMResult(
+            success=True,
+            text=text,
+            model=self.model,
+            latency_ms=latency_ms,
+            extra={"rc": rc},
+        )