refactor(emailgen): pluggable LLM backend (base/factory/impl)
Lift the Ollama subprocess shell-out out of EmailDriver and into a
proper provider subpackage shape:
decnet/orchestrator/emailgen/llm/
base.py — LLMBackend Protocol + LLMResult + LLMTimeout
factory.py — get_llm() reads DECNET_EMAILGEN_LLM
impl/ollama.py — current 'ollama run' subprocess path
impl/fake.py — canned-output backend used by tests
Driver now takes an LLMBackend on construction (or inherits the
factory default). Tests inject FakeBackend instead of monkeypatching
the subprocess layer, which is cleaner and ~10x faster. Swapping
Ollama for the Anthropic API / vLLM / llama.cpp is now a third branch
in factory.py; no driver rewrite needed.
Mirrors the convention used by decnet.web.db.factory + decnet.bus.factory
per the provider-subpackages-from-day-one rule in memory.
This commit is contained in:
6
decnet/orchestrator/emailgen/llm/impl/__init__.py
Normal file
6
decnet/orchestrator/emailgen/llm/impl/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""Concrete LLM-backend implementations.
|
||||
|
||||
Importers go through :func:`decnet.orchestrator.emailgen.llm.get_llm`,
|
||||
not these modules directly — same convention as
|
||||
:mod:`decnet.web.db.sqlite` and :mod:`decnet.bus.unix_client`.
|
||||
"""
|
||||
50
decnet/orchestrator/emailgen/llm/impl/fake.py
Normal file
50
decnet/orchestrator/emailgen/llm/impl/fake.py
Normal file
@@ -0,0 +1,50 @@
|
||||
"""In-process fake backend for tests.
|
||||
|
||||
Returns a canned ``Subject:\\n\\nbody`` string so the driver path can be
|
||||
exercised without an Ollama install. Configurable via ``DECNET_EMAILGEN_FAKE_OUTPUT``
|
||||
(env) or the ``output`` constructor arg — the env-var path lets
|
||||
integration tests run the worker end-to-end with deterministic output.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from decnet.orchestrator.emailgen.llm.base import LLMBackend, LLMResult
|
||||
|
||||
|
||||
# Canned reply (a ``Subject:`` line, a blank line, then the body) that
# FakeBackend falls back to when neither the ``output`` constructor argument
# nor the DECNET_EMAILGEN_FAKE_OUTPUT environment variable is supplied.
_DEFAULT_OUTPUT = (
|
||||
"Subject: Quick update\n\n"
|
||||
"Hi,\n\nFollowing up on the topic.\n\nBest regards,\nFake Persona\n"
|
||||
)
|
||||
|
||||
|
||||
class FakeBackend(LLMBackend):
    """Deterministic in-process backend that never spawns a model.

    The text handed back by :meth:`generate` is chosen once, at construction
    time, in this order of precedence:

    1. the ``output`` constructor argument, when it is not ``None``;
    2. the ``DECNET_EMAILGEN_FAKE_OUTPUT`` environment variable;
    3. the module-level ``_DEFAULT_OUTPUT`` string.

    Args:
        model: Label echoed back in every result's ``model`` field.
        timeout: Stored on the instance; :meth:`generate` does not consult it.
        output: Explicit canned text; ``None`` defers to the environment.
        success: When false, :meth:`generate` reports a failure with empty
            text and ``rc`` 1 instead of the canned output.
    """

    def __init__(
        self,
        *,
        model: str = "fake-model",
        timeout: float = 1.0,
        output: Optional[str] = None,
        success: bool = True,
    ) -> None:
        # Resolve the canned text up front so later changes to the
        # environment do not affect an already-built backend.
        if output is None:
            output = os.environ.get(
                "DECNET_EMAILGEN_FAKE_OUTPUT", _DEFAULT_OUTPUT
            )
        self._output = output
        self._success = success
        self.model = model
        self.timeout = timeout

    async def generate(self, prompt: str) -> LLMResult:  # noqa: ARG002
        """Return the canned result; ``prompt`` is accepted but ignored.

        Returns:
            An :class:`LLMResult` whose ``text`` is the canned output and
            ``extra["rc"]`` is 0 on success, or whose ``text`` is empty and
            ``extra["rc"]`` is 1 when the backend was built with a false
            ``success``.
        """
        started = time.monotonic()
        # No work happens between the two clock reads, so this is
        # effectively zero; it exists to populate the ``latency_ms`` field.
        elapsed_ms = int((time.monotonic() - started) * 1000)

        if self._success:
            text, rc = self._output, 0
        else:
            text, rc = "", 1

        return LLMResult(
            success=self._success,
            text=text,
            model=self.model,
            latency_ms=elapsed_ms,
            extra={"rc": rc},
        )
|
||||
107
decnet/orchestrator/emailgen/llm/impl/ollama.py
Normal file
107
decnet/orchestrator/emailgen/llm/impl/ollama.py
Normal file
@@ -0,0 +1,107 @@
|
||||
"""Ollama subprocess backend.
|
||||
|
||||
Shells out to ``ollama run <model>`` with the prompt fed via stdin.
|
||||
Mirrors what the original prototype at ``DECNET-EMAILs/main.py`` did,
|
||||
but lifted out of the driver so the rest of emailgen never imports a
|
||||
specific transport.
|
||||
|
||||
Why subprocess and not the Ollama HTTP API:
|
||||
* No new dependency (``ollama`` Python lib is optional).
|
||||
* Works on hosts where Ollama is bound to a unix socket, an unusual TCP
|
||||
port, or behind a remote-mount layer — `ollama run` resolves all that.
|
||||
* Same path the operator uses by hand (``ollama run llama3.1``); easier
|
||||
to debug discrepancies between worker output and a console session.
|
||||
|
||||
Cost: per-call process spawn (~50ms on a warm box). Acceptable for
|
||||
emailgen's tick rate (one email every 5 minutes by default). When that
|
||||
cost matters, swap to an HTTP-API backend; the seam is in
|
||||
:mod:`decnet.orchestrator.emailgen.llm.factory`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.orchestrator.emailgen.llm.base import (
|
||||
LLMBackend,
|
||||
LLMResult,
|
||||
LLMTimeout,
|
||||
)
|
||||
|
||||
log = get_logger("orchestrator.emailgen.llm")
|
||||
|
||||
# argv[0] of the subprocess spawned by OllamaBackend.generate.
_OLLAMA = "ollama"
|
||||
# Both defaults below are read from the environment once, when this module
# is imported; changing the variables afterwards does not affect them.
_DEFAULT_MODEL = os.environ.get("DECNET_EMAILGEN_MODEL", "llama3.1")
|
||||
# Seconds allowed for one ``ollama run`` call. float() raises ValueError at
# import time if DECNET_EMAILGEN_TIMEOUT is set to a non-numeric value.
_DEFAULT_TIMEOUT = float(os.environ.get("DECNET_EMAILGEN_TIMEOUT", "60"))
|
||||
|
||||
|
||||
class OllamaBackend(LLMBackend):
    """Concrete :class:`LLMBackend` that shells out to ``ollama run``.

    Each :meth:`generate` call spawns ``ollama run <model>``, writes the
    prompt to the child's stdin and collects stdout/stderr.

    Args:
        model: Model name passed to ``ollama run``. ``None`` or an empty
            string falls back to the module default.
        timeout: Seconds to wait for the child to finish. ``None`` falls
            back to the module default; an explicit ``0`` is kept as given.
    """

    def __init__(
        self,
        *,
        model: Optional[str] = None,
        timeout: Optional[float] = None,
    ) -> None:
        self.model = model or _DEFAULT_MODEL
        self.timeout = timeout if timeout is not None else _DEFAULT_TIMEOUT

    async def generate(self, prompt: str) -> LLMResult:
        """Run the prompt through ``ollama run`` and return its output.

        Args:
            prompt: Text sent to the child process on stdin, UTF-8 encoded.

        Returns:
            An :class:`LLMResult`. ``success`` is false when the ``ollama``
            executable cannot be found (``extra["rc"]`` is 127), when the
            child exits non-zero, or when stdout is empty or whitespace
            only; in the latter two cases ``extra["stderr"]`` carries up to
            256 characters of the child's stderr.

        Raises:
            LLMTimeout: The child did not finish within ``self.timeout``
                seconds. The child is killed and reaped before raising.
        """
        t0 = time.monotonic()
        try:
            proc = await asyncio.create_subprocess_exec(
                _OLLAMA, "run", self.model,
                stdin=asyncio.subprocess.PIPE,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
        except FileNotFoundError as exc:
            # Report a missing binary as an ordinary failed result, using
            # the shell's "command not found" exit code, instead of raising.
            latency_ms = int((time.monotonic() - t0) * 1000)
            return LLMResult(
                success=False,
                text="",
                model=self.model,
                latency_ms=latency_ms,
                extra={"rc": 127, "stderr": f"argv[0] not found: {exc}"},
            )

        try:
            stdout, stderr = await asyncio.wait_for(
                proc.communicate(prompt.encode("utf-8")),
                timeout=self.timeout,
            )
        except asyncio.TimeoutError as exc:
            try:
                proc.kill()
            except ProcessLookupError:
                # The child exited between the timeout and the kill.
                pass
            # Reap the child so it does not linger as a zombie and so the
            # pipe transports are closed before control leaves this method.
            await proc.wait()
            raise LLMTimeout(
                f"ollama run {self.model} exceeded {self.timeout}s"
            ) from exc

        latency_ms = int((time.monotonic() - t0) * 1000)
        rc = proc.returncode if proc.returncode is not None else -1
        # "replace" keeps undecodable bytes from raising on model output.
        text = stdout.decode("utf-8", "replace")
        stderr_s = stderr.decode("utf-8", "replace")

        if rc != 0 or not text.strip():
            log.warning(
                "ollama backend non-zero / empty rc=%d model=%s stderr=%r",
                rc, self.model, stderr_s[:200],
            )
            return LLMResult(
                success=False,
                text=text,
                model=self.model,
                latency_ms=latency_ms,
                extra={"rc": rc, "stderr": stderr_s.strip()[:256]},
            )

        return LLMResult(
            success=True,
            text=text,
            model=self.model,
            latency_ms=latency_ms,
            extra={"rc": rc},
        )
|
||||
Reference in New Issue
Block a user