From 155ab59ee837c7feab2d4f6f5b41a650c78dae37 Mon Sep 17 00:00:00 2001
From: anti
Date: Sat, 9 May 2026 23:09:36 -0400
Subject: [PATCH] feat(realism/llm): DB-backed LLMConfig, factory DB-first dispatch, Ollama HTTP mode

Add decnet.realism.llm.config: a validated LLMConfig model (provider,
base_url, model, timeout, encrypted api_key) parsed from the
realism_config row with key "llm", plus a process-level cached backend
that apply() installs.

get_llm() now returns that cached backend when it is called without a
model override or extra kwargs; otherwise it falls through to the
existing env-var dispatch.

OllamaBackend gains an HTTP mode: when base_url is set it POSTs to
{base_url}/api/generate via httpx (with a Bearer api_key when one is
configured); without base_url it keeps shelling out to "ollama run".

Review fixes folded in:
- ollama HTTP: a null or non-string "response" field is coerced to a
  string before .strip(), instead of raising AttributeError.
- tests: reset the cached backend around every test so it cannot leak
  into other test modules, and pin DECNET_REALISM_LLM with monkeypatch
  instead of mutating os.environ.

---
 decnet/realism/llm/config.py      | 123 ++++++++++++++++++++++
 decnet/realism/llm/factory.py     |  16 ++-
 decnet/realism/llm/impl/ollama.py | 108 +++++++++++++++++---
 tests/realism/llm/__init__.py     |   0
 tests/realism/llm/test_config.py  | 167 ++++++++++++++++++++++++++++++
 5 files changed, 399 insertions(+), 15 deletions(-)
 create mode 100644 decnet/realism/llm/config.py
 create mode 100644 tests/realism/llm/__init__.py
 create mode 100644 tests/realism/llm/test_config.py

diff --git a/decnet/realism/llm/config.py b/decnet/realism/llm/config.py
new file mode 100644
index 00000000..8bd4425f
--- /dev/null
+++ b/decnet/realism/llm/config.py
@@ -0,0 +1,123 @@
+"""DB-backed LLM provider configuration for the realism subsystem.
+
+The module holds a process-level cached backend that callers obtain via
+:func:`decnet.realism.llm.factory.get_llm`. The cache is populated by:
+
+* The API process: :func:`load_from_db` called on first GET, then
+  ``apply`` on each successful PUT.
+* The orchestrator worker: :func:`load_from_db` called on the same
+  periodic tick that refreshes planner weights.
+
+``get_llm()`` falls back to the env-var path when the cache is ``None``
+(i.e. the DB row does not exist yet or has never been loaded).
+"""
+from __future__ import annotations
+
+import json
+import re
+from typing import Any, Optional
+
+from pydantic import BaseModel, Field, field_validator
+
+from decnet.logging import get_logger
+
+log = get_logger("realism.llm.config")
+
+_SUPPORTED_PROVIDERS = {"ollama", "fake"}
+_HTTP_RE = re.compile(r"^https?://", re.IGNORECASE)
+
+# Process-level singleton — rebuilt by apply(), read by get_llm().
+_cached_backend: Optional[Any] = None
+
+_CONFIG_KEY = "llm"
+
+
+class LLMConfig(BaseModel):
+    """Operator-tunable LLM provider settings stored in ``realism_config``."""
+
+    provider: str = Field(default="ollama")
+    base_url: Optional[str] = Field(default=None)
+    model: str = Field(default="llama3.1")
+    timeout: float = Field(default=60.0, gt=0)
+    # Never returned to callers — encrypted Fernet token, write-only.
+    api_key_ciphertext: Optional[str] = Field(default=None)
+
+    @field_validator("provider")
+    @classmethod
+    def _validate_provider(cls, v: str) -> str:
+        if v not in _SUPPORTED_PROVIDERS:
+            raise ValueError(
+                f"provider must be one of {sorted(_SUPPORTED_PROVIDERS)}, got {v!r}"
+            )
+        return v
+
+    @field_validator("base_url")
+    @classmethod
+    def _validate_base_url(cls, v: Optional[str]) -> Optional[str]:
+        if v is None or v == "":
+            return None
+        if not _HTTP_RE.match(v):
+            raise ValueError("base_url must start with http:// or https://")
+        return v.rstrip("/")
+
+
+def get_cached_backend() -> Optional[Any]:
+    """Return the cached LLMBackend, or ``None`` if not yet hydrated."""
+    return _cached_backend
+
+
+def apply(cfg: LLMConfig) -> None:
+    """Build a backend from *cfg* and install it as the process cache.
+
+    Existing circuit-breaker state is NOT reset — don't wipe a tripped
+    breaker just because the operator tuned a URL.
+    """
+    global _cached_backend
+
+    if cfg.provider == "fake":
+        from decnet.realism.llm.impl.fake import FakeBackend
+        _cached_backend = FakeBackend(model="fake-model")
+        log.info("realism.llm.config: applied provider=fake")
+        return
+
+    if cfg.provider == "ollama":
+        api_key: Optional[str] = None
+        if cfg.api_key_ciphertext:
+            from decnet.web.db.secrets import decrypt_secret
+            api_key = decrypt_secret(cfg.api_key_ciphertext)
+
+        from decnet.realism.llm.impl.ollama import OllamaBackend
+        _cached_backend = OllamaBackend(
+            model=cfg.model,
+            timeout=cfg.timeout,
+            base_url=cfg.base_url,
+            api_key=api_key,
+        )
+        log.info(
+            "realism.llm.config: applied provider=ollama model=%s base_url=%s",
+            cfg.model, cfg.base_url or "(subprocess)",
+        )
+        return
+
+    raise ValueError(f"apply: unsupported provider {cfg.provider!r}")
+
+
+async def load_from_db(repo: Any) -> Optional[LLMConfig]:
+    """Load the ``key='llm'`` RealismConfig row and return a parsed config.
+
+    Returns ``None`` when the row doesn't exist or the JSON is malformed;
+    callers fall back to env-var defaults in both cases.
+    """
+    try:
+        row = await repo.get_realism_config(_CONFIG_KEY)
+    except Exception as exc:  # noqa: BLE001
+        log.warning("realism.llm.config: DB read failed: %s", exc)
+        return None
+    if row is None:
+        return None
+    try:
+        data = json.loads(row.get("value") or "{}")
+        return LLMConfig(**data)
+    except Exception as exc:  # noqa: BLE001
+        log.warning("realism.llm.config: malformed config row: %s", exc)
+        return None
diff --git a/decnet/realism/llm/factory.py b/decnet/realism/llm/factory.py
index 6711d2b0..06323a8f 100644
--- a/decnet/realism/llm/factory.py
+++ b/decnet/realism/llm/factory.py
@@ -24,7 +24,14 @@ from decnet.realism.llm.base import LLMBackend
 
 
 def get_llm(*, model: str | None = None, **kwargs: Any) -> LLMBackend:
-    """Instantiate the LLM backend selected by environment.
+    """Instantiate the LLM backend selected by DB config or environment.
+
+    Resolution order:
+    1. Process-level cached backend (populated by the DB config row via
+       :func:`decnet.realism.llm.config.apply`). Returned as-is when
+       *model* and *kwargs* are both absent — the common case.
+    2. Env-var path (``DECNET_REALISM_LLM`` / ``DECNET_REALISM_MODEL`` /
+       ``DECNET_REALISM_TIMEOUT``) — legacy / default-install fallback.
 
     *model* (when provided) overrides whatever the backend's own default
     is — e.g. for :class:`OllamaBackend` that's ``llama3.1`` unless
@@ -32,6 +39,13 @@ def get_llm(*, model: str | None = None, **kwargs: Any) -> LLMBackend:
     ``decnet orchestrate --model gpt-oss`` without each backend having
     to know about CLI flags.
     """
+    # Fast path: DB-configured cached backend.
+    if model is None and not kwargs:
+        from decnet.realism.llm.config import get_cached_backend
+        cached = get_cached_backend()
+        if cached is not None:
+            return cached
+
     backend_key = os.environ.get("DECNET_REALISM_LLM", "ollama").lower()
 
     if backend_key == "ollama":
diff --git a/decnet/realism/llm/impl/ollama.py b/decnet/realism/llm/impl/ollama.py
index 5c1735bc..66d4aed7 100644
--- a/decnet/realism/llm/impl/ollama.py
+++ b/decnet/realism/llm/impl/ollama.py
@@ -1,18 +1,17 @@
-"""Ollama subprocess backend.
+"""Ollama backend — subprocess (local) or HTTP (remote).
 
-Shells out to ``ollama run `` with the prompt fed via stdin.
+**Subprocess mode** (default, ``base_url=None``)
+    Shells out to ``ollama run <model>`` with the prompt on stdin.
+    Works on any host where Ollama is reachable however it's bound —
+    unix socket, unusual TCP port, remote-mount — because ``ollama run``
+    resolves all of that transparently.
 
-Why subprocess and not the Ollama HTTP API:
-* No new dependency (``ollama`` Python lib is optional).
-* Works on hosts where Ollama is bound to a unix socket, an unusual TCP
-  port, or behind a remote-mount layer — `ollama run` resolves all that.
-* Same path the operator uses by hand (``ollama run llama3.1``); easier
-  to debug discrepancies between worker output and a console session.
-
-Cost: per-call process spawn (~50ms on a warm box). Acceptable for
-realism tick rates (one body per ~5 minutes per persona by default).
-When that cost matters, swap to an HTTP-API backend; the seam is in
-:mod:`decnet.realism.llm.factory`.
+**HTTP mode** (``base_url`` set, e.g. ``http://10.0.0.1:11434``)
+    POSTs to ``{base_url}/api/generate`` via httpx (non-streaming).
+    Required when targeting a remote Ollama daemon. ``api_key`` is sent
+    as ``Authorization: Bearer`` when provided (for reverse-proxy setups).
+    No shell metacharacters ever reach the network call — base_url is
+    validated by :class:`decnet.realism.llm.config.LLMConfig` before storage.
 """
 from __future__ import annotations
 
@@ -32,18 +31,99 @@ _DEFAULT_TIMEOUT = float(os.environ.get("DECNET_REALISM_TIMEOUT", "60"))
 
 
 class OllamaBackend(LLMBackend):
-    """Concrete :class:`LLMBackend` that shells out to ``ollama run``."""
+    """Concrete :class:`LLMBackend` for Ollama — subprocess or HTTP."""
 
     def __init__(
         self,
         *,
         model: Optional[str] = None,
         timeout: Optional[float] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
     ) -> None:
         self.model = model or _DEFAULT_MODEL
         self.timeout = timeout if timeout is not None else _DEFAULT_TIMEOUT
+        self.base_url = base_url or None
+        self.api_key = api_key or None
 
     async def generate(self, prompt: str) -> LLMResult:
+        if self.base_url:
+            return await self._generate_http(prompt)
+        return await self._generate_subprocess(prompt)
+
+    async def _generate_http(self, prompt: str) -> LLMResult:
+        """Generate via ``POST {base_url}/api/generate`` (non-streaming).
+
+        Raises :class:`LLMTimeout` when httpx times out. Any other request
+        error, a non-200 status, or an empty completion is reported as
+        ``LLMResult(success=False)`` rather than raised.
+        """
+        import httpx
+
+        url = f"{self.base_url}/api/generate"
+        headers: dict[str, str] = {"Content-Type": "application/json"}
+        if self.api_key:
+            headers["Authorization"] = f"Bearer {self.api_key}"
+        payload = {"model": self.model, "prompt": prompt, "stream": False}
+
+        t0 = time.monotonic()
+        try:
+            async with httpx.AsyncClient(timeout=self.timeout) as client:
+                resp = await client.post(url, json=payload, headers=headers)
+        except httpx.TimeoutException as exc:
+            raise LLMTimeout(
+                f"ollama HTTP {self.model} exceeded {self.timeout}s"
+            ) from exc
+        except Exception as exc:  # noqa: BLE001
+            latency_ms = int((time.monotonic() - t0) * 1000)
+            log.warning("ollama HTTP error model=%s exc=%s", self.model, exc)
+            return LLMResult(
+                success=False,
+                text="",
+                model=self.model,
+                latency_ms=latency_ms,
+                extra={"error": str(exc)},
+            )
+
+        latency_ms = int((time.monotonic() - t0) * 1000)
+        if resp.status_code != 200:
+            log.warning(
+                "ollama HTTP non-200 model=%s status=%d body=%r",
+                self.model, resp.status_code, resp.text[:200],
+            )
+            return LLMResult(
+                success=False,
+                text="",
+                model=self.model,
+                latency_ms=latency_ms,
+                extra={"status": resp.status_code, "body": resp.text[:256]},
+            )
+
+        try:
+            data = resp.json()
+            text = str(data.get("response") or "")  # tolerate null/non-str
+        except Exception:  # noqa: BLE001 — non-JSON or non-object body
+            text = resp.text
+
+        if not text.strip():
+            log.warning("ollama HTTP empty response model=%s", self.model)
+            return LLMResult(
+                success=False,
+                text=text,
+                model=self.model,
+                latency_ms=latency_ms,
+                extra={"status": resp.status_code},
+            )
+
+        return LLMResult(
+            success=True,
+            text=text,
+            model=self.model,
+            latency_ms=latency_ms,
+            extra={"status": resp.status_code},
+        )
+
+    async def _generate_subprocess(self, prompt: str) -> LLMResult:
         t0 = time.monotonic()
         try:
             proc = await asyncio.create_subprocess_exec(
diff --git a/tests/realism/llm/__init__.py b/tests/realism/llm/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/realism/llm/test_config.py b/tests/realism/llm/test_config.py
new file mode 100644
index 00000000..c4a289af
--- /dev/null
+++ b/tests/realism/llm/test_config.py
@@ -0,0 +1,167 @@
+"""Tests for decnet.realism.llm.config and the updated factory DB-first path."""
+from __future__ import annotations
+
+import json
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from decnet.realism.llm import config as _cfg_mod
+from decnet.realism.llm import get_llm
+from decnet.realism.llm.impl.fake import FakeBackend
+from decnet.realism.llm.impl.ollama import OllamaBackend
+
+
+@pytest.fixture(autouse=True)
+def _reset_cached_backend():
+    """Keep the process-level backend cache from leaking between tests."""
+    _cfg_mod._cached_backend = None
+    yield
+    _cfg_mod._cached_backend = None
+
+
+# ── LLMConfig validation ──────────────────────────────────────────────────────
+
+
+def test_defaults():
+    c = _cfg_mod.LLMConfig()
+    assert c.provider == "ollama"
+    assert c.base_url is None
+    assert c.model == "llama3.1"
+    assert c.timeout == 60.0
+
+
+def test_base_url_trailing_slash_stripped():
+    c = _cfg_mod.LLMConfig(base_url="http://localhost:11434/")
+    assert c.base_url == "http://localhost:11434"
+
+
+def test_base_url_empty_string_normalised_to_none():
+    c = _cfg_mod.LLMConfig(base_url="")
+    assert c.base_url is None
+
+
+def test_base_url_non_http_rejected():
+    from pydantic import ValidationError
+    with pytest.raises(ValidationError, match="http"):
+        _cfg_mod.LLMConfig(base_url="ollama://localhost")
+
+
+def test_unknown_provider_rejected():
+    from pydantic import ValidationError
+    with pytest.raises(ValidationError):
+        _cfg_mod.LLMConfig(provider="vllm")
+
+
+# ── apply() builds the right backend ─────────────────────────────────────────
+
+
+def test_apply_ollama_no_url():
+    _cfg_mod._cached_backend = None
+    _cfg_mod.apply(_cfg_mod.LLMConfig(provider="ollama", model="phi3"))
+    b = _cfg_mod.get_cached_backend()
+    assert isinstance(b, OllamaBackend)
+    assert b.model == "phi3"
+    assert b.base_url is None
+
+
+def test_apply_ollama_with_url():
+    _cfg_mod._cached_backend = None
+    _cfg_mod.apply(_cfg_mod.LLMConfig(
+        provider="ollama",
+        model="llama3.1",
+        base_url="http://10.0.0.1:11434",
+    ))
+    b = _cfg_mod.get_cached_backend()
+    assert isinstance(b, OllamaBackend)
+    assert b.base_url == "http://10.0.0.1:11434"
+
+
+def test_apply_fake():
+    _cfg_mod._cached_backend = None
+    _cfg_mod.apply(_cfg_mod.LLMConfig(provider="fake"))
+    b = _cfg_mod.get_cached_backend()
+    assert isinstance(b, FakeBackend)
+
+
+def test_apply_ollama_with_api_key(monkeypatch):
+    from cryptography.fernet import Fernet
+    key = Fernet.generate_key().decode()
+    monkeypatch.setenv("DECNET_SECRET_KEY", key)
+    from decnet.web.db.secrets import encrypt_secret
+    ct = encrypt_secret("sk-supersecret")
+    _cfg_mod._cached_backend = None
+    _cfg_mod.apply(_cfg_mod.LLMConfig(provider="ollama", api_key_ciphertext=ct))
+    b = _cfg_mod.get_cached_backend()
+    assert isinstance(b, OllamaBackend)
+    assert b.api_key == "sk-supersecret"
+
+
+# ── load_from_db ──────────────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_load_from_db_returns_none_when_no_row():
+    repo = MagicMock()
+    repo.get_realism_config = AsyncMock(return_value=None)
+    result = await _cfg_mod.load_from_db(repo)
+    assert result is None
+
+
+@pytest.mark.asyncio
+async def test_load_from_db_parses_valid_row():
+    repo = MagicMock()
+    payload = {"provider": "ollama", "model": "qwen2:7b", "timeout": 30}
+    repo.get_realism_config = AsyncMock(
+        return_value={"value": json.dumps(payload)}
+    )
+    result = await _cfg_mod.load_from_db(repo)
+    assert result is not None
+    assert result.model == "qwen2:7b"
+    assert result.timeout == 30.0
+
+
+@pytest.mark.asyncio
+async def test_load_from_db_returns_none_on_bad_json():
+    repo = MagicMock()
+    repo.get_realism_config = AsyncMock(return_value={"value": "not-json{{"})
+    result = await _cfg_mod.load_from_db(repo)
+    assert result is None
+
+
+@pytest.mark.asyncio
+async def test_load_from_db_returns_none_on_db_error():
+    repo = MagicMock()
+    repo.get_realism_config = AsyncMock(side_effect=RuntimeError("db down"))
+    result = await _cfg_mod.load_from_db(repo)
+    assert result is None
+
+
+# ── factory DB-first path ─────────────────────────────────────────────────────
+
+
+def test_factory_uses_cached_backend_when_set():
+    _cfg_mod._cached_backend = None
+    _cfg_mod.apply(_cfg_mod.LLMConfig(provider="fake"))
+    backend = get_llm()
+    assert isinstance(backend, FakeBackend)
+
+
+def test_factory_falls_back_to_env_when_no_cache(monkeypatch):
+    _cfg_mod._cached_backend = None
+    monkeypatch.setenv("DECNET_REALISM_LLM", "ollama")
+    backend = get_llm()
+    assert isinstance(backend, OllamaBackend)
+
+
+def test_factory_model_override_bypasses_cache(monkeypatch):
+    _cfg_mod._cached_backend = None
+    _cfg_mod.apply(_cfg_mod.LLMConfig(provider="fake"))
+    # An explicit model override skips the cached-backend fast path and
+    # falls through to env dispatch. Pin the provider with monkeypatch so
+    # the test neither depends on nor mutates the ambient environment
+    # (os.environ.setdefault keeps a pre-set value and leaks ours).
+    monkeypatch.setenv("DECNET_REALISM_LLM", "ollama")
+    backend = get_llm(model="llama3:8b")
+    assert isinstance(backend, OllamaBackend)
+    assert backend.model == "llama3:8b"