From 155ab59ee837c7feab2d4f6f5b41a650c78dae37 Mon Sep 17 00:00:00 2001
From: anti <samuel@securejump.cl>
Date: Sat, 9 May 2026 23:09:36 -0400
Subject: [PATCH] feat(realism/llm): DB-backed LLMConfig, factory DB-first
 dispatch, Ollama HTTP mode

---
 decnet/realism/llm/config.py      | 123 +++++++++++++++++++++++
 decnet/realism/llm/factory.py     |  16 ++-
 decnet/realism/llm/impl/ollama.py | 102 ++++++++++++++++---
 tests/realism/llm/__init__.py     |   0
 tests/realism/llm/test_config.py  | 159 ++++++++++++++++++++++++++++++
 5 files changed, 385 insertions(+), 15 deletions(-)
 create mode 100644 decnet/realism/llm/config.py
 create mode 100644 tests/realism/llm/__init__.py
 create mode 100644 tests/realism/llm/test_config.py

diff --git a/decnet/realism/llm/config.py b/decnet/realism/llm/config.py
new file mode 100644
index 00000000..8bd4425f
--- /dev/null
+++ b/decnet/realism/llm/config.py
@@ -0,0 +1,123 @@
+"""DB-backed LLM provider configuration for the realism subsystem.
+
+The module holds a process-level cached backend that callers obtain via
+:func:`decnet.realism.llm.factory.get_llm`.  The cache is populated by:
+
+* The API process: :func:`load_from_db` called on first GET, then
+  ``apply`` on each successful PUT.
+* The orchestrator worker: :func:`load_from_db` called on the same
+  periodic tick that refreshes planner weights.
+
+``get_llm()`` falls back to the env-var path when the cache is ``None``
+(i.e. the DB row does not exist yet or has never been loaded).
+"""
+from __future__ import annotations
+
+import json
+import re
+from typing import Any, Optional
+
+from pydantic import BaseModel, Field, field_validator
+
+from decnet.logging import get_logger
+
+log = get_logger("realism.llm.config")
+
+_SUPPORTED_PROVIDERS = {"ollama", "fake"}
+_HTTP_RE = re.compile(r"^https?://", re.IGNORECASE)
+
+# Process-level singleton — rebuilt by apply(), read by get_llm().
+_cached_backend: Optional[Any] = None
+
+_CONFIG_KEY = "llm"
+
+
+class LLMConfig(BaseModel):
+    """Operator-tunable LLM provider settings stored in ``realism_config``."""
+
+    provider: str = Field(default="ollama")
+    base_url: Optional[str] = Field(default=None)
+    model: str = Field(default="llama3.1")
+    timeout: float = Field(default=60.0, gt=0)
+    # Never returned to callers — encrypted Fernet token, write-only.
+    api_key_ciphertext: Optional[str] = Field(default=None)
+
+    @field_validator("provider")
+    @classmethod
+    def _validate_provider(cls, v: str) -> str:
+        """Accept only the providers listed in ``_SUPPORTED_PROVIDERS``."""
+        if v not in _SUPPORTED_PROVIDERS:
+            allowed = sorted(_SUPPORTED_PROVIDERS)
+            raise ValueError(f"provider must be one of {allowed}, got {v!r}")
+        return v
+
+    @field_validator("base_url")
+    @classmethod
+    def _validate_base_url(cls, v: Optional[str]) -> Optional[str]:
+        """Map blank input to ``None``; otherwise require http(s):// plus a host."""
+        v = (v or "").strip()  # tolerate pasted whitespace / trailing newlines
+        if v and not (_HTTP_RE.match(v) and v.split("/")[2]):
+            raise ValueError("base_url must be an http:// or https:// URL with a host")
+        return v.rstrip("/") or None
+
+
+def get_cached_backend() -> Optional[Any]:
+    """Return the module-level ``_cached_backend``; ``None`` until one is installed."""
+    return _cached_backend
+
+
+def apply(cfg: LLMConfig) -> None:
+    """Install the backend described by *cfg* as the process-level cache.
+
+    Existing circuit-breaker state is NOT reset — retuning a URL must not
+    wipe a tripped breaker.
+    """
+    global _cached_backend
+
+    provider = cfg.provider
+    if provider not in ("fake", "ollama"):
+        raise ValueError(f"apply: unsupported provider {cfg.provider!r}")
+
+    if provider == "fake":
+        from decnet.realism.llm.impl.fake import FakeBackend
+        _cached_backend = FakeBackend(model="fake-model")
+        log.info("realism.llm.config: applied provider=fake")
+        return
+
+    # Ollama: the API key is decrypted only when a ciphertext is configured.
+    secret: Optional[str] = None
+    if cfg.api_key_ciphertext:
+        from decnet.web.db.secrets import decrypt_secret
+        secret = decrypt_secret(cfg.api_key_ciphertext)
+
+    from decnet.realism.llm.impl.ollama import OllamaBackend
+    shown_url = cfg.base_url or "(subprocess)"
+    _cached_backend = OllamaBackend(
+        model=cfg.model, timeout=cfg.timeout, base_url=cfg.base_url, api_key=secret,
+    )
+
+    log.info(
+        "realism.llm.config: applied provider=ollama model=%s base_url=%s",
+        cfg.model, shown_url,
+    )
+
+
+async def load_from_db(repo: Any) -> Optional[LLMConfig]:
+    """Fetch the ``key='llm'`` RealismConfig row and parse it into a config.
+
+    A missing row, a failed read and an unparseable value all yield ``None``
+    (the latter two after a warning) so callers can use env-var defaults.
+    """
+    try:
+        record = await repo.get_realism_config(_CONFIG_KEY)
+    except Exception as exc:  # noqa: BLE001
+        log.warning("realism.llm.config: DB read failed: %s", exc)
+        return None
+
+    if record is not None:
+        try:
+            raw = record.get("value") or "{}"
+            return LLMConfig(**json.loads(raw))
+        except Exception as exc:  # noqa: BLE001
+            log.warning("realism.llm.config: malformed config row: %s", exc)
+    return None
diff --git a/decnet/realism/llm/factory.py b/decnet/realism/llm/factory.py
index 6711d2b0..06323a8f 100644
--- a/decnet/realism/llm/factory.py
+++ b/decnet/realism/llm/factory.py
@@ -24,7 +24,14 @@ from decnet.realism.llm.base import LLMBackend
 
 
 def get_llm(*, model: str | None = None, **kwargs: Any) -> LLMBackend:
-    """Instantiate the LLM backend selected by environment.
+    """Instantiate the LLM backend selected by DB config or environment.
+
+    Resolution order:
+    1. Process-level cached backend (populated by the DB config row via
+       :func:`decnet.realism.llm.config.apply`).  Returned as-is when
+       *model* and *kwargs* are both absent — the common case.
+    2. Env-var path (``DECNET_REALISM_LLM`` / ``DECNET_REALISM_MODEL`` /
+       ``DECNET_REALISM_TIMEOUT``) — legacy / default-install fallback.
 
     *model* (when provided) overrides whatever the backend's own default
     is — e.g. for :class:`OllamaBackend` that's ``llama3.1`` unless
@@ -32,6 +39,13 @@ def get_llm(*, model: str | None = None, **kwargs: Any) -> LLMBackend:
     ``decnet orchestrate --model gpt-oss`` without each backend having
     to know about CLI flags.
     """
+    # Fast path: DB-configured cached backend.
+    if model is None and not kwargs:
+        from decnet.realism.llm.config import get_cached_backend
+        cached = get_cached_backend()
+        if cached is not None:
+            return cached
+
     backend_key = os.environ.get("DECNET_REALISM_LLM", "ollama").lower()
 
     if backend_key == "ollama":
diff --git a/decnet/realism/llm/impl/ollama.py b/decnet/realism/llm/impl/ollama.py
index 5c1735bc..66d4aed7 100644
--- a/decnet/realism/llm/impl/ollama.py
+++ b/decnet/realism/llm/impl/ollama.py
@@ -1,18 +1,17 @@
-"""Ollama subprocess backend.
+"""Ollama backend — subprocess (local) or HTTP (remote).
 
-Shells out to ``ollama run <model>`` with the prompt fed via stdin.
+**Subprocess mode** (default, ``base_url=None``)
+  Shells out to ``ollama run <model>`` with the prompt on stdin.
+  Works on any host where Ollama is reachable however it's bound —
+  unix socket, unusual TCP port, remote-mount — because ``ollama run``
+  resolves all of that transparently.
 
-Why subprocess and not the Ollama HTTP API:
-* No new dependency (``ollama`` Python lib is optional).
-* Works on hosts where Ollama is bound to a unix socket, an unusual TCP
-  port, or behind a remote-mount layer — `ollama run` resolves all that.
-* Same path the operator uses by hand (``ollama run llama3.1``); easier
-  to debug discrepancies between worker output and a console session.
-
-Cost: per-call process spawn (~50ms on a warm box).  Acceptable for
-realism tick rates (one body per ~5 minutes per persona by default).
-When that cost matters, swap to an HTTP-API backend; the seam is in
-:mod:`decnet.realism.llm.factory`.
+**HTTP mode** (``base_url`` set, e.g. ``http://10.0.0.1:11434``)
+  POSTs to ``{base_url}/api/generate`` via httpx (non-streaming).
+  Required when targeting a remote Ollama daemon.  ``api_key`` is sent
+  as ``Authorization: Bearer`` when provided (for reverse-proxy setups).
+  No shell metacharacters ever reach the network call — base_url is
+  validated by :class:`decnet.realism.llm.config.LLMConfig` before storage.
 """
 from __future__ import annotations
 
@@ -32,18 +31,93 @@ _DEFAULT_TIMEOUT = float(os.environ.get("DECNET_REALISM_TIMEOUT", "60"))
 
 
 class OllamaBackend(LLMBackend):
-    """Concrete :class:`LLMBackend` that shells out to ``ollama run``."""
+    """Concrete :class:`LLMBackend` for Ollama — subprocess or HTTP."""
 
     def __init__(
         self,
         *,
         model: Optional[str] = None,
         timeout: Optional[float] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
     ) -> None:
         self.model = model or _DEFAULT_MODEL
         self.timeout = timeout if timeout is not None else _DEFAULT_TIMEOUT
+        self.base_url = (base_url or "").rstrip("/") or None  # avoid "//" in request URLs
+        self.api_key = api_key or None  # empty string means "no key"
 
     async def generate(self, prompt: str) -> LLMResult:
+        if self.base_url:  # a configured base URL selects HTTP mode
+            return await self._generate_http(prompt)
+        return await self._generate_subprocess(prompt)  # default: subprocess mode
+
+    async def _generate_http(self, prompt: str) -> LLMResult:
+        """POST *prompt* to ``{base_url}/api/generate`` (non-streaming).
+
+        Returns ``success=True`` only for a 200 reply carrying non-blank
+        text; transport errors, non-200 statuses and blank or non-string
+        ``response`` values come back as ``success=False`` results.
+
+        Raises :class:`LLMTimeout` when httpx reports a request timeout.
+        """
+        import httpx
+
+        url = f"{self.base_url}/api/generate"
+        headers: dict[str, str] = {"Content-Type": "application/json"}
+        if self.api_key:
+            headers["Authorization"] = f"Bearer {self.api_key}"
+        payload = {"model": self.model, "prompt": prompt, "stream": False}
+
+        t0 = time.monotonic()
+        try:
+            async with httpx.AsyncClient(timeout=self.timeout) as client:
+                resp = await client.post(url, json=payload, headers=headers)
+        except httpx.TimeoutException as exc:
+            raise LLMTimeout(
+                f"ollama HTTP {self.model} exceeded {self.timeout}s"
+            ) from exc
+        except Exception as exc:  # noqa: BLE001
+            latency_ms = int((time.monotonic() - t0) * 1000)
+            log.warning("ollama HTTP error model=%s exc=%s", self.model, exc)
+            return LLMResult(
+                success=False, text="", model=self.model,
+                latency_ms=latency_ms, extra={"error": str(exc)},
+            )
+
+        latency_ms = int((time.monotonic() - t0) * 1000)
+        if resp.status_code != 200:
+            log.warning(
+                "ollama HTTP non-200 model=%s status=%d body=%r",
+                self.model, resp.status_code, resp.text[:200],
+            )
+            return LLMResult(
+                success=False, text="", model=self.model, latency_ms=latency_ms,
+                extra={"status": resp.status_code, "body": resp.text[:256]},
+            )
+
+        try:
+            data = resp.json()
+        except ValueError:
+            # Body is not JSON: keep the best-effort fallback to the raw text.
+            text = resp.text
+        else:
+            # Non-string "response" values (null, numbers) are treated as empty.
+            raw = data.get("response", "") if isinstance(data, dict) else resp.text
+            text = raw if isinstance(raw, str) else ""
+
+        if not text.strip():
+            log.warning("ollama HTTP empty response model=%s", self.model)
+            return LLMResult(
+                success=False, text=text, model=self.model,
+                latency_ms=latency_ms, extra={"status": resp.status_code},
+            )
+
+        return LLMResult(
+            success=True, text=text, model=self.model,
+            latency_ms=latency_ms, extra={"status": resp.status_code},
+        )
+
+    async def _generate_subprocess(self, prompt: str) -> LLMResult:
         t0 = time.monotonic()
         try:
             proc = await asyncio.create_subprocess_exec(
diff --git a/tests/realism/llm/__init__.py b/tests/realism/llm/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/realism/llm/test_config.py b/tests/realism/llm/test_config.py
new file mode 100644
index 00000000..c4a289af
--- /dev/null
+++ b/tests/realism/llm/test_config.py
@@ -0,0 +1,159 @@
+"""Tests for decnet.realism.llm.config and the updated factory DB-first path."""
+from __future__ import annotations
+
+import json
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from decnet.realism.llm import config as _cfg_mod
+from decnet.realism.llm import get_llm
+from decnet.realism.llm.impl.fake import FakeBackend
+from decnet.realism.llm.impl.ollama import OllamaBackend
+
+
+# ── LLMConfig validation ──────────────────────────────────────────────────────
+
+
+def test_defaults():
+    """A bare ``LLMConfig()`` carries the documented default values."""
+    cfg = _cfg_mod.LLMConfig()
+    assert (cfg.provider, cfg.model) == ("ollama", "llama3.1")
+    assert cfg.base_url is None
+    assert cfg.timeout == 60.0
+
+
+def test_base_url_trailing_slash_stripped():
+    normalised = _cfg_mod.LLMConfig(base_url="http://localhost:11434/").base_url
+    assert normalised == "http://localhost:11434"
+
+
+def test_base_url_empty_string_normalised_to_none():
+    """An empty ``base_url`` is stored as ``None``."""
+    assert _cfg_mod.LLMConfig(base_url="").base_url is None
+
+
+def test_base_url_non_http_rejected():
+    import pydantic  # local import, as in the other validation tests
+    with pytest.raises(pydantic.ValidationError, match="http"):
+        _cfg_mod.LLMConfig(base_url="ollama://localhost")
+
+
+def test_unknown_provider_rejected():
+    import pydantic  # local import, as in the other validation tests
+    with pytest.raises(pydantic.ValidationError):
+        _cfg_mod.LLMConfig(provider="vllm")
+
+
+# ── apply() builds the right backend ─────────────────────────────────────────
+
+
+def test_apply_ollama_no_url(monkeypatch):
+    # setattr (not bare assignment) so the global cache is restored afterwards.
+    monkeypatch.setattr(_cfg_mod, "_cached_backend", None)
+    _cfg_mod.apply(_cfg_mod.LLMConfig(provider="ollama", model="phi3"))
+    b = _cfg_mod.get_cached_backend()
+    assert isinstance(b, OllamaBackend)
+    assert (b.model, b.base_url) == ("phi3", None)
+
+
+def test_apply_ollama_with_url(monkeypatch):
+    # setattr (not bare assignment) so the global cache is restored afterwards.
+    monkeypatch.setattr(_cfg_mod, "_cached_backend", None)
+    url = "http://10.0.0.1:11434"
+    _cfg_mod.apply(
+        _cfg_mod.LLMConfig(provider="ollama", model="llama3.1", base_url=url)
+    )
+    b = _cfg_mod.get_cached_backend()
+    assert isinstance(b, OllamaBackend)
+    assert b.base_url == "http://10.0.0.1:11434"
+
+
+def test_apply_fake(monkeypatch):
+    # setattr (not bare assignment) so the global cache is restored afterwards.
+    monkeypatch.setattr(_cfg_mod, "_cached_backend", None)
+    _cfg_mod.apply(_cfg_mod.LLMConfig(provider="fake"))
+    assert isinstance(_cfg_mod.get_cached_backend(), FakeBackend)
+
+
+def test_apply_ollama_with_api_key(monkeypatch):
+    from cryptography.fernet import Fernet
+    monkeypatch.setenv("DECNET_SECRET_KEY", Fernet.generate_key().decode())
+    from decnet.web.db.secrets import encrypt_secret
+    ct = encrypt_secret("sk-supersecret")
+    # setattr (not bare assignment) so the global cache is restored afterwards.
+    monkeypatch.setattr(_cfg_mod, "_cached_backend", None)
+    _cfg_mod.apply(_cfg_mod.LLMConfig(provider="ollama", api_key_ciphertext=ct))
+    b = _cfg_mod.get_cached_backend()
+    assert isinstance(b, OllamaBackend)
+    assert b.api_key == "sk-supersecret"
+
+
+# ── load_from_db ──────────────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_load_from_db_returns_none_when_no_row():
+    """A repository with no ``llm`` row yields ``None``."""
+    fetch = AsyncMock(return_value=None)
+    repo = MagicMock(get_realism_config=fetch)
+    assert await _cfg_mod.load_from_db(repo) is None
+
+
+@pytest.mark.asyncio
+async def test_load_from_db_parses_valid_row():
+    """A well-formed JSON ``value`` is parsed into an ``LLMConfig``."""
+    stored = json.dumps(
+        {"provider": "ollama", "model": "qwen2:7b", "timeout": 30}
+    )
+    fetch = AsyncMock(return_value={"value": stored})
+    cfg = await _cfg_mod.load_from_db(MagicMock(get_realism_config=fetch))
+    assert cfg is not None
+    assert cfg.timeout == 30.0
+    assert cfg.model == "qwen2:7b"
+
+
+@pytest.mark.asyncio
+async def test_load_from_db_returns_none_on_bad_json():
+    """An unparseable ``value`` is swallowed and reported as ``None``."""
+    fetch = AsyncMock(return_value={"value": "not-json{{"})
+    repo = MagicMock(get_realism_config=fetch)
+    assert await _cfg_mod.load_from_db(repo) is None
+
+
+@pytest.mark.asyncio
+async def test_load_from_db_returns_none_on_db_error():
+    """A repository read that raises is reported as ``None``."""
+    fetch = AsyncMock(side_effect=RuntimeError("db down"))
+    repo = MagicMock(get_realism_config=fetch)
+    assert await _cfg_mod.load_from_db(repo) is None
+
+
+# ── factory DB-first path ─────────────────────────────────────────────────────
+
+
+def test_factory_uses_cached_backend_when_set(monkeypatch):
+    # setattr (not bare assignment) so the global cache is restored afterwards.
+    monkeypatch.setattr(_cfg_mod, "_cached_backend", None)
+    _cfg_mod.apply(_cfg_mod.LLMConfig(provider="fake"))
+    assert isinstance(get_llm(), FakeBackend)
+
+
+def test_factory_falls_back_to_env_when_no_cache(monkeypatch):
+    # setattr (not bare assignment) so the global cache is restored afterwards.
+    monkeypatch.setattr(_cfg_mod, "_cached_backend", None)
+    monkeypatch.setenv("DECNET_REALISM_LLM", "ollama")
+    assert isinstance(get_llm(), OllamaBackend)
+
+
+def test_factory_model_override_bypasses_cache(monkeypatch):
+    """An explicit ``model=`` skips the cached backend and uses env dispatch."""
+    # setattr/setenv (not bare assignment) so global state is restored afterwards.
+    monkeypatch.setattr(_cfg_mod, "_cached_backend", None)
+    _cfg_mod.apply(_cfg_mod.LLMConfig(provider="fake"))
+    # Pin the env selector: ``setdefault`` would keep a pre-existing value
+    # and leak the variable into later tests.
+    monkeypatch.setenv("DECNET_REALISM_LLM", "ollama")
+    backend = get_llm(model="llama3:8b")
+    assert isinstance(backend, OllamaBackend)
+    assert backend.model == "llama3:8b"