feat(realism/llm): DB-backed LLMConfig, factory DB-first dispatch, Ollama HTTP mode
This commit is contained in:
123
decnet/realism/llm/config.py
Normal file
123
decnet/realism/llm/config.py
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
"""DB-backed LLM provider configuration for the realism subsystem.
|
||||||
|
|
||||||
|
The module holds a process-level cached backend that callers obtain via
|
||||||
|
:func:`decnet.realism.llm.factory.get_llm`. The cache is populated by:
|
||||||
|
|
||||||
|
* The API process: :func:`load_from_db` called on first GET, then
|
||||||
|
``apply`` on each successful PUT.
|
||||||
|
* The orchestrator worker: :func:`load_from_db` called on the same
|
||||||
|
periodic tick that refreshes planner weights.
|
||||||
|
|
||||||
|
``get_llm()`` falls back to the env-var path when the cache is ``None``
|
||||||
|
(i.e. the DB row does not exist yet or has never been loaded).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field, field_validator
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
|
||||||
|
log = get_logger("realism.llm.config")
|
||||||
|
|
||||||
|
_SUPPORTED_PROVIDERS = {"ollama", "fake"}
|
||||||
|
_HTTP_RE = re.compile(r"^https?://", re.IGNORECASE)
|
||||||
|
|
||||||
|
# Process-level singleton — rebuilt by apply(), read by get_llm().
|
||||||
|
_cached_backend: Optional[Any] = None
|
||||||
|
|
||||||
|
_CONFIG_KEY = "llm"
|
||||||
|
|
||||||
|
|
||||||
|
class LLMConfig(BaseModel):
    """Pydantic model for the LLM provider settings kept in ``realism_config``.

    ``provider`` must be one of ``_SUPPORTED_PROVIDERS``. ``base_url`` is
    optional: empty values are normalised to ``None``, anything else must be
    an http(s) URL and is stored without trailing slashes. ``timeout`` must
    be strictly positive.
    """

    provider: str = "ollama"
    base_url: Optional[str] = None
    model: str = "llama3.1"
    timeout: float = Field(default=60.0, gt=0)
    # Encrypted Fernet token; intended to be write-only and never returned
    # to callers.
    # NOTE(review): nothing on this field excludes it from serialisation, so
    # callers that dump the model must strip it themselves — confirm.
    api_key_ciphertext: Optional[str] = None

    @field_validator("provider")
    @classmethod
    def _validate_provider(cls, value: str) -> str:
        """Accept only providers listed in ``_SUPPORTED_PROVIDERS``."""
        if value in _SUPPORTED_PROVIDERS:
            return value
        raise ValueError(
            f"provider must be one of {sorted(_SUPPORTED_PROVIDERS)}, got {value!r}"
        )

    @field_validator("base_url")
    @classmethod
    def _validate_base_url(cls, value: Optional[str]) -> Optional[str]:
        """Normalise ``base_url``: empty -> ``None``, else an http(s) URL."""
        if not value:
            # Both ``None`` and "" mean "no URL configured".
            return None
        if _HTTP_RE.match(value) is None:
            raise ValueError("base_url must start with http:// or https://")
        return value.rstrip("/")
|
||||||
|
|
||||||
|
|
||||||
|
def get_cached_backend() -> Optional[Any]:
    """Expose the process-level backend held in ``_cached_backend``.

    Returns ``None`` while no backend has been installed.
    """
    backend = _cached_backend
    return backend
|
||||||
|
|
||||||
|
|
||||||
|
def apply(cfg: LLMConfig) -> None:
    """Construct the backend described by *cfg* and store it as the cache.

    ``provider="fake"`` installs a ``FakeBackend``; ``provider="ollama"``
    installs an ``OllamaBackend`` built from the model, timeout, base URL
    and (decrypted) API key in *cfg*.

    NOTE(review): the previous docstring stated that circuit-breaker state
    is not reset by this call. Nothing here touches breaker state, and a
    fresh backend instance is created each time — confirm the breaker lives
    outside the backend object.

    Raises:
        ValueError: if ``cfg.provider`` is neither ``"fake"`` nor ``"ollama"``.
    """
    global _cached_backend

    provider = cfg.provider
    if provider not in ("fake", "ollama"):
        raise ValueError(f"apply: unsupported provider {cfg.provider!r}")

    if provider == "fake":
        from decnet.realism.llm.impl.fake import FakeBackend

        _cached_backend = FakeBackend(model="fake-model")
        log.info("realism.llm.config: applied provider=fake")
        return

    # provider == "ollama": decrypt the stored key only when one is present.
    secret: Optional[str] = None
    if cfg.api_key_ciphertext:
        from decnet.web.db.secrets import decrypt_secret

        secret = decrypt_secret(cfg.api_key_ciphertext)

    from decnet.realism.llm.impl.ollama import OllamaBackend

    backend = OllamaBackend(
        model=cfg.model,
        timeout=cfg.timeout,
        base_url=cfg.base_url,
        api_key=secret,
    )
    _cached_backend = backend
    # No base_url means the backend runs in subprocess mode.
    shown_url = cfg.base_url or "(subprocess)"
    log.info(
        "realism.llm.config: applied provider=ollama model=%s base_url=%s",
        cfg.model, shown_url,
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def load_from_db(repo: Any) -> Optional[LLMConfig]:
    """Fetch the ``key='llm'`` realism-config row and parse it.

    Returns ``None`` in each of these cases:

    * ``repo.get_realism_config`` raises (logged as a warning);
    * no row exists;
    * the row's ``value`` is not valid JSON or fails ``LLMConfig``
      validation (logged as a warning).

    A row whose ``value`` is empty or missing parses as an all-defaults
    config.
    """
    try:
        record = await repo.get_realism_config(_CONFIG_KEY)
    except Exception as exc:  # noqa: BLE001
        log.warning("realism.llm.config: DB read failed: %s", exc)
        return None

    if record is None:
        return None

    try:
        raw_value = record.get("value") or "{}"
        fields = json.loads(raw_value)
        parsed = LLMConfig(**fields)
    except Exception as exc:  # noqa: BLE001
        log.warning("realism.llm.config: malformed config row: %s", exc)
        return None
    return parsed
|
||||||
@@ -24,7 +24,14 @@ from decnet.realism.llm.base import LLMBackend
|
|||||||
|
|
||||||
|
|
||||||
def get_llm(*, model: str | None = None, **kwargs: Any) -> LLMBackend:
|
def get_llm(*, model: str | None = None, **kwargs: Any) -> LLMBackend:
|
||||||
"""Instantiate the LLM backend selected by environment.
|
"""Instantiate the LLM backend selected by DB config or environment.
|
||||||
|
|
||||||
|
Resolution order:
|
||||||
|
1. Process-level cached backend (populated by the DB config row via
|
||||||
|
:func:`decnet.realism.llm.config.apply`). Returned as-is when
|
||||||
|
*model* and *kwargs* are both absent — the common case.
|
||||||
|
2. Env-var path (``DECNET_REALISM_LLM`` / ``DECNET_REALISM_MODEL`` /
|
||||||
|
``DECNET_REALISM_TIMEOUT``) — legacy / default-install fallback.
|
||||||
|
|
||||||
*model* (when provided) overrides whatever the backend's own default
|
*model* (when provided) overrides whatever the backend's own default
|
||||||
is — e.g. for :class:`OllamaBackend` that's ``llama3.1`` unless
|
is — e.g. for :class:`OllamaBackend` that's ``llama3.1`` unless
|
||||||
@@ -32,6 +39,13 @@ def get_llm(*, model: str | None = None, **kwargs: Any) -> LLMBackend:
|
|||||||
``decnet orchestrate --model gpt-oss`` without each backend having
|
``decnet orchestrate --model gpt-oss`` without each backend having
|
||||||
to know about CLI flags.
|
to know about CLI flags.
|
||||||
"""
|
"""
|
||||||
|
# Fast path: DB-configured cached backend.
|
||||||
|
if model is None and not kwargs:
|
||||||
|
from decnet.realism.llm.config import get_cached_backend
|
||||||
|
cached = get_cached_backend()
|
||||||
|
if cached is not None:
|
||||||
|
return cached
|
||||||
|
|
||||||
backend_key = os.environ.get("DECNET_REALISM_LLM", "ollama").lower()
|
backend_key = os.environ.get("DECNET_REALISM_LLM", "ollama").lower()
|
||||||
|
|
||||||
if backend_key == "ollama":
|
if backend_key == "ollama":
|
||||||
|
|||||||
@@ -1,18 +1,17 @@
|
|||||||
"""Ollama subprocess backend.
|
"""Ollama backend — subprocess (local) or HTTP (remote).
|
||||||
|
|
||||||
Shells out to ``ollama run <model>`` with the prompt fed via stdin.
|
**Subprocess mode** (default, ``base_url=None``)
|
||||||
|
Shells out to ``ollama run <model>`` with the prompt on stdin.
|
||||||
|
Works on any host where Ollama is reachable however it's bound —
|
||||||
|
unix socket, unusual TCP port, remote-mount — because ``ollama run``
|
||||||
|
resolves all of that transparently.
|
||||||
|
|
||||||
Why subprocess and not the Ollama HTTP API:
|
**HTTP mode** (``base_url`` set, e.g. ``http://10.0.0.1:11434``)
|
||||||
* No new dependency (``ollama`` Python lib is optional).
|
POSTs to ``{base_url}/api/generate`` via httpx (non-streaming).
|
||||||
* Works on hosts where Ollama is bound to a unix socket, an unusual TCP
|
Required when targeting a remote Ollama daemon. ``api_key`` is sent
|
||||||
port, or behind a remote-mount layer — `ollama run` resolves all that.
|
as ``Authorization: Bearer`` when provided (for reverse-proxy setups).
|
||||||
* Same path the operator uses by hand (``ollama run llama3.1``); easier
|
No shell metacharacters ever reach the network call — base_url is
|
||||||
to debug discrepancies between worker output and a console session.
|
validated by :class:`decnet.realism.llm.config.LLMConfig` before storage.
|
||||||
|
|
||||||
Cost: per-call process spawn (~50ms on a warm box). Acceptable for
|
|
||||||
realism tick rates (one body per ~5 minutes per persona by default).
|
|
||||||
When that cost matters, swap to an HTTP-API backend; the seam is in
|
|
||||||
:mod:`decnet.realism.llm.factory`.
|
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
@@ -32,18 +31,93 @@ _DEFAULT_TIMEOUT = float(os.environ.get("DECNET_REALISM_TIMEOUT", "60"))
|
|||||||
|
|
||||||
|
|
||||||
class OllamaBackend(LLMBackend):
|
class OllamaBackend(LLMBackend):
|
||||||
"""Concrete :class:`LLMBackend` that shells out to ``ollama run``."""
|
"""Concrete :class:`LLMBackend` for Ollama — subprocess or HTTP."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
model: Optional[str] = None,
|
model: Optional[str] = None,
|
||||||
timeout: Optional[float] = None,
|
timeout: Optional[float] = None,
|
||||||
|
base_url: Optional[str] = None,
|
||||||
|
api_key: Optional[str] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.model = model or _DEFAULT_MODEL
|
self.model = model or _DEFAULT_MODEL
|
||||||
self.timeout = timeout if timeout is not None else _DEFAULT_TIMEOUT
|
self.timeout = timeout if timeout is not None else _DEFAULT_TIMEOUT
|
||||||
|
self.base_url = base_url or None
|
||||||
|
self.api_key = api_key or None
|
||||||
|
|
||||||
async def generate(self, prompt: str) -> LLMResult:
|
async def generate(self, prompt: str) -> LLMResult:
|
||||||
|
if self.base_url:
|
||||||
|
return await self._generate_http(prompt)
|
||||||
|
return await self._generate_subprocess(prompt)
|
||||||
|
|
||||||
|
async def _generate_http(self, prompt: str) -> LLMResult:
    """Generate a completion by POSTing to ``{base_url}/api/generate``.

    Sends one non-streaming request carrying ``model`` and ``prompt``.
    When ``api_key`` is set it is sent as an ``Authorization: Bearer``
    header.

    Returns:
        An :class:`LLMResult`. ``success`` is ``False`` for transport
        errors, non-200 statuses, and empty, blank or non-string
        generations; ``extra`` carries the error text or HTTP status.

    Raises:
        LLMTimeout: when httpx reports a timeout for the request.
    """
    import httpx

    url = f"{self.base_url}/api/generate"
    headers: dict[str, str] = {"Content-Type": "application/json"}
    if self.api_key:
        headers["Authorization"] = f"Bearer {self.api_key}"
    payload = {"model": self.model, "prompt": prompt, "stream": False}

    t0 = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            resp = await client.post(url, json=payload, headers=headers)
    except httpx.TimeoutException as exc:
        raise LLMTimeout(
            f"ollama HTTP {self.model} exceeded {self.timeout}s"
        ) from exc
    except Exception as exc:  # noqa: BLE001
        # Any other transport failure is reported as a failed result, not raised.
        latency_ms = int((time.monotonic() - t0) * 1000)
        log.warning("ollama HTTP error model=%s exc=%s", self.model, exc)
        return LLMResult(
            success=False,
            text="",
            model=self.model,
            latency_ms=latency_ms,
            extra={"error": str(exc)},
        )

    latency_ms = int((time.monotonic() - t0) * 1000)
    if resp.status_code != 200:
        log.warning(
            "ollama HTTP non-200 model=%s status=%d body=%r",
            self.model, resp.status_code, resp.text[:200],
        )
        return LLMResult(
            success=False,
            text="",
            model=self.model,
            latency_ms=latency_ms,
            extra={"status": resp.status_code, "body": resp.text[:256]},
        )

    try:
        data = resp.json()
    except Exception:  # noqa: BLE001
        # Best-effort: a body that is not JSON is used verbatim.
        text = resp.text
    else:
        if isinstance(data, dict):
            generated = data.get("response", "")
            # A null or non-string "response" would crash on .strip()
            # below; treat it as an empty generation instead.
            text = generated if isinstance(generated, str) else ""
        else:
            # JSON that is not an object has no "response" key; fall back
            # to the raw body, as for non-JSON responses.
            text = resp.text

    if not text.strip():
        log.warning("ollama HTTP empty response model=%s", self.model)
        return LLMResult(
            success=False,
            text=text,
            model=self.model,
            latency_ms=latency_ms,
            extra={"status": resp.status_code},
        )

    return LLMResult(
        success=True,
        text=text,
        model=self.model,
        latency_ms=latency_ms,
        extra={"status": resp.status_code},
    )
|
||||||
|
|
||||||
|
async def _generate_subprocess(self, prompt: str) -> LLMResult:
|
||||||
t0 = time.monotonic()
|
t0 = time.monotonic()
|
||||||
try:
|
try:
|
||||||
proc = await asyncio.create_subprocess_exec(
|
proc = await asyncio.create_subprocess_exec(
|
||||||
|
|||||||
0
tests/realism/llm/__init__.py
Normal file
0
tests/realism/llm/__init__.py
Normal file
159
tests/realism/llm/test_config.py
Normal file
159
tests/realism/llm/test_config.py
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
"""Tests for decnet.realism.llm.config and the updated factory DB-first path."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from unittest.mock import AsyncMock, MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from decnet.realism.llm import config as _cfg_mod
|
||||||
|
from decnet.realism.llm import get_llm
|
||||||
|
from decnet.realism.llm.impl.fake import FakeBackend
|
||||||
|
from decnet.realism.llm.impl.ollama import OllamaBackend
|
||||||
|
|
||||||
|
|
||||||
|
# ── LLMConfig validation ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_defaults():
    """A bare ``LLMConfig()`` carries the documented default values."""
    cfg = _cfg_mod.LLMConfig()
    assert cfg.base_url is None
    assert (cfg.provider, cfg.model, cfg.timeout) == ("ollama", "llama3.1", 60.0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_base_url_trailing_slash_stripped():
    """Validation removes a trailing ``/`` from ``base_url``."""
    cfg = _cfg_mod.LLMConfig(base_url="http://localhost:11434/")
    expected = "http://localhost:11434"
    assert cfg.base_url == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_base_url_empty_string_normalised_to_none():
    """An empty ``base_url`` is stored as ``None``."""
    normalised = _cfg_mod.LLMConfig(base_url="").base_url
    assert normalised is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_base_url_non_http_rejected():
    """A ``base_url`` with a non-http(s) scheme fails validation."""
    from pydantic import ValidationError

    bad_url = "ollama://localhost"
    with pytest.raises(ValidationError, match="http"):
        _cfg_mod.LLMConfig(base_url=bad_url)
|
||||||
|
|
||||||
|
|
||||||
|
def test_unknown_provider_rejected():
    """A provider outside the supported set fails validation."""
    from pydantic import ValidationError

    unsupported = "vllm"
    with pytest.raises(ValidationError):
        _cfg_mod.LLMConfig(provider=unsupported)
|
||||||
|
|
||||||
|
|
||||||
|
# ── apply() builds the right backend ─────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_ollama_no_url():
    """``apply`` with no ``base_url`` installs an OllamaBackend without a URL."""
    _cfg_mod._cached_backend = None
    cfg = _cfg_mod.LLMConfig(provider="ollama", model="phi3")
    _cfg_mod.apply(cfg)

    installed = _cfg_mod.get_cached_backend()
    assert isinstance(installed, OllamaBackend)
    assert installed.model == "phi3"
    assert installed.base_url is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_ollama_with_url():
    """``apply`` forwards a configured ``base_url`` to the OllamaBackend."""
    _cfg_mod._cached_backend = None
    endpoint = "http://10.0.0.1:11434"
    cfg = _cfg_mod.LLMConfig(provider="ollama", model="llama3.1", base_url=endpoint)
    _cfg_mod.apply(cfg)

    installed = _cfg_mod.get_cached_backend()
    assert isinstance(installed, OllamaBackend)
    assert installed.base_url == "http://10.0.0.1:11434"
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_fake():
    """``provider="fake"`` installs a FakeBackend as the cached backend."""
    _cfg_mod._cached_backend = None
    fake_cfg = _cfg_mod.LLMConfig(provider="fake")
    _cfg_mod.apply(fake_cfg)
    assert isinstance(_cfg_mod.get_cached_backend(), FakeBackend)
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_ollama_with_api_key(monkeypatch):
    """An encrypted API key in the config reaches the backend decrypted."""
    from cryptography.fernet import Fernet

    # The secret key is set in the environment before the secrets helper is
    # imported and used, as in the original ordering.
    secret_key = Fernet.generate_key().decode()
    monkeypatch.setenv("DECNET_SECRET_KEY", secret_key)
    from decnet.web.db.secrets import encrypt_secret

    ciphertext = encrypt_secret("sk-supersecret")

    _cfg_mod._cached_backend = None
    cfg = _cfg_mod.LLMConfig(provider="ollama", api_key_ciphertext=ciphertext)
    _cfg_mod.apply(cfg)

    installed = _cfg_mod.get_cached_backend()
    assert isinstance(installed, OllamaBackend)
    assert installed.api_key == "sk-supersecret"
|
||||||
|
|
||||||
|
|
||||||
|
# ── load_from_db ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_load_from_db_returns_none_when_no_row():
    """A repository that returns no row makes ``load_from_db`` return ``None``."""
    fetch = AsyncMock(return_value=None)
    repo = MagicMock()
    repo.get_realism_config = fetch
    loaded = await _cfg_mod.load_from_db(repo)
    assert loaded is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_load_from_db_parses_valid_row():
    """A well-formed JSON row is parsed into an ``LLMConfig``."""
    stored = json.dumps({"provider": "ollama", "model": "qwen2:7b", "timeout": 30})
    repo = MagicMock()
    repo.get_realism_config = AsyncMock(return_value={"value": stored})

    loaded = await _cfg_mod.load_from_db(repo)

    assert loaded is not None
    assert loaded.model == "qwen2:7b"
    assert loaded.timeout == 30.0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_load_from_db_returns_none_on_bad_json():
    """A row whose value is not valid JSON yields ``None``."""
    broken_row = {"value": "not-json{{"}
    repo = MagicMock()
    repo.get_realism_config = AsyncMock(return_value=broken_row)
    loaded = await _cfg_mod.load_from_db(repo)
    assert loaded is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_load_from_db_returns_none_on_db_error():
    """An exception raised by the repository read yields ``None``."""
    failing_fetch = AsyncMock(side_effect=RuntimeError("db down"))
    repo = MagicMock()
    repo.get_realism_config = failing_fetch
    loaded = await _cfg_mod.load_from_db(repo)
    assert loaded is None
|
||||||
|
|
||||||
|
|
||||||
|
# ── factory DB-first path ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_factory_uses_cached_backend_when_set():
    """``get_llm()`` with no arguments returns the backend installed by ``apply``."""
    _cfg_mod._cached_backend = None
    fake_cfg = _cfg_mod.LLMConfig(provider="fake")
    _cfg_mod.apply(fake_cfg)
    assert isinstance(get_llm(), FakeBackend)
|
||||||
|
|
||||||
|
|
||||||
|
def test_factory_falls_back_to_env_when_no_cache(monkeypatch):
    """With an empty cache, ``get_llm()`` dispatches on ``DECNET_REALISM_LLM``."""
    _cfg_mod._cached_backend = None
    monkeypatch.setenv("DECNET_REALISM_LLM", "ollama")
    resolved = get_llm()
    assert isinstance(resolved, OllamaBackend)
|
||||||
|
|
||||||
|
|
||||||
|
def test_factory_model_override_bypasses_cache(monkeypatch):
    """An explicit ``model=`` skips the cached backend and uses env dispatch.

    The cache is primed with a FakeBackend. Getting an OllamaBackend back
    shows that the fast path was not taken.
    """
    _cfg_mod._cached_backend = None
    _cfg_mod.apply(_cfg_mod.LLMConfig(provider="fake"))
    # setenv (rather than os.environ.setdefault) forces the value even when
    # the variable is already set, and is undone automatically at teardown
    # so it cannot leak into other tests.
    monkeypatch.setenv("DECNET_REALISM_LLM", "ollama")
    backend = get_llm(model="llama3:8b")
    assert isinstance(backend, OllamaBackend)
    assert backend.model == "llama3:8b"
|
||||||
Reference in New Issue
Block a user