Files
DECNET/decnet/realism/llm/base.py

48 lines
1.4 KiB
Python

"""Backend protocol shared by every LLM transport.
Deliberately narrow: realism consumers need one async ``generate``
call that takes a prompt string and returns the model's output text
plus enough metadata to populate per-event payloads (model name,
latency, success bit). Streaming, embeddings, multi-turn chat — all
out of scope here; realism only ever does one-shot single-prompt
generations.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Protocol
class LLMTimeout(Exception):
    """Raised when a generation exceeds the backend's wall-clock cap.

    Backends MUST raise this rather than returning silently empty
    output; the driver discriminates timeout from "model produced
    nothing useful" so payloads carry the right ``stage`` value.
    """
@dataclass
class LLMResult:
    """Outcome of one ``generate`` call.

    ``success`` is ``False`` when the backend ran cleanly but produced
    no usable output (e.g. an empty stdout). Hard failures (subprocess
    crash, network error) raise; soft failures land here so the driver
    can persist + log them as one event.
    """

    # False marks a soft failure: the call completed but yielded nothing usable.
    success: bool
    # Output text produced by the model for the prompt.
    text: str
    # Name of the model, carried through for per-event payloads.
    model: str
    # Latency of the call; milliseconds, going by the field name.
    latency_ms: int
    # Free-form additional metadata. ``default_factory`` gives every instance
    # its own empty dict, so results never share one mutable default.
    # NOTE(review): the expected keys are not defined in this module.
    extra: dict[str, Any] = field(default_factory=dict)
class LLMBackend(Protocol):
    """Minimal contract for a realism LLM provider.

    Any object exposing these two attributes and an async ``generate``
    method satisfies the protocol structurally; no inheritance is
    required.
    """

    # Name of the model this backend drives.
    model: str
    # Wall-clock cap for one generation.
    # NOTE(review): presumably seconds — the unit is not stated here; confirm.
    timeout: float

    async def generate(self, prompt: str) -> LLMResult:
        """Run one single-prompt generation and return its outcome.

        Args:
            prompt: The prompt string handed to the model.

        Returns:
            An ``LLMResult``; soft failures (the backend ran cleanly but
            produced no usable output) are reported with
            ``success=False`` rather than raised.

        Raises:
            LLMTimeout: When the generation exceeds the backend's
                wall-clock cap.
        """
        ...