refactor(realism): move emailgen LLM/personas/prompt into shared library

Lift the format-agnostic pieces from decnet/orchestrator/emailgen/
into the new decnet/realism/ library so file-class content generation
(stage 3 of the realism migration) can reuse them. Email-specific
delivery (RFC 2822 EML, IMAP/POP3 spool, thread chains) stays in
orchestrator/.

Renames (history-preserving git mv):
  emailgen/personas.py     -> realism/personas.py
  emailgen/prompt.py       -> realism/prompts/email.py
  emailgen/global_pool.py  -> realism/personas_pool.py
  emailgen/llm/            -> realism/llm/

Env-var clean break (pre-v1, no aliases):
  DECNET_EMAILGEN_LLM      -> DECNET_REALISM_LLM
  DECNET_EMAILGEN_MODEL    -> DECNET_REALISM_MODEL
  DECNET_EMAILGEN_TIMEOUT  -> DECNET_REALISM_TIMEOUT
  DECNET_EMAILGEN_PERSONAS -> DECNET_REALISM_PERSONAS
  DECNET_EMAILGEN_FAKE_OUTPUT -> DECNET_REALISM_FAKE_OUTPUT

Importers rewritten in: orchestrator/emailgen/scheduler.py,
orchestrator/drivers/email.py, web/router/{emailgen,topology}/
api_personas.py, cli/emailgen.py. Tests for moved modules are relocated
to tests/realism/; tests for modules that stay put are updated in place.

API URL `/api/v1/emailgen/personas` and CLI `decnet emailgen
import-personas` keep their public names until the service-collapse
commit (stage 5).
This commit is contained in:
2026-04-27 16:05:43 -04:00
parent f57c621117
commit 0b9873982d
34 changed files with 455 additions and 298 deletions

View File

@@ -58,7 +58,7 @@ def test_in_work_hours_equal_start_end_means_always_on() -> None:
)
def test_malformed_window_fails_open(garbage: str) -> None:
# The fleet must not silence on a typo — same fail-open semantics
# as decnet.orchestrator.emailgen.personas.in_active_hours.
# as decnet.realism.personas.in_active_hours.
assert in_work_hours(garbage, _NOW) is True

View File

@@ -0,0 +1,152 @@
"""Prompt builder behaviour: language constraint, em-dash suppression,
deterministic mannerism injection."""
from __future__ import annotations
import random
from decnet.realism.personas import EmailPersona
from decnet.realism.prompts.email import (
PromptInputs,
build,
select_mannerisms,
)
def _persona(**over) -> EmailPersona:
    """Build an ``EmailPersona`` for tests; keyword args override the defaults."""
    defaults = {
        "name": "John Smith",
        "email": "john@corp.com",
        "role": "COO",
        "tone": "formal",
        "mannerisms": [
            "opens with 'I hope this finds you well'",
            "uses 'Best regards' exclusively",
            "references policy by number",
            "ccs legal",
        ],
        "language": "en",
    }
    # Caller-supplied overrides win over the defaults above.
    return EmailPersona(**{**defaults, **over})
class _SeededRng:
    """Reproducible RNG adapter so prompt code thinks it has a SystemRandom.

    Exposes ``shuffle``, ``random`` and ``choice``, each delegating to a
    ``random.Random`` created from the given seed.
    """

    def __init__(self, seed: int):
        # Seeded generator: the same seed always yields the same sequence.
        self._r = random.Random(seed)

    def shuffle(self, seq):
        """Shuffle *seq* in place using the seeded generator."""
        self._r.shuffle(seq)

    def random(self):
        """Return the next float from the seeded generator."""
        return self._r.random()

    def choice(self, seq):
        """Return one element of *seq* picked by the seeded generator."""
        picked = self._r.choice(seq)
        return picked
def test_select_mannerisms_returns_subset_of_pool():
    """Asking for two mannerisms yields exactly two, all from the persona's pool."""
    sender = _persona()
    chosen = select_mannerisms(sender, rng=_SeededRng(0), n=2)
    assert len(chosen) == 2
    for mannerism in chosen:
        assert mannerism in sender.mannerisms
def test_select_mannerisms_deterministic_under_same_seed():
    """Two selections driven by identically seeded RNGs must agree."""
    sender = _persona()
    first = select_mannerisms(sender, rng=_SeededRng(42), n=2)
    second = select_mannerisms(sender, rng=_SeededRng(42), n=2)
    assert first == second
def test_select_mannerisms_returns_all_when_pool_smaller_than_n():
    """A one-item pool is returned whole even when two picks are requested."""
    sender = _persona(mannerisms=["a"])
    chosen = select_mannerisms(sender, rng=_SeededRng(0), n=2)
    assert chosen == ["a"]
def test_select_mannerisms_empty_pool():
    """A persona without mannerisms yields an empty selection."""
    sender = _persona(mannerisms=[])
    chosen = select_mannerisms(sender)
    assert chosen == []
def test_build_includes_language_constraint_english():
    """A sender with language "en" produces a prompt containing "in English"."""
    author = _persona(language="en")
    addressee = _persona(name="Sarah", email="sarah@corp.com", role="PM")
    inputs = PromptInputs(sender=author, recipient=addressee, context_hint="budget")
    text, _ = build(inputs, rng=_SeededRng(0))
    assert "in English" in text
def test_build_includes_language_constraint_spanish():
    """A sender with language "es" produces a prompt containing "in Spanish"."""
    author = _persona(language="es")
    addressee = _persona(name="Sarah", email="sarah@corp.com", role="PM")
    inputs = PromptInputs(sender=author, recipient=addressee, context_hint="budget")
    text, _ = build(inputs, rng=_SeededRng(0))
    assert "in Spanish" in text
def test_build_em_dash_suppression_default():
    """For a default persona the prompt carries the em-dash prohibition."""
    author = _persona()
    addressee = _persona(name="Sarah", email="sarah@corp.com", role="PM")
    inputs = PromptInputs(sender=author, recipient=addressee, context_hint="budget")
    text, _ = build(inputs, rng=_SeededRng(0))
    assert "Do NOT use em-dashes" in text
def test_build_em_dash_lifted_for_llm_heavy_persona():
    """With ``uses_llms_heavily=True`` the em-dash prohibition is absent."""
    author = _persona(uses_llms_heavily=True)
    addressee = _persona(name="Sarah", email="sarah@corp.com", role="PM")
    inputs = PromptInputs(sender=author, recipient=addressee, context_hint="budget")
    text, _ = build(inputs, rng=_SeededRng(0))
    assert "Do NOT use em-dashes" not in text
    # The prompt is expected to contain the word "fine" in some casing instead.
    lowered = text.lower()
    assert "fine" in lowered
def test_build_reply_thread_block_prefixes_re():
    """Supplying parent subject/excerpt adds the reply-thread wording to the prompt."""
    author = _persona()
    addressee = _persona(name="Sarah", email="sarah@corp.com", role="PM")
    inputs = PromptInputs(
        sender=author,
        recipient=addressee,
        context_hint="budget",
        parent_subject="Re: Q3 budget",
        parent_excerpt="Numbers attached.",
    )
    text, _ = build(inputs, rng=_SeededRng(0))
    # Each fragment must appear: the reply framing, the quoted parent
    # subject and excerpt, and the instruction about the 'Re: ' prefix.
    for fragment in (
        "REPLY in an ongoing thread",
        "Re: Q3 budget",
        "Numbers attached",
        "prefixed with 'Re: '",
    ):
        assert fragment in text
def test_build_returns_mannerisms_used_metadata():
    """``build`` returns a non-empty second value drawn from the sender's mannerisms."""
    author = _persona()
    addressee = _persona(name="Sarah", email="sarah@corp.com", role="PM")
    inputs = PromptInputs(sender=author, recipient=addressee, context_hint="budget")
    _, applied = build(inputs, rng=_SeededRng(7))
    assert applied
    for mannerism in applied:
        assert mannerism in author.mannerisms
def test_build_uses_explicit_signature_when_provided():
    """A persona with a ``signature`` makes the prompt demand that exact block."""
    author = _persona(signature="-- John\\nCOO")
    addressee = _persona(name="Sarah", email="sarah@corp.com", role="PM")
    inputs = PromptInputs(sender=author, recipient=addressee, context_hint="budget")
    text, _ = build(inputs, rng=_SeededRng(0))
    assert "Use this exact signature block" in text

137
tests/realism/test_llm.py Normal file
View File

@@ -0,0 +1,137 @@
"""LLM backend factory + Ollama implementation."""
from __future__ import annotations
import asyncio
import pytest
from decnet.realism.llm import LLMTimeout, get_llm
from decnet.realism.llm.impl.fake import FakeBackend
from decnet.realism.llm.impl.ollama import OllamaBackend
# ── factory dispatch ─────────────────────────────────────────────────────────
def test_factory_default_is_ollama(monkeypatch):
    """With DECNET_REALISM_LLM unset, the factory returns an ``OllamaBackend``."""
    monkeypatch.delenv("DECNET_REALISM_LLM", raising=False)
    assert isinstance(get_llm(), OllamaBackend)
def test_factory_selects_fake(monkeypatch):
    """Setting DECNET_REALISM_LLM to "fake" makes the factory return a ``FakeBackend``."""
    monkeypatch.setenv("DECNET_REALISM_LLM", "fake")
    assert isinstance(get_llm(), FakeBackend)
def test_factory_unknown_raises(monkeypatch):
    """An unrecognised backend name raises ``ValueError`` mentioning "Unsupported"."""
    monkeypatch.setenv("DECNET_REALISM_LLM", "vllm-someday")
    expect_error = pytest.raises(ValueError, match="Unsupported")
    with expect_error:
        get_llm()
def test_factory_passes_model_through(monkeypatch):
    """The ``model`` argument given to the factory ends up on the backend."""
    monkeypatch.setenv("DECNET_REALISM_LLM", "ollama")
    wanted = "qwen2:7b"
    llm = get_llm(model=wanted)
    assert llm.model == "qwen2:7b"
# ── FakeBackend ──────────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_fake_backend_returns_canned_output():
    """A ``FakeBackend`` built with ``output`` reports success and echoes that text."""
    backend = FakeBackend(output="Subject: hi\n\nbody")
    outcome = await backend.generate("any prompt")
    assert outcome.success is True
    assert outcome.text.startswith("Subject:")
    assert outcome.model == "fake-model"
@pytest.mark.asyncio
async def test_fake_backend_can_simulate_failure():
    """A ``FakeBackend`` built with ``success=False`` reports failure and empty text."""
    backend = FakeBackend(success=False)
    outcome = await backend.generate("prompt")
    assert outcome.success is False
    assert outcome.text == ""
# ── OllamaBackend (subprocess stubbed) ───────────────────────────────────────
@pytest.mark.asyncio
async def test_ollama_backend_success(monkeypatch):
    """Replace asyncio.create_subprocess_exec with a stub that yields canned stdout."""

    class _FakeProcess:
        # Exit status 0 with stdout content and empty stderr.
        returncode = 0

        async def communicate(self, _stdin):
            return b"Subject: hi\n\nbody\n", b""

    async def _spawn(*args, **kwargs):  # noqa: ARG001
        return _FakeProcess()

    monkeypatch.setattr(asyncio, "create_subprocess_exec", _spawn)
    llm = OllamaBackend(model="m1", timeout=1.0)
    outcome = await llm.generate("hello")
    assert outcome.success is True
    assert "Subject:" in outcome.text
    assert outcome.model == "m1"
@pytest.mark.asyncio
async def test_ollama_backend_non_zero_rc_marks_failure(monkeypatch):
    """A stubbed process exiting with rc=1 is reported as failure, with rc and stderr in ``extra``."""

    class _FakeProcess:
        # Exit status 1 with empty stdout and an error on stderr.
        returncode = 1

        async def communicate(self, _stdin):
            return b"", b"model not found"

    async def _spawn(*args, **kwargs):  # noqa: ARG001
        return _FakeProcess()

    monkeypatch.setattr(asyncio, "create_subprocess_exec", _spawn)
    llm = OllamaBackend(model="m1", timeout=1.0)
    outcome = await llm.generate("hello")
    assert outcome.success is False
    details = outcome.extra
    assert details["rc"] == 1
    assert "model not found" in details["stderr"]
@pytest.mark.asyncio
async def test_ollama_backend_timeout_raises(monkeypatch):
    """A stubbed process that outlives the backend timeout makes ``generate`` raise ``LLMTimeout``."""

    class _HungProcess:
        returncode = None

        async def communicate(self, _stdin):
            await asyncio.sleep(10)  # well past the timeout
            return b"", b""

        def kill(self):
            """No-op: the stub has no real process to terminate."""

    async def _spawn(*args, **kwargs):  # noqa: ARG001
        return _HungProcess()

    monkeypatch.setattr(asyncio, "create_subprocess_exec", _spawn)
    llm = OllamaBackend(model="m1", timeout=0.05)
    with pytest.raises(LLMTimeout):
        await llm.generate("hello")
@pytest.mark.asyncio
async def test_ollama_backend_missing_binary_returns_failure(monkeypatch):
    """When spawning raises ``FileNotFoundError`` the result is a failure with rc 127."""

    async def _spawn(*args, **kwargs):  # noqa: ARG001
        raise FileNotFoundError("ollama: not found")

    monkeypatch.setattr(asyncio, "create_subprocess_exec", _spawn)
    llm = OllamaBackend(model="m1", timeout=1.0)
    outcome = await llm.generate("hello")
    assert outcome.success is False
    assert outcome.extra["rc"] == 127

View File

@@ -0,0 +1,101 @@
"""Persona schema parsing + active-hours window tests."""
from __future__ import annotations
import json
from decnet.realism.personas import (
EmailPersona,
in_active_hours,
parse_personas,
)
def _persona(**over) -> dict:
    """Return a fresh persona payload dict; keyword args override or extend the defaults."""
    return {
        "name": "John Smith",
        "email": "john@corp.com",
        "role": "COO",
        "tone": "formal",
        "mannerisms": ["uses 'Best regards'"],
        # Overrides are unpacked last so they take precedence.
        **over,
    }
def test_parse_empty_inputs():
    """``None``, an empty string and an empty list all parse to an empty list."""
    for nothing in (None, "", []):
        assert parse_personas(nothing) == []
def test_parse_invalid_json_returns_empty_no_raise():
    """Malformed JSON text yields an empty list rather than an exception."""
    parsed = parse_personas("{not json")
    assert parsed == []
def test_parse_invalid_top_level_shape_returns_empty():
    """Valid JSON whose top level is an object, not a list, yields an empty list."""
    parsed = parse_personas('{"not": "a list"}')
    assert parsed == []
def test_parse_drops_invalid_entry_keeps_valid():
    """A broken entry is dropped while the two valid entries around it still parse."""
    entries = [
        _persona(),
        {"name": "broken", "email": "not-an-email"},
        _persona(name="Sarah", email="sarah@corp.com"),
    ]
    parsed = parse_personas(json.dumps(entries))
    assert len(parsed) == 2
    names = {p.name for p in parsed}
    assert names == {"John Smith", "Sarah"}
def test_parse_resolves_language_default_when_unset():
    """A persona without ``language`` takes the ``language_default`` passed to the parser."""
    payload = json.dumps([_persona()])
    (only,) = parse_personas(payload, language_default="es")[:1]
    assert only.language == "es"
def test_parse_persona_language_overrides_default():
    """A persona's own ``language`` wins over the parser's ``language_default``."""
    payload = json.dumps([_persona(language="pt")])
    result = parse_personas(payload, language_default="es")
    first = result[0]
    assert first.language == "pt"
def test_parse_accepts_python_list_directly():
    """A Python list of dicts is accepted without JSON encoding."""
    result = parse_personas([_persona()])
    assert len(result) == 1
def test_uses_llms_heavily_default_false():
    """``uses_llms_heavily`` is ``False`` when the payload omits it."""
    result = parse_personas([_persona()])
    first = result[0]
    assert first.uses_llms_heavily is False
def test_uses_llms_heavily_can_be_set():
    """``uses_llms_heavily=True`` in the payload is preserved on the parsed persona."""
    result = parse_personas([_persona(uses_llms_heavily=True)])
    first = result[0]
    assert first.uses_llms_heavily is True
def test_active_hours_normal_window():
    """For "09:00-18:00", hour 9 and 12 are active while 8 and 18 are not."""
    persona = EmailPersona(**_persona(active_hours="09:00-18:00"))
    # (hour, expected) pairs, in the same order the checks were written.
    expectations = ((12, True), (8, False), (18, False), (9, True))
    for hour, expected in expectations:
        assert in_active_hours(persona, hour) is expected
def test_active_hours_wraparound_window():
    """For "22:00-06:00", hours 23, 0 and 5 are active while 7 is not."""
    persona = EmailPersona(**_persona(active_hours="22:00-06:00"))
    expectations = ((23, True), (0, True), (5, True), (7, False))
    for hour, expected in expectations:
        assert in_active_hours(persona, hour) is expected
def test_active_hours_malformed_treats_as_always_on():
    """An unparseable window string makes the persona active at hours 0 and 23."""
    persona = EmailPersona(**_persona(active_hours="garbage"))
    for hour in (0, 23):
        assert in_active_hours(persona, hour) is True
def test_active_hours_equal_window_treated_as_always_on():
    """A window whose start equals its end still reports hour 5 as active."""
    persona = EmailPersona(**_persona(active_hours="10:00-10:00"))
    active = in_active_hours(persona, 5)
    assert active is True

View File

@@ -0,0 +1,99 @@
"""Global persona pool — disk-backed source for fleet/shard mail deckies."""
from __future__ import annotations
import json
import pytest
from decnet.realism import personas_pool as global_pool
@pytest.fixture(autouse=True)
def _reset():
    """Reset the persona pool cache around every test in this module."""
    # Clear before the test body runs.
    global_pool.reset_cache()
    yield
    # Clear again afterwards.
    global_pool.reset_cache()
# Two persona payloads shared by the load tests below; the tests write this
# list (or a slice of it) to a JSON file and check what load() returns.
_TWO = [
    {
        "name": "John Smith",
        "email": "john@corp.com",
        "role": "COO",
        "tone": "formal",
        "mannerisms": ["uses 'Best regards'"],
    },
    {
        "name": "Sarah Johnson",
        "email": "sarah@corp.com",
        "role": "PM",
        "tone": "direct",
        "mannerisms": ["uses bullets"],
    },
]
def test_load_returns_empty_when_file_missing(tmp_path, monkeypatch):
    """Pointing DECNET_REALISM_PERSONAS at a nonexistent file makes ``load`` return ``[]``."""
    missing = tmp_path / "does-not-exist.json"
    monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(missing))
    loaded = global_pool.load()
    assert loaded == []
def test_load_returns_parsed_personas(tmp_path, monkeypatch):
    """A file holding two personas loads as two objects with the expected emails."""
    pool_file = tmp_path / "personas.json"
    pool_file.write_text(json.dumps(_TWO))
    monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(pool_file))
    loaded = global_pool.load()
    assert len(loaded) == 2
    emails = {p.email for p in loaded}
    assert emails == {"john@corp.com", "sarah@corp.com"}
def test_load_resolves_language_default(tmp_path, monkeypatch):
    """``language_default`` passed to ``load`` is applied to every loaded persona."""
    pool_file = tmp_path / "personas.json"
    pool_file.write_text(json.dumps(_TWO))
    monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(pool_file))
    loaded = global_pool.load(language_default="es")
    for persona in loaded:
        assert persona.language == "es"
def test_load_invalid_json_returns_empty(tmp_path, monkeypatch):
    """A personas file containing malformed JSON loads as an empty list."""
    pool_file = tmp_path / "personas.json"
    pool_file.write_text("{not valid")
    monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(pool_file))
    loaded = global_pool.load()
    assert loaded == []
def test_load_caches_until_mtime_changes(tmp_path, monkeypatch):
    """Rewriting the personas file with a newer mtime makes ``load`` return fresh data.

    The mtime is moved forward explicitly instead of sleeping and
    re-stamping with "now": on filesystems with coarse timestamp
    granularity a 10 ms sleep can leave the mtime unchanged, which
    would make this test flaky.
    """
    import os

    f = tmp_path / "personas.json"
    f.write_text(json.dumps(_TWO))
    monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(f))
    first = global_pool.load()
    assert len(first) == 2
    original_stat = f.stat()

    # Re-write with a single persona, then bump mtime two full seconds past
    # the original so the cache invalidates regardless of timestamp resolution.
    f.write_text(json.dumps(_TWO[:1]))
    os.utime(
        f,
        ns=(original_stat.st_atime_ns, original_stat.st_mtime_ns + 2_000_000_000),
    )
    second = global_pool.load()
    assert len(second) == 1
def test_resolve_path_honours_env_override(tmp_path, monkeypatch):
    """``resolve_path`` returns the path given in DECNET_REALISM_PERSONAS."""
    override = tmp_path / "x.json"
    monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(override))
    resolved = global_pool.resolve_path()
    assert resolved == tmp_path / "x.json"
def test_resolve_path_falls_back_to_user_path_when_system_missing(monkeypatch):
    """Without the env override, the resolved path ends in ``email_personas.json``."""
    monkeypatch.delenv("DECNET_REALISM_PERSONAS", raising=False)
    # On a typical dev box /etc/decnet/ is absent, so the resolver should
    # choose ~/.decnet/email_personas.json.
    resolved = global_pool.resolve_path()
    # The exact path depends on whether /etc/decnet exists on the test
    # host, so only the canonical filename is checked (which also rules
    # out an empty path).
    filename = resolved.name
    assert filename == "email_personas.json"