refactor(realism): move emailgen LLM/personas/prompt into shared library

Lift the format-agnostic pieces from decnet/orchestrator/emailgen/
into the new decnet/realism/ library so file-class content generation
(stage 3 of the realism migration) can reuse them. Email-specific
delivery (RFC 2822 EML, IMAP/POP3 spool, thread chains) stays in
orchestrator/.

Renames (history-preserving git mv):
  emailgen/personas.py     -> realism/personas.py
  emailgen/prompt.py       -> realism/prompts/email.py
  emailgen/global_pool.py  -> realism/personas_pool.py
  emailgen/llm/            -> realism/llm/

Env-var clean break (pre-v1, no aliases):
  DECNET_EMAILGEN_LLM      -> DECNET_REALISM_LLM
  DECNET_EMAILGEN_MODEL    -> DECNET_REALISM_MODEL
  DECNET_EMAILGEN_TIMEOUT  -> DECNET_REALISM_TIMEOUT
  DECNET_EMAILGEN_PERSONAS -> DECNET_REALISM_PERSONAS
  DECNET_EMAILGEN_FAKE_OUTPUT -> DECNET_REALISM_FAKE_OUTPUT

Importers rewritten in: orchestrator/emailgen/scheduler.py,
orchestrator/drivers/email.py, web/router/{emailgen,topology}/
api_personas.py, cli/emailgen.py. Tests for moved modules relocated
to tests/realism/; tests for stay-put modules updated in place.

API URL `/api/v1/emailgen/personas` and CLI `decnet emailgen
import-personas` keep their public names until the service-collapse
commit (stage 5).
This commit is contained in:
2026-04-27 16:05:43 -04:00
parent f57c621117
commit 0b9873982d
34 changed files with 455 additions and 298 deletions

View File

@@ -5,10 +5,10 @@ from __future__ import annotations
import pytest
from decnet.orchestrator.drivers import email as email_driver
from decnet.orchestrator.emailgen.llm.base import LLMResult, LLMTimeout
from decnet.orchestrator.emailgen.llm.impl.fake import FakeBackend
from decnet.orchestrator.emailgen.personas import EmailPersona
from decnet.orchestrator.emailgen.scheduler import EmailAction
from decnet.realism.llm.base import LLMResult, LLMTimeout
from decnet.realism.llm.impl.fake import FakeBackend
from decnet.realism.personas import EmailPersona
class _RaisingBackend:

View File

@@ -4,7 +4,7 @@ from __future__ import annotations
from decnet.bus import topics as _topics
from decnet.orchestrator.drivers.base import ActivityResult
from decnet.orchestrator.emailgen import events
from decnet.orchestrator.emailgen.personas import EmailPersona
from decnet.realism.personas import EmailPersona
from decnet.orchestrator.emailgen.scheduler import EmailAction

View File

@@ -1,99 +0,0 @@
"""Global persona pool — disk-backed source for fleet/shard mail deckies."""
from __future__ import annotations
import json
import pytest
from decnet.orchestrator.emailgen import global_pool
@pytest.fixture(autouse=True)
def _reset():
    """Clear the persona-pool cache on both sides of every test."""
    # Reset first so state left by an earlier test cannot leak in.
    global_pool.reset_cache()
    yield
    # Reset again so this test's cached pool cannot leak out.
    global_pool.reset_cache()
# Two persona payloads shared by the pool-loading tests below.
_TWO = [
    {
        "name": "John Smith",
        "email": "john@corp.com",
        "role": "COO",
        "tone": "formal",
        "mannerisms": ["uses 'Best regards'"],
    },
    {
        "name": "Sarah Johnson",
        "email": "sarah@corp.com",
        "role": "PM",
        "tone": "direct",
        "mannerisms": ["uses bullets"],
    },
]
def test_load_returns_empty_when_file_missing(tmp_path, monkeypatch):
    """Pointing the pool env var at a non-existent file loads an empty list."""
    missing = tmp_path / "does-not-exist.json"
    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(missing))

    assert global_pool.load() == []
def test_load_returns_parsed_personas(tmp_path, monkeypatch):
    """Both entries of a valid pool file come back, identified by email."""
    pool_file = tmp_path / "personas.json"
    pool_file.write_text(json.dumps(_TWO))
    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(pool_file))

    loaded = global_pool.load()

    assert len(loaded) == 2
    emails = {entry.email for entry in loaded}
    assert emails == {"john@corp.com", "sarah@corp.com"}
def test_load_resolves_language_default(tmp_path, monkeypatch):
    """Personas without a language take the ``language_default`` passed to load()."""
    pool_file = tmp_path / "personas.json"
    pool_file.write_text(json.dumps(_TWO))
    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(pool_file))

    loaded = global_pool.load(language_default="es")

    languages = [entry.language for entry in loaded]
    assert languages == ["es"] * len(languages)
def test_load_invalid_json_returns_empty(tmp_path, monkeypatch):
    """A pool file that is not valid JSON loads as an empty list."""
    broken = tmp_path / "personas.json"
    broken.write_text("{not valid")
    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(broken))

    assert global_pool.load() == []
def test_load_caches_until_mtime_changes(tmp_path, monkeypatch):
    """Rewriting the pool file with a newer mtime makes load() re-read it.

    The first load sees two personas. The file is then rewritten with one
    persona and its mtime is pushed forward explicitly, after which load()
    must return the single-persona pool.
    """
    import os

    f = tmp_path / "personas.json"
    f.write_text(json.dumps(_TWO))
    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(f))
    first = global_pool.load()
    assert len(first) == 2

    # Re-write with a single persona, then bump mtime deterministically.
    # Relying on a short sleep plus "now" is flaky on filesystems with
    # coarse timestamp resolution, where the new mtime can equal the old
    # one; a two-second jump clears the coarser common granularities.
    before = f.stat()
    f.write_text(json.dumps(_TWO[:1]))
    os.utime(f, ns=(before.st_atime_ns, before.st_mtime_ns + 2_000_000_000))

    second = global_pool.load()
    assert len(second) == 1
def test_resolve_path_honours_env_override(tmp_path, monkeypatch):
    """resolve_path() returns exactly the path named by the env override."""
    override = tmp_path / "x.json"
    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(override))

    assert global_pool.resolve_path() == override
def test_resolve_path_falls_back_to_user_path_when_system_missing(monkeypatch):
    """With no env override, resolve_path() yields the canonical filename."""
    monkeypatch.delenv("DECNET_EMAILGEN_PERSONAS", raising=False)

    # On a typical dev box /etc/decnet/ is absent, so the resolver is
    # expected to pick ~/.decnet/email_personas.json. The directory depends
    # on the test host, so only the final path component is checked here.
    resolved = global_pool.resolve_path()

    assert resolved.name == "email_personas.json"

View File

@@ -1,137 +0,0 @@
"""LLM backend factory + Ollama implementation."""
from __future__ import annotations
import asyncio
import pytest
from decnet.orchestrator.emailgen.llm import LLMTimeout, get_llm
from decnet.orchestrator.emailgen.llm.impl.fake import FakeBackend
from decnet.orchestrator.emailgen.llm.impl.ollama import OllamaBackend
# ── factory dispatch ─────────────────────────────────────────────────────────
def test_factory_default_is_ollama(monkeypatch):
    """With DECNET_EMAILGEN_LLM unset, get_llm() builds an OllamaBackend."""
    monkeypatch.delenv("DECNET_EMAILGEN_LLM", raising=False)

    assert isinstance(get_llm(), OllamaBackend)
def test_factory_selects_fake(monkeypatch):
    """DECNET_EMAILGEN_LLM=fake makes get_llm() build a FakeBackend."""
    monkeypatch.setenv("DECNET_EMAILGEN_LLM", "fake")

    assert isinstance(get_llm(), FakeBackend)
def test_factory_unknown_raises(monkeypatch):
    """An unrecognised backend name raises ValueError mentioning 'Unsupported'."""
    monkeypatch.setenv("DECNET_EMAILGEN_LLM", "vllm-someday")

    expect_unsupported = pytest.raises(ValueError, match="Unsupported")
    with expect_unsupported:
        get_llm()
def test_factory_passes_model_through(monkeypatch):
    """The ``model`` argument to get_llm() ends up on the backend instance."""
    monkeypatch.setenv("DECNET_EMAILGEN_LLM", "ollama")

    chosen = get_llm(model="qwen2:7b")

    assert chosen.model == "qwen2:7b"
# ── FakeBackend ──────────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_fake_backend_returns_canned_output():
    """FakeBackend hands back its configured output as a successful result."""
    backend = FakeBackend(output="Subject: hi\n\nbody")

    reply = await backend.generate("any prompt")

    assert reply.success is True
    assert reply.text.startswith("Subject:")
    assert reply.model == "fake-model"
@pytest.mark.asyncio
async def test_fake_backend_can_simulate_failure():
    """FakeBackend(success=False) reports failure with empty text."""
    backend = FakeBackend(success=False)

    reply = await backend.generate("prompt")

    assert reply.success is False
    assert reply.text == ""
# ── OllamaBackend (subprocess stubbed) ───────────────────────────────────────
@pytest.mark.asyncio
async def test_ollama_backend_success(monkeypatch):
    """A stubbed subprocess exiting 0 with stdout yields a successful result."""

    class _CannedProc:
        # Exit status the backend sees after communicate() returns.
        returncode = 0

        async def communicate(self, _stdin):
            return b"Subject: hi\n\nbody\n", b""

    async def spawn_stub(*args, **kwargs):  # noqa: ARG001
        return _CannedProc()

    # Swap out process creation so no real `ollama` binary is needed.
    monkeypatch.setattr(asyncio, "create_subprocess_exec", spawn_stub)

    ollama = OllamaBackend(model="m1", timeout=1.0)
    outcome = await ollama.generate("hello")

    assert outcome.success is True
    assert "Subject:" in outcome.text
    assert outcome.model == "m1"
@pytest.mark.asyncio
async def test_ollama_backend_non_zero_rc_marks_failure(monkeypatch):
    """A non-zero exit code is reported as failure with rc and stderr in extra."""

    class _FailingProc:
        returncode = 1

        async def communicate(self, _stdin):
            return b"", b"model not found"

    async def spawn_stub(*args, **kwargs):  # noqa: ARG001
        return _FailingProc()

    monkeypatch.setattr(asyncio, "create_subprocess_exec", spawn_stub)

    ollama = OllamaBackend(model="m1", timeout=1.0)
    outcome = await ollama.generate("hello")

    assert outcome.success is False
    assert outcome.extra["rc"] == 1
    assert "model not found" in outcome.extra["stderr"]
@pytest.mark.asyncio
async def test_ollama_backend_timeout_raises(monkeypatch):
    """A subprocess that outlives the backend timeout raises LLMTimeout."""

    class _HangingProc:
        returncode = None

        async def communicate(self, _stdin):
            # Sleeps far longer than the 0.05 s timeout configured below.
            await asyncio.sleep(10)
            return b"", b""

        def kill(self):
            pass

    async def spawn_stub(*args, **kwargs):  # noqa: ARG001
        return _HangingProc()

    monkeypatch.setattr(asyncio, "create_subprocess_exec", spawn_stub)

    ollama = OllamaBackend(model="m1", timeout=0.05)
    with pytest.raises(LLMTimeout):
        await ollama.generate("hello")
@pytest.mark.asyncio
async def test_ollama_backend_missing_binary_returns_failure(monkeypatch):
    """FileNotFoundError at spawn time becomes a failed result with rc 127."""

    async def spawn_stub(*args, **kwargs):  # noqa: ARG001
        raise FileNotFoundError("ollama: not found")

    monkeypatch.setattr(asyncio, "create_subprocess_exec", spawn_stub)

    ollama = OllamaBackend(model="m1", timeout=1.0)
    outcome = await ollama.generate("hello")

    assert outcome.success is False
    assert outcome.extra["rc"] == 127

View File

@@ -1,101 +0,0 @@
"""Persona schema parsing + active-hours window tests."""
from __future__ import annotations
import json
from decnet.orchestrator.emailgen.personas import (
EmailPersona,
in_active_hours,
parse_personas,
)
def _persona(**over) -> dict:
    """Return a baseline persona payload with ``over`` merged on top.

    Keyword arguments replace baseline keys of the same name and add any
    extra keys after them.
    """
    return {
        "name": "John Smith",
        "email": "john@corp.com",
        "role": "COO",
        "tone": "formal",
        "mannerisms": ["uses 'Best regards'"],
        **over,
    }
def test_parse_empty_inputs():
    """None, an empty string and an empty list all parse to an empty list."""
    for blank in (None, "", []):
        assert parse_personas(blank) == []
def test_parse_invalid_json_returns_empty_no_raise():
    """Malformed JSON text parses to an empty list instead of raising."""
    parsed = parse_personas("{not json")
    assert parsed == []
def test_parse_invalid_top_level_shape_returns_empty():
    """A JSON object (rather than a list) at the top level parses to []."""
    parsed = parse_personas('{"not": "a list"}')
    assert parsed == []
def test_parse_drops_invalid_entry_keeps_valid():
    """An invalid entry is dropped while the valid ones around it survive."""
    entries = [
        _persona(),
        {"name": "broken", "email": "not-an-email"},
        _persona(name="Sarah", email="sarah@corp.com"),
    ]

    survivors = parse_personas(json.dumps(entries))

    assert len(survivors) == 2
    names = {entry.name for entry in survivors}
    assert names == {"John Smith", "Sarah"}
def test_parse_resolves_language_default_when_unset():
    """A persona with no language takes the supplied ``language_default``."""
    payload = json.dumps([_persona()])

    first = parse_personas(payload, language_default="es")[0]

    assert first.language == "es"
def test_parse_persona_language_overrides_default():
    """A persona's own language wins over ``language_default``."""
    payload = json.dumps([_persona(language="pt")])

    first = parse_personas(payload, language_default="es")[0]

    assert first.language == "pt"
def test_parse_accepts_python_list_directly():
    """parse_personas() also accepts an already-decoded list of dicts."""
    result = parse_personas([_persona()])
    assert len(result) == 1
def test_uses_llms_heavily_default_false():
    """``uses_llms_heavily`` is False when the payload omits it."""
    (only,) = parse_personas([_persona()])[:1]
    assert only.uses_llms_heavily is False
def test_uses_llms_heavily_can_be_set():
    """``uses_llms_heavily=True`` in the payload is carried onto the persona."""
    result = parse_personas([_persona(uses_llms_heavily=True)])
    first = result[0]
    assert first.uses_llms_heavily is True
def test_active_hours_normal_window():
    """A 09:00-18:00 window includes its start hour and excludes its end hour."""
    worker = EmailPersona(**_persona(active_hours="09:00-18:00"))

    expected_by_hour = {12: True, 8: False, 18: False, 9: True}
    for hour, expected in expected_by_hour.items():
        assert in_active_hours(worker, hour) is expected
def test_active_hours_wraparound_window():
    """A 22:00-06:00 window spans midnight; 07:00 falls outside it."""
    night_owl = EmailPersona(**_persona(active_hours="22:00-06:00"))

    for inside in (23, 0, 5):
        assert in_active_hours(night_owl, inside) is True
    assert in_active_hours(night_owl, 7) is False
def test_active_hours_malformed_treats_as_always_on():
    """An unparseable ``active_hours`` string makes the persona always active."""
    sloppy = EmailPersona(**_persona(active_hours="garbage"))

    for hour in (0, 23):
        assert in_active_hours(sloppy, hour) is True
def test_active_hours_equal_window_treated_as_always_on():
    """A window whose start equals its end is treated as always active."""
    degenerate = EmailPersona(**_persona(active_hours="10:00-10:00"))
    active = in_active_hours(degenerate, 5)
    assert active is True

View File

@@ -1,152 +0,0 @@
"""Prompt builder behaviour: language constraint, em-dash suppression,
deterministic mannerism injection."""
from __future__ import annotations
import random
from decnet.orchestrator.emailgen.personas import EmailPersona
from decnet.orchestrator.emailgen.prompt import (
PromptInputs,
build,
select_mannerisms,
)
def _persona(**over) -> EmailPersona:
    """Build an EmailPersona from baseline fields, with ``over`` applied on top."""
    defaults = {
        "name": "John Smith",
        "email": "john@corp.com",
        "role": "COO",
        "tone": "formal",
        "mannerisms": [
            "opens with 'I hope this finds you well'",
            "uses 'Best regards' exclusively",
            "references policy by number",
            "ccs legal",
        ],
        "language": "en",
    }
    # Later keys win, so caller overrides replace the baseline values.
    return EmailPersona(**{**defaults, **over})
class _SeededRng:
    """Seeded stand-in for the RNG that the prompt code is handed.

    Wraps ``random.Random(seed)`` and forwards ``shuffle``, ``random`` and
    ``choice`` to it, so two instances built with the same seed produce the
    same sequence of results.
    """

    def __init__(self, seed: int):
        self._r = random.Random(seed)

    def shuffle(self, seq):
        """Shuffle ``seq`` in place using the seeded generator."""
        self._r.shuffle(seq)

    def random(self):
        """Return the next float from the seeded generator."""
        value = self._r.random()
        return value

    def choice(self, seq):
        """Return one element of ``seq`` picked by the seeded generator."""
        picked = self._r.choice(seq)
        return picked
def test_select_mannerisms_returns_subset_of_pool():
    """Asking for two mannerisms returns two, all drawn from the persona's pool."""
    sender = _persona()

    chosen = select_mannerisms(sender, rng=_SeededRng(0), n=2)

    assert len(chosen) == 2
    for mannerism in chosen:
        assert mannerism in sender.mannerisms
def test_select_mannerisms_deterministic_under_same_seed():
    """Two selections driven by identically seeded RNGs pick the same items."""
    sender = _persona()

    first = select_mannerisms(sender, rng=_SeededRng(42), n=2)
    second = select_mannerisms(sender, rng=_SeededRng(42), n=2)

    assert first == second
def test_select_mannerisms_returns_all_when_pool_smaller_than_n():
    """A one-item pool is returned whole even when two items are requested."""
    sparse = _persona(mannerisms=["a"])

    chosen = select_mannerisms(sparse, rng=_SeededRng(0), n=2)

    assert chosen == ["a"]
def test_select_mannerisms_empty_pool():
    """A persona with no mannerisms yields an empty selection."""
    blank = _persona(mannerisms=[])
    chosen = select_mannerisms(blank)
    assert chosen == []
def test_build_includes_language_constraint_english():
    """An English-language sender produces a prompt containing 'in English'."""
    author = _persona(language="en")
    addressee = _persona(name="Sarah", email="sarah@corp.com", role="PM")
    inputs = PromptInputs(sender=author, recipient=addressee, context_hint="budget")

    text, _ = build(inputs, rng=_SeededRng(0))

    assert "in English" in text
def test_build_includes_language_constraint_spanish():
    """A Spanish-language sender produces a prompt containing 'in Spanish'."""
    author = _persona(language="es")
    addressee = _persona(name="Sarah", email="sarah@corp.com", role="PM")
    inputs = PromptInputs(sender=author, recipient=addressee, context_hint="budget")

    text, _ = build(inputs, rng=_SeededRng(0))

    assert "in Spanish" in text
def test_build_em_dash_suppression_default():
    """A default persona's prompt carries the em-dash prohibition."""
    author = _persona()
    addressee = _persona(name="Sarah", email="sarah@corp.com", role="PM")
    inputs = PromptInputs(sender=author, recipient=addressee, context_hint="budget")

    text, _ = build(inputs, rng=_SeededRng(0))

    assert "Do NOT use em-dashes" in text
def test_build_em_dash_lifted_for_llm_heavy_persona():
    """An LLM-heavy sender's prompt omits the em-dash prohibition.

    The prompt is also expected to contain the word "fine" (any case).
    """
    author = _persona(uses_llms_heavily=True)
    addressee = _persona(name="Sarah", email="sarah@corp.com", role="PM")
    inputs = PromptInputs(sender=author, recipient=addressee, context_hint="budget")

    text, _ = build(inputs, rng=_SeededRng(0))

    assert "Do NOT use em-dashes" not in text
    assert "fine" in text.lower()
def test_build_reply_thread_block_prefixes_re():
    """Supplying a parent subject and excerpt adds the reply-thread block."""
    author = _persona()
    addressee = _persona(name="Sarah", email="sarah@corp.com", role="PM")
    inputs = PromptInputs(
        sender=author,
        recipient=addressee,
        context_hint="budget",
        parent_subject="Re: Q3 budget",
        parent_excerpt="Numbers attached.",
    )

    text, _ = build(inputs, rng=_SeededRng(0))

    expected_fragments = (
        "REPLY in an ongoing thread",
        "Re: Q3 budget",
        "Numbers attached",
        "prefixed with 'Re: '",
    )
    for fragment in expected_fragments:
        assert fragment in text
def test_build_returns_mannerisms_used_metadata():
    """build() reports a non-empty list of mannerisms taken from the sender."""
    author = _persona()
    addressee = _persona(name="Sarah", email="sarah@corp.com", role="PM")
    inputs = PromptInputs(sender=author, recipient=addressee, context_hint="budget")

    _, applied = build(inputs, rng=_SeededRng(7))

    assert applied
    for mannerism in applied:
        assert mannerism in author.mannerisms
def test_build_uses_explicit_signature_when_provided():
    """A sender with a signature gets the exact-signature instruction."""
    # NOTE(review): the signature holds a literal backslash-n, not a newline;
    # confirm that is intended. Only the instruction text is asserted here.
    author = _persona(signature="-- John\\nCOO")
    addressee = _persona(name="Sarah", email="sarah@corp.com", role="PM")
    inputs = PromptInputs(sender=author, recipient=addressee, context_hint="budget")

    text, _ = build(inputs, rng=_SeededRng(0))

    assert "Use this exact signature block" in text

View File

@@ -7,7 +7,8 @@ from typing import Any
import pytest
from decnet.orchestrator.emailgen import global_pool, scheduler
from decnet.orchestrator.emailgen import scheduler
from decnet.realism import personas_pool as global_pool
@pytest.fixture(autouse=True)
@@ -147,7 +148,7 @@ async def test_pick_for_fleet_source_uses_global_pool(tmp_path, monkeypatch):
personas come from the host-wide JSON file."""
pool_file = tmp_path / "personas.json"
pool_file.write_text(json.dumps(_PERSONAS_TWO))
monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(pool_file))
monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(pool_file))
repo = _FakeRepo(
deckies=[_decky(source="fleet", topology_id=None)],
@@ -163,7 +164,7 @@ async def test_pick_for_shard_source_uses_global_pool(tmp_path, monkeypatch):
"""SWARM shards are non-topology too — same path as fleet."""
pool_file = tmp_path / "personas.json"
pool_file.write_text(json.dumps(_PERSONAS_TWO))
monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(pool_file))
monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(pool_file))
repo = _FakeRepo(
deckies=[_decky(source="shard", topology_id=None)],
@@ -174,7 +175,7 @@ async def test_pick_for_shard_source_uses_global_pool(tmp_path, monkeypatch):
@pytest.mark.asyncio
async def test_pick_fleet_with_empty_global_pool_returns_none(tmp_path, monkeypatch):
monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(tmp_path / "missing.json"))
monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(tmp_path / "missing.json"))
repo = _FakeRepo(deckies=[_decky(source="fleet", topology_id=None)])
assert await scheduler.pick(repo, now=datetime(2026, 4, 26, 12, 0, 0)) is None
@@ -191,7 +192,7 @@ async def test_topology_personas_isolated_from_global_pool(tmp_path, monkeypatch
"tone": "casual",
"mannerisms": [],
}]))
monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(pool_file))
monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(pool_file))
repo = _FakeRepo(
deckies=[_decky()],

View File

@@ -10,8 +10,8 @@ import pytest_asyncio
from decnet.bus.fake import FakeBus
from decnet.orchestrator.drivers import email as email_driver
from decnet.orchestrator.emailgen import worker as eg_worker
from decnet.orchestrator.emailgen.llm.impl.fake import FakeBackend
from decnet.orchestrator.emailgen.scheduler import EmailAction # noqa: F401
from decnet.realism.llm.impl.fake import FakeBackend
from decnet.web.db.models import Topology, TopologyDecky
from decnet.web.db.sqlite.repository import SQLiteRepository