refactor(realism): move emailgen LLM/personas/prompt into shared library

Lift the format-agnostic pieces from decnet/orchestrator/emailgen/ into the
new decnet/realism/ library so file-class content generation (stage 3 of the
realism migration) can reuse them. Email-specific delivery (RFC 2822 EML,
IMAP/POP3 spool, thread chains) stays in orchestrator/.

Renames (history-preserving git mv):
  emailgen/personas.py    -> realism/personas.py
  emailgen/prompt.py      -> realism/prompts/email.py
  emailgen/global_pool.py -> realism/personas_pool.py
  emailgen/llm/           -> realism/llm/

Env-var clean break (pre-v1, no aliases):
  DECNET_EMAILGEN_LLM         -> DECNET_REALISM_LLM
  DECNET_EMAILGEN_MODEL       -> DECNET_REALISM_MODEL
  DECNET_EMAILGEN_TIMEOUT     -> DECNET_REALISM_TIMEOUT
  DECNET_EMAILGEN_PERSONAS    -> DECNET_REALISM_PERSONAS
  DECNET_EMAILGEN_FAKE_OUTPUT -> DECNET_REALISM_FAKE_OUTPUT

Importers rewritten in: orchestrator/emailgen/scheduler.py,
orchestrator/drivers/email.py, web/router/{emailgen,topology}/api_personas.py,
cli/emailgen.py.

Tests for moved modules relocated to tests/realism/; tests for stay-put
modules updated in place.

API URL `/api/v1/emailgen/personas` and CLI `decnet emailgen import-personas`
keep their public names until the service-collapse commit (stage 5).
2026-04-27 16:05:43 -04:00
parent f57c621117
commit 0b9873982d
34 changed files with 455 additions and 298 deletions
--- a/tests/orchestrator/emailgen/test_driver.py
+++ b/tests/orchestrator/emailgen/test_driver.py
@@ -5,10 +5,10 @@ from __future__ import annotations
 import pytest

 from decnet.orchestrator.drivers import email as email_driver
-from decnet.orchestrator.emailgen.llm.base import LLMResult, LLMTimeout
-from decnet.orchestrator.emailgen.llm.impl.fake import FakeBackend
-from decnet.orchestrator.emailgen.personas import EmailPersona
 from decnet.orchestrator.emailgen.scheduler import EmailAction
+from decnet.realism.llm.base import LLMResult, LLMTimeout
+from decnet.realism.llm.impl.fake import FakeBackend
+from decnet.realism.personas import EmailPersona


 class _RaisingBackend:
--- a/tests/orchestrator/emailgen/test_events.py
+++ b/tests/orchestrator/emailgen/test_events.py
@@ -4,7 +4,7 @@ from __future__ import annotations
 from decnet.bus import topics as _topics
 from decnet.orchestrator.drivers.base import ActivityResult
 from decnet.orchestrator.emailgen import events
-from decnet.orchestrator.emailgen.personas import EmailPersona
+from decnet.realism.personas import EmailPersona
 from decnet.orchestrator.emailgen.scheduler import EmailAction


--- a/tests/orchestrator/emailgen/test_global_pool.py
+++ b/tests/orchestrator/emailgen/test_global_pool.py
@@ -1,99 +0,0 @@
-"""Global persona pool — disk-backed source for fleet/shard mail deckies."""
-from __future__ import annotations
-
-import json
-
-import pytest
-
-from decnet.orchestrator.emailgen import global_pool
-
-
-@pytest.fixture(autouse=True)
-def _reset():
-    global_pool.reset_cache()
-    yield
-    global_pool.reset_cache()
-
-
-_TWO = [
-    {
-        "name": "John Smith",
-        "email": "john@corp.com",
-        "role": "COO",
-        "tone": "formal",
-        "mannerisms": ["uses 'Best regards'"],
-    },
-    {
-        "name": "Sarah Johnson",
-        "email": "sarah@corp.com",
-        "role": "PM",
-        "tone": "direct",
-        "mannerisms": ["uses bullets"],
-    },
-]
-
-
-def test_load_returns_empty_when_file_missing(tmp_path, monkeypatch):
-    monkeypatch.setenv(
-        "DECNET_EMAILGEN_PERSONAS", str(tmp_path / "does-not-exist.json")
-    )
-    assert global_pool.load() == []
-
-
-def test_load_returns_parsed_personas(tmp_path, monkeypatch):
-    f = tmp_path / "personas.json"
-    f.write_text(json.dumps(_TWO))
-    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(f))
-    personas = global_pool.load()
-    assert len(personas) == 2
-    assert {p.email for p in personas} == {"john@corp.com", "sarah@corp.com"}
-
-
-def test_load_resolves_language_default(tmp_path, monkeypatch):
-    f = tmp_path / "personas.json"
-    f.write_text(json.dumps(_TWO))
-    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(f))
-    personas = global_pool.load(language_default="es")
-    assert all(p.language == "es" for p in personas)
-
-
-def test_load_invalid_json_returns_empty(tmp_path, monkeypatch):
-    f = tmp_path / "personas.json"
-    f.write_text("{not valid")
-    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(f))
-    assert global_pool.load() == []
-
-
-def test_load_caches_until_mtime_changes(tmp_path, monkeypatch):
-    f = tmp_path / "personas.json"
-    f.write_text(json.dumps(_TWO))
-    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(f))
-
-    first = global_pool.load()
-    assert len(first) == 2
-
-    # Re-write with a single persona; bump mtime so the cache invalidates.
-    import time as _time
-    _time.sleep(0.01)
-    f.write_text(json.dumps(_TWO[:1]))
-    import os
-    os.utime(f, None)
-
-    second = global_pool.load()
-    assert len(second) == 1
-
-
-def test_resolve_path_honours_env_override(tmp_path, monkeypatch):
-    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(tmp_path / "x.json"))
-    assert global_pool.resolve_path() == tmp_path / "x.json"
-
-
-def test_resolve_path_falls_back_to_user_path_when_system_missing(monkeypatch):
-    monkeypatch.delenv("DECNET_EMAILGEN_PERSONAS", raising=False)
-    # In a typical dev box /etc/decnet/ doesn't exist; the resolver
-    # should pick ~/.decnet/email_personas.json.
-    p = global_pool.resolve_path()
-    # We don't assert the exact path (depends on whether /etc/decnet
-    # exists on the test host), only that it ends with the canonical
-    # filename and isn't an empty path.
-    assert p.name == "email_personas.json"
--- a/tests/orchestrator/emailgen/test_llm.py
+++ b/tests/orchestrator/emailgen/test_llm.py
@@ -1,137 +0,0 @@
-"""LLM backend factory + Ollama implementation."""
-from __future__ import annotations
-
-import asyncio
-
-import pytest
-
-from decnet.orchestrator.emailgen.llm import LLMTimeout, get_llm
-from decnet.orchestrator.emailgen.llm.impl.fake import FakeBackend
-from decnet.orchestrator.emailgen.llm.impl.ollama import OllamaBackend
-
-
-# ── factory dispatch ─────────────────────────────────────────────────────────
-
-
-def test_factory_default_is_ollama(monkeypatch):
-    monkeypatch.delenv("DECNET_EMAILGEN_LLM", raising=False)
-    backend = get_llm()
-    assert isinstance(backend, OllamaBackend)
-
-
-def test_factory_selects_fake(monkeypatch):
-    monkeypatch.setenv("DECNET_EMAILGEN_LLM", "fake")
-    backend = get_llm()
-    assert isinstance(backend, FakeBackend)
-
-
-def test_factory_unknown_raises(monkeypatch):
-    monkeypatch.setenv("DECNET_EMAILGEN_LLM", "vllm-someday")
-    with pytest.raises(ValueError, match="Unsupported"):
-        get_llm()
-
-
-def test_factory_passes_model_through(monkeypatch):
-    monkeypatch.setenv("DECNET_EMAILGEN_LLM", "ollama")
-    backend = get_llm(model="qwen2:7b")
-    assert backend.model == "qwen2:7b"
-
-
-# ── FakeBackend ──────────────────────────────────────────────────────────────
-
-
-@pytest.mark.asyncio
-async def test_fake_backend_returns_canned_output():
-    fb = FakeBackend(output="Subject: hi\n\nbody")
-    result = await fb.generate("any prompt")
-    assert result.success is True
-    assert result.text.startswith("Subject:")
-    assert result.model == "fake-model"
-
-
-@pytest.mark.asyncio
-async def test_fake_backend_can_simulate_failure():
-    fb = FakeBackend(success=False)
-    result = await fb.generate("prompt")
-    assert result.success is False
-    assert result.text == ""
-
-
-# ── OllamaBackend (subprocess stubbed) ───────────────────────────────────────
-
-
-@pytest.mark.asyncio
-async def test_ollama_backend_success(monkeypatch):
-    """Stub asyncio.create_subprocess_exec to return canned stdout."""
-
-    class _StubProc:
-        returncode = 0
-
-        async def communicate(self, _stdin):
-            return b"Subject: hi\n\nbody\n", b""
-
-    async def fake_create(*args, **kwargs):    # noqa: ARG001
-        return _StubProc()
-
-    monkeypatch.setattr(asyncio, "create_subprocess_exec", fake_create)
-
-    backend = OllamaBackend(model="m1", timeout=1.0)
-    result = await backend.generate("hello")
-    assert result.success is True
-    assert "Subject:" in result.text
-    assert result.model == "m1"
-
-
-@pytest.mark.asyncio
-async def test_ollama_backend_non_zero_rc_marks_failure(monkeypatch):
-    class _StubProc:
-        returncode = 1
-
-        async def communicate(self, _stdin):
-            return b"", b"model not found"
-
-    async def fake_create(*args, **kwargs):    # noqa: ARG001
-        return _StubProc()
-
-    monkeypatch.setattr(asyncio, "create_subprocess_exec", fake_create)
-
-    backend = OllamaBackend(model="m1", timeout=1.0)
-    result = await backend.generate("hello")
-    assert result.success is False
-    assert result.extra["rc"] == 1
-    assert "model not found" in result.extra["stderr"]
-
-
-@pytest.mark.asyncio
-async def test_ollama_backend_timeout_raises(monkeypatch):
-    class _StubProc:
-        returncode = None
-
-        async def communicate(self, _stdin):
-            await asyncio.sleep(10)    # well past the timeout
-            return b"", b""
-
-        def kill(self):
-            pass
-
-    async def fake_create(*args, **kwargs):    # noqa: ARG001
-        return _StubProc()
-
-    monkeypatch.setattr(asyncio, "create_subprocess_exec", fake_create)
-
-    backend = OllamaBackend(model="m1", timeout=0.05)
-    with pytest.raises(LLMTimeout):
-        await backend.generate("hello")
-
-
-@pytest.mark.asyncio
-async def test_ollama_backend_missing_binary_returns_failure(monkeypatch):
-    async def fake_create(*args, **kwargs):    # noqa: ARG001
-        raise FileNotFoundError("ollama: not found")
-
-    monkeypatch.setattr(asyncio, "create_subprocess_exec", fake_create)
-
-    backend = OllamaBackend(model="m1", timeout=1.0)
-    result = await backend.generate("hello")
-    assert result.success is False
-    assert result.extra["rc"] == 127
--- a/tests/orchestrator/emailgen/test_personas.py
+++ b/tests/orchestrator/emailgen/test_personas.py
@@ -1,101 +0,0 @@
-"""Persona schema parsing + active-hours window tests."""
-from __future__ import annotations
-
-import json
-
-from decnet.orchestrator.emailgen.personas import (
-    EmailPersona,
-    in_active_hours,
-    parse_personas,
-)
-
-
-def _persona(**over) -> dict:
-    base = {
-        "name": "John Smith",
-        "email": "john@corp.com",
-        "role": "COO",
-        "tone": "formal",
-        "mannerisms": ["uses 'Best regards'"],
-    }
-    base.update(over)
-    return base
-
-
-def test_parse_empty_inputs():
-    assert parse_personas(None) == []
-    assert parse_personas("") == []
-    assert parse_personas([]) == []
-
-
-def test_parse_invalid_json_returns_empty_no_raise():
-    assert parse_personas("{not json") == []
-
-
-def test_parse_invalid_top_level_shape_returns_empty():
-    assert parse_personas('{"not": "a list"}') == []
-
-
-def test_parse_drops_invalid_entry_keeps_valid():
-    raw = json.dumps([
-        _persona(),
-        {"name": "broken", "email": "not-an-email"},
-        _persona(name="Sarah", email="sarah@corp.com"),
-    ])
-    parsed = parse_personas(raw)
-    assert len(parsed) == 2
-    assert {p.name for p in parsed} == {"John Smith", "Sarah"}
-
-
-def test_parse_resolves_language_default_when_unset():
-    raw = json.dumps([_persona()])
-    parsed = parse_personas(raw, language_default="es")
-    assert parsed[0].language == "es"
-
-
-def test_parse_persona_language_overrides_default():
-    raw = json.dumps([_persona(language="pt")])
-    parsed = parse_personas(raw, language_default="es")
-    assert parsed[0].language == "pt"
-
-
-def test_parse_accepts_python_list_directly():
-    parsed = parse_personas([_persona()])
-    assert len(parsed) == 1
-
-
-def test_uses_llms_heavily_default_false():
-    parsed = parse_personas([_persona()])
-    assert parsed[0].uses_llms_heavily is False
-
-
-def test_uses_llms_heavily_can_be_set():
-    parsed = parse_personas([_persona(uses_llms_heavily=True)])
-    assert parsed[0].uses_llms_heavily is True
-
-
-def test_active_hours_normal_window():
-    p = EmailPersona(**_persona(active_hours="09:00-18:00"))
-    assert in_active_hours(p, 12) is True
-    assert in_active_hours(p, 8) is False
-    assert in_active_hours(p, 18) is False
-    assert in_active_hours(p, 9) is True
-
-
-def test_active_hours_wraparound_window():
-    p = EmailPersona(**_persona(active_hours="22:00-06:00"))
-    assert in_active_hours(p, 23) is True
-    assert in_active_hours(p, 0) is True
-    assert in_active_hours(p, 5) is True
-    assert in_active_hours(p, 7) is False
-
-
-def test_active_hours_malformed_treats_as_always_on():
-    p = EmailPersona(**_persona(active_hours="garbage"))
-    assert in_active_hours(p, 0) is True
-    assert in_active_hours(p, 23) is True
-
-
-def test_active_hours_equal_window_treated_as_always_on():
-    p = EmailPersona(**_persona(active_hours="10:00-10:00"))
-    assert in_active_hours(p, 5) is True
--- a/tests/orchestrator/emailgen/test_prompt.py
+++ b/tests/orchestrator/emailgen/test_prompt.py
@@ -1,152 +0,0 @@
-"""Prompt builder behaviour: language constraint, em-dash suppression,
-deterministic mannerism injection."""
-from __future__ import annotations
-
-import random
-
-from decnet.orchestrator.emailgen.personas import EmailPersona
-from decnet.orchestrator.emailgen.prompt import (
-    PromptInputs,
-    build,
-    select_mannerisms,
-)
-
-
-def _persona(**over) -> EmailPersona:
-    base = dict(
-        name="John Smith",
-        email="john@corp.com",
-        role="COO",
-        tone="formal",
-        mannerisms=[
-            "opens with 'I hope this finds you well'",
-            "uses 'Best regards' exclusively",
-            "references policy by number",
-            "ccs legal",
-        ],
-        language="en",
-    )
-    base.update(over)
-    return EmailPersona(**base)
-
-
-class _SeededRng:
-    """Adapter so prompt code thinks it has a SystemRandom."""
-
-    def __init__(self, seed: int):
-        self._r = random.Random(seed)
-
-    def shuffle(self, seq):
-        self._r.shuffle(seq)
-
-    def random(self):
-        return self._r.random()
-
-    def choice(self, seq):
-        return self._r.choice(seq)
-
-
-def test_select_mannerisms_returns_subset_of_pool():
-    persona = _persona()
-    picks = select_mannerisms(persona, rng=_SeededRng(0), n=2)
-    assert len(picks) == 2
-    assert all(m in persona.mannerisms for m in picks)
-
-
-def test_select_mannerisms_deterministic_under_same_seed():
-    persona = _persona()
-    a = select_mannerisms(persona, rng=_SeededRng(42), n=2)
-    b = select_mannerisms(persona, rng=_SeededRng(42), n=2)
-    assert a == b
-
-
-def test_select_mannerisms_returns_all_when_pool_smaller_than_n():
-    persona = _persona(mannerisms=["a"])
-    picks = select_mannerisms(persona, rng=_SeededRng(0), n=2)
-    assert picks == ["a"]
-
-
-def test_select_mannerisms_empty_pool():
-    persona = _persona(mannerisms=[])
-    assert select_mannerisms(persona) == []
-
-
-def test_build_includes_language_constraint_english():
-    sender = _persona(language="en")
-    recip = _persona(name="Sarah", email="sarah@corp.com", role="PM")
-    prompt, _ = build(
-        PromptInputs(sender=sender, recipient=recip, context_hint="budget"),
-        rng=_SeededRng(0),
-    )
-    assert "in English" in prompt
-
-
-def test_build_includes_language_constraint_spanish():
-    sender = _persona(language="es")
-    recip = _persona(name="Sarah", email="sarah@corp.com", role="PM")
-    prompt, _ = build(
-        PromptInputs(sender=sender, recipient=recip, context_hint="budget"),
-        rng=_SeededRng(0),
-    )
-    assert "in Spanish" in prompt
-
-
-def test_build_em_dash_suppression_default():
-    sender = _persona()
-    recip = _persona(name="Sarah", email="sarah@corp.com", role="PM")
-    prompt, _ = build(
-        PromptInputs(sender=sender, recipient=recip, context_hint="budget"),
-        rng=_SeededRng(0),
-    )
-    assert "Do NOT use em-dashes" in prompt
-
-
-def test_build_em_dash_lifted_for_llm_heavy_persona():
-    sender = _persona(uses_llms_heavily=True)
-    recip = _persona(name="Sarah", email="sarah@corp.com", role="PM")
-    prompt, _ = build(
-        PromptInputs(sender=sender, recipient=recip, context_hint="budget"),
-        rng=_SeededRng(0),
-    )
-    assert "Do NOT use em-dashes" not in prompt
-    assert "fine" in prompt.lower()
-
-
-def test_build_reply_thread_block_prefixes_re():
-    sender = _persona()
-    recip = _persona(name="Sarah", email="sarah@corp.com", role="PM")
-    prompt, _ = build(
-        PromptInputs(
-            sender=sender,
-            recipient=recip,
-            context_hint="budget",
-            parent_subject="Re: Q3 budget",
-            parent_excerpt="Numbers attached.",
-        ),
-        rng=_SeededRng(0),
-    )
-    assert "REPLY in an ongoing thread" in prompt
-    assert "Re: Q3 budget" in prompt
-    assert "Numbers attached" in prompt
-    assert "prefixed with 'Re: '" in prompt
-
-
-def test_build_returns_mannerisms_used_metadata():
-    sender = _persona()
-    recip = _persona(name="Sarah", email="sarah@corp.com", role="PM")
-    _, used = build(
-        PromptInputs(sender=sender, recipient=recip, context_hint="budget"),
-        rng=_SeededRng(7),
-    )
-    assert used
-    assert all(m in sender.mannerisms for m in used)
-
-
-def test_build_uses_explicit_signature_when_provided():
-    sender = _persona(signature="-- John\\nCOO")
-    recip = _persona(name="Sarah", email="sarah@corp.com", role="PM")
-    prompt, _ = build(
-        PromptInputs(sender=sender, recipient=recip, context_hint="budget"),
-        rng=_SeededRng(0),
-    )
-    assert "Use this exact signature block" in prompt
--- a/tests/orchestrator/emailgen/test_scheduler.py
+++ b/tests/orchestrator/emailgen/test_scheduler.py
@@ -7,7 +7,8 @@ from typing import Any

 import pytest

-from decnet.orchestrator.emailgen import global_pool, scheduler
+from decnet.orchestrator.emailgen import scheduler
+from decnet.realism import personas_pool as global_pool


@pytest.fixture(autouse=True)
@@ -147,7 +148,7 @@ async def test_pick_for_fleet_source_uses_global_pool(tmp_path, monkeypatch):
    personas come from the host-wide JSON file."""
    pool_file = tmp_path / "personas.json"
    pool_file.write_text(json.dumps(_PERSONAS_TWO))
-    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(pool_file))
+    monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(pool_file))

    repo = _FakeRepo(
        deckies=[_decky(source="fleet", topology_id=None)],
@@ -163,7 +164,7 @@ async def test_pick_for_shard_source_uses_global_pool(tmp_path, monkeypatch):
    """SWARM shards are non-topology too — same path as fleet."""
    pool_file = tmp_path / "personas.json"
    pool_file.write_text(json.dumps(_PERSONAS_TWO))
-    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(pool_file))
+    monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(pool_file))

    repo = _FakeRepo(
        deckies=[_decky(source="shard", topology_id=None)],
@@ -174,7 +175,7 @@ async def test_pick_for_shard_source_uses_global_pool(tmp_path, monkeypatch):

@pytest.mark.asyncio
 async def test_pick_fleet_with_empty_global_pool_returns_none(tmp_path, monkeypatch):
-    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(tmp_path / "missing.json"))
+    monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(tmp_path / "missing.json"))
    repo = _FakeRepo(deckies=[_decky(source="fleet", topology_id=None)])
    assert await scheduler.pick(repo, now=datetime(2026, 4, 26, 12, 0, 0)) is None

@@ -191,7 +192,7 @@ async def test_topology_personas_isolated_from_global_pool(tmp_path, monkeypatch
        "tone": "casual",
        "mannerisms": [],
    }]))
-    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(pool_file))
+    monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(pool_file))

    repo = _FakeRepo(
        deckies=[_decky()],
--- a/tests/orchestrator/emailgen/test_worker_integration.py
+++ b/tests/orchestrator/emailgen/test_worker_integration.py
@@ -10,8 +10,8 @@ import pytest_asyncio
 from decnet.bus.fake import FakeBus
 from decnet.orchestrator.drivers import email as email_driver
 from decnet.orchestrator.emailgen import worker as eg_worker
-from decnet.orchestrator.emailgen.llm.impl.fake import FakeBackend
 from decnet.orchestrator.emailgen.scheduler import EmailAction  # noqa: F401
+from decnet.realism.llm.impl.fake import FakeBackend
 from decnet.web.db.models import Topology, TopologyDecky
 from decnet.web.db.sqlite.repository import SQLiteRepository