merge: testing → main (reconcile 2-week divergence)

This commit is contained in:
2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions

View File

View File

@@ -0,0 +1,215 @@
"""EmailDriver: inject a fake LLM backend + stub docker exec; verify
EML parse-and-repair and payload metadata."""
from __future__ import annotations
import pytest
from decnet.orchestrator.drivers import email as email_driver
from decnet.orchestrator.emailgen.scheduler import EmailAction
from decnet.realism.llm.base import LLMResult, LLMTimeout
from decnet.realism.llm.impl.fake import FakeBackend
from decnet.realism.personas import EmailPersona
class _RaisingBackend:
    """Backend double whose ``generate`` raises ``LLMTimeout`` every time."""

    timeout = 0.1
    model = "stuck-model"

    async def generate(self, prompt: str) -> LLMResult:  # noqa: ARG002
        """Ignore the prompt and fail with ``LLMTimeout("stuck")``."""
        raise LLMTimeout("stuck")
class _FailingBackend:
    """Backend double that answers, but with ``success=False``."""

    timeout = 1.0
    model = "broken-model"

    async def generate(self, prompt: str) -> LLMResult:  # noqa: ARG002
        """Return an unsuccessful ``LLMResult`` with rc/stderr in ``extra``."""
        failure_details = {"rc": 1, "stderr": "model not found"}
        return LLMResult(
            success=False,
            text="",
            model=self.model,
            latency_ms=5,
            extra=failure_details,
        )
def _persona(name="John", email="john@corp.com"):
    """Build a formal, English-language COO persona with the given identity."""
    persona_fields = {
        "name": name,
        "email": email,
        "role": "COO",
        "tone": "formal",
        "mannerisms": ["uses 'Best regards'"],
        "language": "en",
    }
    return EmailPersona(**persona_fields)
def _action(is_reply=False):
    """Build a John -> Sarah ``EmailAction`` on thread ``thr1``.

    When ``is_reply`` is truthy the action carries the parent message id,
    a references string and a "Re:" subject hint; otherwise those fields
    are ``None`` / empty and the context hint is the fresh topic.
    """
    if is_reply:
        parent_id = "<old@corp.com>"
        reference_chain = "<old@corp.com>"
        hint = "Re: budget"
        context = "Re: budget"
    else:
        parent_id = None
        reference_chain = ""
        hint = None
        context = "Q3 budget"

    return EmailAction(
        mail_decky_uuid="d1",
        mail_decky_name="mailhost",
        mail_decky_services=("imap",),
        sender=_persona(),
        recipient=_persona(name="Sarah", email="sarah@corp.com"),
        thread_id="thr1",
        parent_message_id=parent_id,
        references=reference_chain,
        subject_hint=hint,
        parent_excerpt=None,
        context_hint=context,
        is_reply=is_reply,
    )
def test_parse_subject_and_body_extracts_subject_line():
    """A leading ``Subject:`` line is split off from the body text."""
    raw_output = "Subject: Quick update\n\nHi Sarah,\nNumbers attached.\n"
    parsed = email_driver._parse_subject_and_body(raw_output)
    parsed_subject, parsed_body = parsed
    assert parsed_subject == "Quick update"
    assert parsed_body.startswith("Hi Sarah")
def test_parse_subject_strips_code_fences():
    """Output wrapped in a ``` fence still yields a clean subject and body."""
    fenced_output = "```\nSubject: Quick update\n\nbody\n```\n"
    parsed_subject, parsed_body = email_driver._parse_subject_and_body(
        fenced_output
    )
    assert (parsed_subject, parsed_body) == ("Quick update", "body")
def test_parse_subject_falls_back_when_missing():
    """Without a ``Subject:`` line the parser supplies the default subject."""
    headerless_output = "Just a body, no subject\n"
    parsed_subject, parsed_body = email_driver._parse_subject_and_body(
        headerless_output
    )
    assert parsed_subject == "Business Communication"
    # The original text must survive as the body.
    assert "body" in parsed_body.lower()
def test_build_eml_includes_required_headers():
    """A non-reply EML carries the core headers and no ``In-Reply-To``."""
    from datetime import datetime, timezone

    sent_at = datetime(2026, 4, 26, 12, 0, tzinfo=timezone.utc)
    raw_eml = email_driver._build_eml(
        sender_name="John",
        sender_email="john@corp.com",
        recipient_name="Sarah",
        recipient_email="sarah@corp.com",
        subject="Q3 budget",
        body="Hi Sarah,\nNumbers attached.",
        message_id="<m1@corp.com>",
        in_reply_to=None,
        references="",
        ts=sent_at,
    )
    text = raw_eml.decode("utf-8")

    required_headers = (
        "From: John <john@corp.com>",
        "To: Sarah <sarah@corp.com>",
        "Subject: Q3 budget",
        "Message-ID: <m1@corp.com>",
        "MIME-Version: 1.0",
    )
    for header in required_headers:
        assert header in text
    # No parent message was given, so the threading header must be absent.
    assert "In-Reply-To" not in text
def test_build_eml_threads_carry_in_reply_to_and_references():
    """A reply EML emits both ``In-Reply-To`` and ``References`` headers."""
    from datetime import datetime, timezone

    parent_id = "<m1@corp.com>"
    sent_at = datetime(2026, 4, 26, 12, 0, tzinfo=timezone.utc)
    raw_eml = email_driver._build_eml(
        sender_name="John",
        sender_email="john@corp.com",
        recipient_name="Sarah",
        recipient_email="sarah@corp.com",
        subject="Re: Q3",
        body="Following up.",
        message_id="<m2@corp.com>",
        in_reply_to=parent_id,
        references=parent_id,
        ts=sent_at,
    )
    text = raw_eml.decode("utf-8")
    for header in ("In-Reply-To: <m1@corp.com>", "References: <m1@corp.com>"):
        assert header in text
def test_container_for_imap_takes_priority():
    """With imap and pop3 both listed, the imap container name is chosen."""
    container = email_driver._container_for("mailhost", ["imap", "pop3"])
    assert container == "mailhost-imap"
def test_container_for_pop3_only():
    """A decky offering only pop3 resolves to the pop3 container name."""
    container = email_driver._container_for("mailhost", ["pop3"])
    assert container == "mailhost-pop3"
@pytest.mark.asyncio
async def test_driver_run_success_path(monkeypatch):
    """Run the driver with a FakeBackend and a stubbed docker exec; expect success."""
    recorded_argv: list[list[str]] = []

    async def _capture_stub(argv, *, stdin_data=None, timeout=8.0):
        recorded_argv.append(list(argv))
        return 0, "", ""

    monkeypatch.setattr(email_driver, "_run_capture", _capture_stub)

    backend = FakeBackend(
        model="llama3.1",
        output="Subject: Q3 budget\n\nHi Sarah,\nNumbers attached.\n",
    )
    driver = email_driver.EmailDriver(llm=backend)
    outcome = await driver.run(_action())

    assert outcome.success is True
    meta = outcome.payload
    assert meta["model"] == "llama3.1"
    assert meta["subject"] == "Q3 budget"
    assert meta["language"] == "en"
    assert meta["mannerisms_used"]
    assert meta["message_id"].startswith("<")
    assert meta["eml_path"].endswith(".eml")
    assert meta["container"] == "mailhost-imap"

    # The LLM call happens in-process through the FakeBackend, so the
    # docker exec is the only thing that gets shelled out.
    assert len(recorded_argv) == 1
    (docker_argv,) = recorded_argv
    assert docker_argv[0] == "docker"
    shell_script = docker_argv[-1]
    assert "touch -d" in shell_script
    assert "tee" in shell_script
    # The file has to be written before its timestamp is adjusted.
    assert shell_script.index("tee") < shell_script.index("touch -d")
@pytest.mark.asyncio
async def test_driver_run_llm_failure_short_circuits(monkeypatch):
    """A backend reporting ``success=False`` must stop the run before docker exec."""
    exec_attempts = []

    async def _capture_stub(argv, *, stdin_data=None, timeout=8.0):
        exec_attempts.append(argv)
        return 0, "", ""

    monkeypatch.setattr(email_driver, "_run_capture", _capture_stub)

    driver = email_driver.EmailDriver(llm=_FailingBackend())
    outcome = await driver.run(_action())

    assert outcome.success is False
    details = outcome.payload
    assert details["stage"] == "llm"
    assert "stderr" in details
    assert "model not found" in details["stderr"]
    # The stub was never reached, i.e. no docker exec fired.
    assert not exec_attempts
@pytest.mark.asyncio
async def test_driver_run_llm_timeout_reported_distinctly(monkeypatch):
    """An ``LLMTimeout`` from the backend surfaces as an ``llm``-stage timeout.

    ``_run_capture`` is stubbed (as in the other driver tests) so that a
    regression which carried on to the delivery step after a timeout is
    caught here instead of shelling out to a real ``docker`` binary.
    """
    delivery_attempts: list[list[str]] = []

    async def fake_run_capture(argv, *, stdin_data=None, timeout=8.0):
        delivery_attempts.append(list(argv))
        return 0, "", ""

    monkeypatch.setattr(email_driver, "_run_capture", fake_run_capture)

    drv = email_driver.EmailDriver(llm=_RaisingBackend())
    result = await drv.run(_action())

    assert result.success is False
    assert result.payload["stage"] == "llm"
    assert result.payload["error"] == "timeout"
    # Nothing was generated, so nothing may be delivered.
    assert delivery_attempts == []
@pytest.mark.asyncio
async def test_driver_run_delivery_failure(monkeypatch):
    """A non-zero exit from the docker exec is reported as a ``delivery`` failure."""

    async def _failing_capture(argv, *, stdin_data=None, timeout=8.0):
        return 1, "", "no such container"

    monkeypatch.setattr(email_driver, "_run_capture", _failing_capture)

    backend = FakeBackend(output="Subject: hi\n\nbody\n")
    driver = email_driver.EmailDriver(llm=backend)
    outcome = await driver.run(_action())

    assert outcome.success is False
    details = outcome.payload
    assert details["stage"] == "delivery"
    assert "no such container" in details["stderr"]

View File

@@ -0,0 +1,72 @@
"""events.to_row / topic_for / event_type_for."""
from __future__ import annotations
from decnet.bus import topics as _topics
from decnet.orchestrator.drivers.base import ActivityResult
from decnet.orchestrator.emailgen import events
from decnet.realism.personas import EmailPersona
from decnet.orchestrator.emailgen.scheduler import EmailAction
def _persona(email="john@corp.com"):
    """Build a minimal "John" persona; only the e-mail address varies."""
    persona_fields = dict(
        name="John",
        email=email,
        role="COO",
        tone="formal",
        mannerisms=[],
        language="en",
    )
    return EmailPersona(**persona_fields)
def _action():
    """Build a fresh (non-reply) john -> sarah ``EmailAction`` on thread ``thr1``."""
    sender = _persona()
    recipient = _persona(email="sarah@corp.com")
    return EmailAction(
        mail_decky_uuid="d1",
        mail_decky_name="mailhost",
        mail_decky_services=("imap",),
        sender=sender,
        recipient=recipient,
        thread_id="thr1",
        parent_message_id=None,
        references="",
        subject_hint=None,
        parent_excerpt=None,
        context_hint="Q3 budget",
        is_reply=False,
    )
def test_to_row_pulls_message_id_subject_from_payload():
    """``to_row`` merges action identity with the fields found in the payload."""
    payload = {
        "message_id": "<m1@corp.com>",
        "subject": "Q3 budget",
        "language": "en",
        "eml_path": "/var/spool/decnet-emails/thr1/m1.eml",
        "model": "llama3.1",
    }
    outcome = ActivityResult(success=True, payload=payload)

    row = events.to_row(_action(), outcome)

    expected_columns = {
        "mail_decky_uuid": "d1",
        "thread_id": "thr1",
        "message_id": "<m1@corp.com>",
        "subject": "Q3 budget",
        "sender_email": "john@corp.com",
        "recipient_email": "sarah@corp.com",
        "language": "en",
    }
    for column, value in expected_columns.items():
        assert row[column] == value
    assert row["eml_path"].endswith(".eml")
    assert row["success"] is True
    assert row["payload"]["model"] == "llama3.1"
def test_to_row_falls_back_to_persona_language():
    """With an empty payload the row still has a language and a blank message id."""
    empty_outcome = ActivityResult(success=True, payload={})
    row = events.to_row(_action(), empty_outcome)
    assert row["language"] == "en"
    assert row["message_id"] == ""
def test_topic_for_uses_orchestrator_email_root():
    """The topic is ``orchestrator.<email constant>.<mail decky uuid>``."""
    expected_topic = f"orchestrator.{_topics.ORCHESTRATOR_EMAIL}.d1"
    assert events.topic_for(_action()) == expected_topic
def test_event_type_for_returns_email_constant():
    """The event type of an e-mail action is ``ORCHESTRATOR_EMAIL``."""
    event_type = events.event_type_for(_action())
    assert event_type == _topics.ORCHESTRATOR_EMAIL

View File

@@ -0,0 +1,129 @@
"""record / list / count / prune orchestrator_emails on a real SQLite repo."""
from __future__ import annotations
import json
from datetime import datetime, timedelta, timezone
import pytest
import pytest_asyncio
from decnet.web.db.sqlite.repository import SQLiteRepository
@pytest_asyncio.fixture
async def repo(tmp_path):
    """Yield an initialised ``SQLiteRepository`` backed by a temp-dir database file."""
    database_file = tmp_path / "decnet.db"
    repository = SQLiteRepository(db_path=str(database_file))
    await repository.initialize()
    yield repository
    # Teardown: dispose of the engine once the test is done with the repo.
    await repository.engine.dispose()
def _row(
    mail="d1",
    thread="thr1",
    msg="<m1@x>",
    sender="john@corp.com",
    recipient="sarah@corp.com",
    subject="Q3 budget",
    success=True,
    in_reply_to=None,
    ts=None,
):
    """Build an orchestrator-email row dict with overridable fields.

    ``ts`` falls back to the current UTC time when not supplied, and the
    ``eml_path`` is derived from the thread and message ids.
    """
    stamp = ts or datetime.now(timezone.utc)
    spool_path = f"/var/spool/decnet-emails/{thread}/{msg}.eml"
    return dict(
        ts=stamp,
        mail_decky_uuid=mail,
        thread_id=thread,
        message_id=msg,
        in_reply_to=in_reply_to,
        sender_email=sender,
        recipient_email=recipient,
        subject=subject,
        language="en",
        eml_path=spool_path,
        success=success,
        payload={"model": "llama3.1"},
    )
@pytest.mark.asyncio
async def test_record_returns_uuid_and_serialises_payload(repo):
    """Recording returns a 36-char string id and the payload round-trips as JSON."""
    row_id = await repo.record_orchestrator_email(_row())
    assert isinstance(row_id, str)
    assert len(row_id) == 36

    stored = await repo.list_orchestrator_emails()
    assert len(stored) == 1
    # The payload column holds JSON text and the list call returns that raw
    # value, so decode it here to confirm it survived intact.
    decoded_payload = json.loads(stored[0]["payload"])
    assert decoded_payload["model"] == "llama3.1"
@pytest.mark.asyncio
async def test_list_filters_by_thread_and_mail_decky(repo):
    """Listing can be narrowed by ``thread_id`` or by ``mail_decky_uuid``."""
    seed_rows = (
        _row(thread="t1", msg="<a@x>"),
        _row(thread="t2", msg="<b@x>"),
        _row(mail="d2", msg="<c@x>"),
    )
    for seed in seed_rows:
        await repo.record_orchestrator_email(seed)

    thread_hits = await repo.list_orchestrator_emails(thread_id="t1")
    assert {hit["message_id"] for hit in thread_hits} == {"<a@x>"}

    decky_hits = await repo.list_orchestrator_emails(mail_decky_uuid="d1")
    assert len(decky_hits) == 2

    unfiltered = await repo.list_orchestrator_emails()
    assert len(unfiltered) == 3
@pytest.mark.asyncio
async def test_count_orchestrator_emails(repo):
    """Counting works unfiltered and per mail decky, including an unknown one."""
    for index in range(3):
        await repo.record_orchestrator_email(_row(msg=f"<m{index}@x>"))

    total = await repo.count_orchestrator_emails()
    assert total == 3
    on_d1 = await repo.count_orchestrator_emails(mail_decky_uuid="d1")
    assert on_d1 == 3
    on_unknown = await repo.count_orchestrator_emails(mail_decky_uuid="other")
    assert on_unknown == 0
@pytest.mark.asyncio
async def test_thread_lookup_only_returns_pair_threads(repo):
    """Thread lookup matches a sender/recipient pair in either direction only."""
    john = "john@corp.com"
    sarah = "sarah@corp.com"
    traffic = (
        (john, sarah, "<a@x>"),
        # Reverse direction (Sarah → John) should still match the same pair.
        (sarah, john, "<b@x>"),
        # Unrelated pair must not match.
        ("mike@corp.com", sarah, "<c@x>"),
    )
    for sender, recipient, message_id in traffic:
        await repo.record_orchestrator_email(
            _row(sender=sender, recipient=recipient, msg=message_id)
        )

    threads = await repo.list_orchestrator_email_threads("d1", john, sarah)
    found_ids = {thread["message_id"] for thread in threads}
    assert found_ids == {"<a@x>", "<b@x>"}
@pytest.mark.asyncio
async def test_thread_lookup_excludes_failed_rows(repo):
    """Rows recorded with ``success=False`` are left out of the thread lookup."""
    for message_id, succeeded in (("<ok@x>", True), ("<bad@x>", False)):
        await repo.record_orchestrator_email(
            _row(msg=message_id, success=succeeded)
        )

    threads = await repo.list_orchestrator_email_threads(
        "d1", "john@corp.com", "sarah@corp.com",
    )
    found_ids = {thread["message_id"] for thread in threads}
    assert found_ids == {"<ok@x>"}
@pytest.mark.asyncio
async def test_prune_caps_per_decky(repo):
    """Pruning to a per-decky cap removes the oldest rows and keeps the newest."""
    # Five rows on d1, one minute apart, so which rows count as "oldest"
    # is unambiguous when the prune keeps the newest ones.
    start = datetime.now(timezone.utc) - timedelta(hours=10)
    for offset in range(5):
        stamped = _row(
            msg=f"<m{offset}@x>",
            ts=start + timedelta(minutes=offset),
        )
        await repo.record_orchestrator_email(stamped)

    # Five rows against a cap of three: two must go.
    removed = await repo.prune_orchestrator_emails(per_decky_cap=3)
    assert removed == 2

    survivors = await repo.list_orchestrator_emails()
    assert len(survivors) == 3
    # Only the three most recent message ids are left.
    surviving_ids = {row["message_id"] for row in survivors}
    assert surviving_ids == {"<m2@x>", "<m3@x>", "<m4@x>"}

View File

@@ -0,0 +1,240 @@
"""Scheduler.pick() — async, takes a repo-shaped object."""
from __future__ import annotations
import json
from datetime import datetime
from typing import Any
import pytest
from decnet.orchestrator.emailgen import scheduler
from decnet.realism import personas_pool as global_pool
@pytest.fixture(autouse=True)
def _reset_global_pool():
    """Reset the global persona pool cache before and after every test."""
    # Before the test: drop whatever an earlier test left cached.
    global_pool.reset_cache()
    yield
    # After the test: drop anything this test caused to be cached.
    global_pool.reset_cache()
# Two-persona pool used as the default topology persona set in these tests.
_PERSONAS_TWO = [
    dict(
        name="John Smith",
        email="john@corp.com",
        role="COO",
        tone="formal",
        mannerisms=["uses 'Best regards'"],
    ),
    dict(
        name="Sarah Johnson",
        email="sarah@corp.com",
        role="PM",
        tone="direct",
        mannerisms=["uses bullets"],
    ),
]
class _FakeRepo:
    """In-memory stand-in exposing only the repo calls ``scheduler.pick()`` makes."""

    def __init__(
        self,
        *,
        deckies: list[dict[str, Any]] | None = None,
        topologies: dict[str, dict[str, Any]] | None = None,
        threads: list[dict[str, Any]] | None = None,
    ):
        # Counts how often the thread lookup was invoked.
        self.thread_calls = 0
        self.deckies = deckies if deckies else []
        self.topologies = topologies if topologies else {}
        self.threads = threads if threads else []

    async def list_running_deckies(self):
        """Return the configured decky rows as-is."""
        return self.deckies

    async def get_topology(self, topology_id: str):
        """Return the topology row for ``topology_id``, or ``None`` if unknown."""
        return self.topologies.get(topology_id)

    async def list_orchestrator_email_threads(self, *args, **kwargs):
        """Record the call and return a copy of the configured threads."""
        self.thread_calls += 1
        return [*self.threads]
def _decky(
    uuid="d1",
    name="mailhost",
    services=("imap",),
    topology_id="t1",
    source="topology",
):
    """Build a running-decky row dict; ``services`` is copied into a list."""
    service_list = [*services]
    return dict(
        uuid=uuid,
        name=name,
        services=service_list,
        topology_id=topology_id,
        source=source,
    )
def _topology(personas=_PERSONAS_TWO, language_default="en"):
    """Build a topology row ``t1`` whose personas are stored as a JSON string."""
    encoded_personas = json.dumps(personas)
    return dict(
        id="t1",
        email_personas=encoded_personas,
        language_default=language_default,
    )
@pytest.mark.asyncio
async def test_pick_no_mail_decky_returns_none():
    """With only an ssh decky running there is nothing to pick."""
    ssh_only = _decky(services=("ssh",))
    repo = _FakeRepo(deckies=[ssh_only])
    picked = await scheduler.pick(repo)
    assert picked is None
@pytest.mark.asyncio
async def test_pick_unknown_topology_returns_none():
    """A mail decky whose topology row is missing yields no action."""
    # The repo holds no row for "t1", so the scheduler has to give up.
    repo = _FakeRepo(deckies=[_decky()])
    picked = await scheduler.pick(repo)
    assert picked is None
@pytest.mark.asyncio
async def test_pick_topology_with_one_persona_returns_none():
    """A topology with a single persona yields no action."""
    lone_persona = _PERSONAS_TWO[:1]
    repo = _FakeRepo(
        deckies=[_decky()],
        topologies={"t1": _topology(personas=lone_persona)},
    )
    picked = await scheduler.pick(repo)
    assert picked is None
@pytest.mark.asyncio
async def test_pick_returns_action_for_valid_setup():
    """One mail decky plus a two-persona topology at noon produces an action."""
    repo = _FakeRepo(deckies=[_decky()], topologies={"t1": _topology()})
    noon = datetime(2026, 4, 26, 12, 0, 0)

    picked = await scheduler.pick(repo, now=noon)

    assert picked is not None
    assert picked.mail_decky_uuid == "d1"
    assert picked.sender.email != picked.recipient.email
    # A thread id is expected whether this is a new thread or a reply.
    assert picked.thread_id
@pytest.mark.asyncio
async def test_pick_active_hours_filter_kicks_in_at_midnight():
    """Outside the personas' active hours the scheduler picks nothing."""
    repo = _FakeRepo(
        deckies=[_decky()],
        topologies={"t1": _topology()},
    )
    # Default active_hours is 09:00-18:00. The probe time is 03:00 (the
    # middle of the night, not literally midnight), so every persona is
    # out of office.
    action = await scheduler.pick(repo, now=datetime(2026, 4, 26, 3, 0, 0))
    assert action is None
@pytest.mark.asyncio
async def test_pick_uses_pop3_decky_too():
    """A decky offering only pop3 is also eligible for an action."""
    pop3_decky = _decky(services=("pop3",))
    repo = _FakeRepo(deckies=[pop3_decky], topologies={"t1": _topology()})
    noon = datetime(2026, 4, 26, 12, 0, 0)
    picked = await scheduler.pick(repo, now=noon)
    assert picked is not None
@pytest.mark.asyncio
async def test_pick_for_fleet_source_uses_global_pool(tmp_path, monkeypatch):
    """A fleet (MACVLAN/IPVLAN) mail decky has no parent topology row, so
    its personas are read from the host-wide JSON file."""
    personas_path = tmp_path / "personas.json"
    personas_path.write_text(json.dumps(_PERSONAS_TWO))
    monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(personas_path))

    # Deliberately no topology rows: the scheduler must not need one here.
    fleet_decky = _decky(source="fleet", topology_id=None)
    repo = _FakeRepo(deckies=[fleet_decky])

    noon = datetime(2026, 4, 26, 12, 0, 0)
    picked = await scheduler.pick(repo, now=noon)

    assert picked is not None
    assert picked.mail_decky_uuid == "d1"
@pytest.mark.asyncio
async def test_pick_for_shard_source_uses_global_pool(tmp_path, monkeypatch):
    """SWARM shards have no topology either and take the same path as fleet."""
    personas_path = tmp_path / "personas.json"
    personas_path.write_text(json.dumps(_PERSONAS_TWO))
    monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(personas_path))

    shard_decky = _decky(source="shard", topology_id=None)
    repo = _FakeRepo(deckies=[shard_decky])

    noon = datetime(2026, 4, 26, 12, 0, 0)
    picked = await scheduler.pick(repo, now=noon)
    assert picked is not None
@pytest.mark.asyncio
async def test_pick_fleet_with_empty_global_pool_returns_none(tmp_path, monkeypatch):
    """A fleet decky with a non-existent personas file yields no action."""
    absent_file = tmp_path / "missing.json"
    monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(absent_file))

    fleet_decky = _decky(source="fleet", topology_id=None)
    repo = _FakeRepo(deckies=[fleet_decky])

    noon = datetime(2026, 4, 26, 12, 0, 0)
    picked = await scheduler.pick(repo, now=noon)
    assert picked is None
@pytest.mark.asyncio
async def test_topology_personas_isolated_from_global_pool(tmp_path, monkeypatch):
    """A topology that brings its own personas must neither leak into nor
    draw from the global pool — per-topology richness is the whole point."""
    pool_only_persona = {
        "name": "Pool Persona",
        "email": "pool@corp.com",
        "role": "Pooler",
        "tone": "casual",
        "mannerisms": [],
    }
    personas_path = tmp_path / "personas.json"
    personas_path.write_text(json.dumps([pool_only_persona]))
    monkeypatch.setenv("DECNET_REALISM_PERSONAS", str(personas_path))

    # The topology row carries _PERSONAS_TWO as its own persona set.
    repo = _FakeRepo(deckies=[_decky()], topologies={"t1": _topology()})

    noon = datetime(2026, 4, 26, 12, 0, 0)
    picked = await scheduler.pick(repo, now=noon)

    assert picked is not None
    # Both participants have to come from the topology's personas; seeing
    # pool@corp.com on either side would mean the global pool leaked in.
    participants = (picked.sender.email, picked.recipient.email)
    assert "pool@corp.com" not in participants
@pytest.mark.asyncio
async def test_pick_reply_chain_sets_in_reply_to():
    """With an existing thread and a forced reply, the action threads onto it."""
    threads = [{
        "thread_id": "thr1",
        "message_id": "<old@corp.com>",
        "subject": "Q3 budget",
    }]
    repo = _FakeRepo(
        deckies=[_decky()],
        topologies={"t1": _topology()},
        threads=threads,
    )

    # Force the "reply" branch by stubbing the RNG: random() < 0.6 is True.
    class _Rng:
        """Deterministic RNG stub: first element on choice(), 0.0 on random()."""

        def choice(self, seq):
            return seq[0]

        def random(self):
            return 0.0  # always reply

    action = await scheduler.pick(
        repo, rand=_Rng(), now=datetime(2026, 4, 26, 12, 0, 0),
    )
    assert action is not None
    assert action.is_reply is True
    assert action.parent_message_id == "<old@corp.com>"
    assert action.thread_id == "thr1"
    assert action.subject_hint == "Re: Q3 budget"

View File

@@ -0,0 +1,61 @@
"""Thread-chain helpers."""
from __future__ import annotations
from decnet.orchestrator.emailgen.threads import (
ThreadChain,
new_message_id,
new_thread_id,
references_for_reply,
reply_subject,
)
def test_new_thread_id_is_uuid_string():
    """A new thread id is 36 characters long and contains four hyphens."""
    thread_id = new_thread_id()
    hyphen_count = thread_id.count("-")
    assert len(thread_id) == 36
    assert hyphen_count == 4
def test_new_message_id_format_with_domain():
    """A message id is angle-bracketed and embeds ``@<domain>``."""
    message_id = new_message_id("example.com")
    assert message_id.startswith("<")
    assert message_id.endswith(">")
    assert "@example.com" in message_id
def test_new_message_id_handles_blank_domain():
    """A whitespace-only domain falls back to ``localhost``."""
    blank_domain = " "
    message_id = new_message_id(blank_domain)
    assert "@localhost" in message_id
def test_reply_subject_prepends_re():
    """A plain subject gains a single ``Re: `` prefix."""
    replied = reply_subject("Q3 budget")
    assert replied == "Re: Q3 budget"
def test_reply_subject_collapses_existing_re():
    """Repeated or upper-case ``Re:`` prefixes collapse to one ``Re: ``."""
    for already_prefixed in ("Re: Re: Q3 budget", "RE: Q3 budget"):
        assert reply_subject(already_prefixed) == "Re: Q3 budget"
def test_references_for_reply_root_is_empty():
    """Without a chain (``None``) the references string is empty."""
    root_references = references_for_reply(None)
    assert root_references == ""
def test_references_for_reply_appends_parent():
    """The parent message id is appended after the existing references."""
    existing_chain = ThreadChain(
        thread_id="t1",
        parent_message_id="<m2@x>",
        references=("<m1@x>",),
        parent_subject="Re: budget",
    )
    combined = references_for_reply(existing_chain)
    assert combined == "<m1@x> <m2@x>"
def test_references_empty_chain_starts_with_parent_only():
    """With no prior references the result is just the parent message id."""
    first_reply_chain = ThreadChain(
        thread_id="t1",
        parent_message_id="<m1@x>",
        references=(),
        parent_subject="budget",
    )
    combined = references_for_reply(first_reply_chain)
    assert combined == "<m1@x>"