feat(emailgen): global persona pool + Date-stamped EML mtimes
Two changes that unwind earlier MazeNET-only assumptions and fix a
realism tell:
1. Persona resolution is now per-decky-source, not topology-only. The
   scheduler walks the union view (list_running_deckies, including
   fleet MACVLAN/IPVLAN + SWARM shards) and picks the right persona
   list for each source:
     * topology decky -> Topology.email_personas (per-topology richness
       preserved)
     * fleet / shard  -> a single host-wide pool loaded from disk
       (DECNET_EMAILGEN_PERSONAS, /etc/decnet/email_personas.json, or
       ~/.decnet/email_personas.json)
   Operators install the global pool via 'decnet emailgen
   import-personas <file>', which validates with the same Pydantic
   schema the worker uses.
2. The driver now runs 'touch -d <Date>' inside the docker exec right
   after the EML write so file mtime matches the email's RFC 2822
   Date: header. Without this, an attacker running 'ls -lt' on the
   spool sees every email clustered inside the worker's tick window;
   the cluster itself was a timing tell.
CLI now exposes 'decnet emailgen' as a sub-app with 'run' (default,
backwards-compatible with bare 'decnet emailgen') and 'import-personas'.
list_running_deckies carries topology_id through so consumers can resolve
the parent topology without a second round-trip.
This commit is contained in:
@@ -135,6 +135,15 @@ async def test_driver_run_success_path(monkeypatch):
|
||||
# Two subprocess calls: ollama, then docker exec.
|
||||
assert calls[0][0] == "ollama"
|
||||
assert calls[1][0] == "docker"
|
||||
# docker exec shell command must include `touch -d` so the file's
|
||||
# mtime matches the EML's Date: header — otherwise the spool's
|
||||
# `ls -lt` clusters every email inside the worker tick window.
|
||||
docker_sh = calls[1][-1]
|
||||
assert "touch -d" in docker_sh
|
||||
assert "tee" in docker_sh
|
||||
# And tee must come before touch so we don't touch a file that
|
||||
# doesn't exist yet.
|
||||
assert docker_sh.index("tee") < docker_sh.index("touch -d")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
99
tests/orchestrator/emailgen/test_global_pool.py
Normal file
99
tests/orchestrator/emailgen/test_global_pool.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""Global persona pool — disk-backed source for fleet/shard mail deckies."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.orchestrator.emailgen import global_pool
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset():
    """Reset the global persona pool cache around every test in this module.

    The cache is cleared once before the test body runs and once more
    afterwards, so no test observes state left behind by another.
    """
    # Setup: start each test from an empty cache.
    global_pool.reset_cache()
    yield
    # Teardown: drop whatever the test just loaded.
    global_pool.reset_cache()
|
||||
|
||||
|
||||
_TWO = [
|
||||
{
|
||||
"name": "John Smith",
|
||||
"email": "john@corp.com",
|
||||
"role": "COO",
|
||||
"tone": "formal",
|
||||
"mannerisms": ["uses 'Best regards'"],
|
||||
},
|
||||
{
|
||||
"name": "Sarah Johnson",
|
||||
"email": "sarah@corp.com",
|
||||
"role": "PM",
|
||||
"tone": "direct",
|
||||
"mannerisms": ["uses bullets"],
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def test_load_returns_empty_when_file_missing(tmp_path, monkeypatch):
    """Pointing the env override at a non-existent file yields an empty list."""
    absent = tmp_path / "does-not-exist.json"
    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(absent))

    assert global_pool.load() == []
|
||||
|
||||
|
||||
def test_load_returns_parsed_personas(tmp_path, monkeypatch):
    """A valid JSON pool file is loaded into one persona object per record."""
    pool_path = tmp_path / "personas.json"
    pool_path.write_text(json.dumps(_TWO))
    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(pool_path))

    loaded = global_pool.load()

    assert len(loaded) == 2
    # Order is not asserted; only the set of addresses matters here.
    addresses = {persona.email for persona in loaded}
    assert addresses == {"john@corp.com", "sarah@corp.com"}
|
||||
|
||||
|
||||
def test_load_resolves_language_default(tmp_path, monkeypatch):
    """Personas without a language get the ``language_default`` passed to load()."""
    pool_path = tmp_path / "personas.json"
    # None of the _TWO records specify a language of their own.
    pool_path.write_text(json.dumps(_TWO))
    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(pool_path))

    loaded = global_pool.load(language_default="es")

    assert all(persona.language == "es" for persona in loaded)
|
||||
|
||||
|
||||
def test_load_invalid_json_returns_empty(tmp_path, monkeypatch):
    """A pool file holding malformed JSON is treated as an empty pool."""
    broken = tmp_path / "personas.json"
    broken.write_text("{not valid")
    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(broken))

    assert global_pool.load() == []
|
||||
|
||||
|
||||
def test_load_caches_until_mtime_changes(tmp_path, monkeypatch):
    """Rewriting the pool file with a newer mtime makes load() re-read it.

    The file's mtime is moved forward explicitly after the rewrite rather
    than relying on a short sleep and the wall clock. On filesystems with
    coarse timestamp granularity a 10 ms sleep may leave the mtime
    unchanged, which made the original form of this test flaky.
    """
    import os

    f = tmp_path / "personas.json"
    f.write_text(json.dumps(_TWO))
    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(f))

    first = global_pool.load()
    assert len(first) == 2

    # Remember the timestamps of the version that was just loaded.
    before = f.stat()

    # Re-write with a single persona, then bump mtime by a fixed two
    # seconds so the change is visible at any timestamp resolution.
    f.write_text(json.dumps(_TWO[:1]))
    os.utime(f, ns=(before.st_atime_ns, before.st_mtime_ns + 2_000_000_000))

    second = global_pool.load()
    assert len(second) == 1
|
||||
|
||||
|
||||
def test_resolve_path_honours_env_override(tmp_path, monkeypatch):
    """resolve_path() returns the DECNET_EMAILGEN_PERSONAS path when it is set."""
    # The file is never created; only the resolved path is checked.
    override = tmp_path / "x.json"
    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(override))

    assert global_pool.resolve_path() == override
|
||||
|
||||
|
||||
def test_resolve_path_falls_back_to_user_path_when_system_missing(monkeypatch):
    """Without the env override, resolve_path() still yields the canonical file name."""
    monkeypatch.delenv("DECNET_EMAILGEN_PERSONAS", raising=False)

    # On a typical dev box /etc/decnet/ is absent, so the resolver is
    # expected to choose ~/.decnet/email_personas.json.
    resolved = global_pool.resolve_path()

    # The full path depends on whether /etc/decnet exists on the test
    # host, so only the canonical file name is asserted (which also
    # rules out an empty path).
    assert resolved.name == "email_personas.json"
|
||||
@@ -7,7 +7,14 @@ from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.orchestrator.emailgen import scheduler
|
||||
from decnet.orchestrator.emailgen import global_pool, scheduler
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_global_pool():
    """Clear the global persona pool cache before and after each scheduler test."""
    # Setup: make sure no earlier test's pool is still cached.
    global_pool.reset_cache()
    yield
    # Teardown: discard any pool this test caused to be loaded.
    global_pool.reset_cache()
|
||||
|
||||
|
||||
_PERSONAS_TWO = [
|
||||
@@ -43,7 +50,7 @@ class _FakeRepo:
|
||||
self.threads = threads or []
|
||||
self.thread_calls = 0
|
||||
|
||||
async def list_running_topology_deckies(self):
|
||||
async def list_running_deckies(self):
|
||||
return self.deckies
|
||||
|
||||
async def get_topology(self, topology_id: str):
|
||||
@@ -54,12 +61,19 @@ class _FakeRepo:
|
||||
return list(self.threads)
|
||||
|
||||
|
||||
def _decky(uuid="d1", name="mailhost", services=("imap",), topology_id="t1"):
|
||||
def _decky(
|
||||
uuid="d1",
|
||||
name="mailhost",
|
||||
services=("imap",),
|
||||
topology_id="t1",
|
||||
source="topology",
|
||||
):
|
||||
return {
|
||||
"uuid": uuid,
|
||||
"name": name,
|
||||
"services": list(services),
|
||||
"topology_id": topology_id,
|
||||
"source": source,
|
||||
}
|
||||
|
||||
|
||||
@@ -127,6 +141,70 @@ async def test_pick_uses_pop3_decky_too():
|
||||
assert action is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_pick_for_fleet_source_uses_global_pool(tmp_path, monkeypatch):
    """Fleet (MACVLAN/IPVLAN) mail decky has no parent topology row;
    personas come from the host-wide JSON file."""
    personas_path = tmp_path / "personas.json"
    personas_path.write_text(json.dumps(_PERSONAS_TWO))
    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(personas_path))

    # No topology row is registered — confirms we never walk back to
    # the topology for a fleet decky.
    fleet_decky = _decky(source="fleet", topology_id=None)
    repo = _FakeRepo(deckies=[fleet_decky])

    tick = datetime(2026, 4, 26, 12, 0, 0)
    action = await scheduler.pick(repo, now=tick)

    assert action is not None
    assert action.mail_decky_uuid == "d1"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_pick_for_shard_source_uses_global_pool(tmp_path, monkeypatch):
    """SWARM shards are non-topology too — same path as fleet."""
    personas_path = tmp_path / "personas.json"
    personas_path.write_text(json.dumps(_PERSONAS_TWO))
    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(personas_path))

    shard_decky = _decky(source="shard", topology_id=None)
    repo = _FakeRepo(deckies=[shard_decky])

    tick = datetime(2026, 4, 26, 12, 0, 0)
    action = await scheduler.pick(repo, now=tick)

    assert action is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_pick_fleet_with_empty_global_pool_returns_none(tmp_path, monkeypatch):
    """A fleet decky with no pool file on disk produces no action."""
    missing = tmp_path / "missing.json"
    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(missing))

    fleet_decky = _decky(source="fleet", topology_id=None)
    repo = _FakeRepo(deckies=[fleet_decky])

    tick = datetime(2026, 4, 26, 12, 0, 0)
    outcome = await scheduler.pick(repo, now=tick)

    assert outcome is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_topology_personas_isolated_from_global_pool(tmp_path, monkeypatch):
    """A topology with its own personas must NOT leak into / pull from
    the global pool — per-topology richness is the whole point."""
    # A global pool with a single, easily recognisable persona.
    pool_only = {
        "name": "Pool Persona",
        "email": "pool@corp.com",
        "role": "Pooler",
        "tone": "casual",
        "mannerisms": [],
    }
    personas_path = tmp_path / "personas.json"
    personas_path.write_text(json.dumps([pool_only]))
    monkeypatch.setenv("DECNET_EMAILGEN_PERSONAS", str(personas_path))

    repo = _FakeRepo(
        deckies=[_decky()],
        topologies={"t1": _topology()},  # topology has _PERSONAS_TWO
    )

    tick = datetime(2026, 4, 26, 12, 0, 0)
    action = await scheduler.pick(repo, now=tick)

    assert action is not None
    # Both ends of the mail must be drawn from the topology's own
    # personas; seeing pool@corp.com on either side would be a leak.
    assert action.sender.email != "pool@corp.com"
    assert action.recipient.email != "pool@corp.com"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_pick_reply_chain_sets_in_reply_to():
|
||||
threads = [{
|
||||
|
||||
Reference in New Issue
Block a user