feat(emailgen): Ollama-driven fake email worker for IMAP/POP3 deckies

Second orchestrator worker (decnet emailgen) that drips persona-driven, threaded, multi-language fake emails into running mail deckies. Personas live on Topology.email_personas; topology-wide language_default falls through to any persona that doesn't pin its own. Em-dashes are suppressed at the prompt layer by default and only lifted for personas explicitly marked uses_llms_heavily — em-dashes are an LLM tell and a flat corpus of em-dashed mail is a giveaway. EML delivery writes into /var/spool/decnet-emails/<thread>/<msg>.eml on the mail decky via docker exec; wiring the IMAP/POP3 templates to read from that spool (replacing the hardcoded _BAIT_EMAILS) is the next step.
2026-04-26 22:16:19 -04:00
parent 674028d476
commit 3ee55ec341
25 changed files with 2343 additions and 1 deletions
--- a/decnet/orchestrator/emailgen/personas.py
+++ b/decnet/orchestrator/emailgen/personas.py
@@ -0,0 +1,119 @@
+"""Persona schema for the emailgen worker.
+
+Stored as a JSON list on :attr:`Topology.email_personas`.  Each persona
+describes one fictional employee whose mailbox lives on the topology's
+IMAP/POP3 decky.  The schema deliberately stays narrow: the LLM gets
+*enough* differentiation to write distinct voices, no more.
+
+Invalid entries are dropped with a logged warning (the parsed list
+simply omits them) rather than raising — a single typo in one persona
+must not stall the entire emailgen tick.
+"""
+from __future__ import annotations
+
+import json
+from typing import Literal, Optional
+
+from pydantic import BaseModel, Field, ValidationError, field_validator
+
+from decnet.logging import get_logger
+
+logger = get_logger("orchestrator.emailgen")  # module logger; persona parse warnings go here
+
+Tone = Literal["formal", "direct", "casual", "technical"]  # allowed ``EmailPersona.tone`` values
+ReplyLatency = Literal["fast", "normal", "slow"]  # allowed ``EmailPersona.reply_latency`` values
+
+
+class EmailPersona(BaseModel):
+    """One fake mailbox owner.
+
+    ``email`` must look like ``user@domain.tld`` (no whitespace, no empty
+    domain label).  ``language`` is ISO 639-1 (``en``, ``es``, ``pt``…);
+    when unset it falls back to the topology's ``language_default``.
+    ``uses_llms_heavily`` lifts the prompt-layer em-dash suppression:
+    em-dashes are an LLM tell, but a heavy LLM user *naturally* writes them.
+    """
+    name: str = Field(min_length=1, max_length=128)
+    email: str = Field(min_length=3, max_length=255)
+    role: str = Field(min_length=1, max_length=128)
+    tone: Tone = "formal"
+    mannerisms: list[str] = Field(default_factory=list, max_length=12)
+    language: Optional[str] = Field(default=None, max_length=8)
+    signature: Optional[str] = Field(default=None, max_length=512)
+    active_hours: str = Field(default="09:00-18:00", max_length=32)
+    reply_latency: ReplyLatency = "normal"
+    uses_llms_heavily: bool = False
+
+    @field_validator("email")
+    @classmethod
+    def _email_shape(cls, v: str) -> str:
+        # Cheap structural check (full RFC 5322 isn't worth the dependency):
+        # an "@", non-empty local part and domain labels, no whitespace.
+        local, at, domain = v.rpartition("@")
+        if not at:
+            raise ValueError("email must contain '@'")
+        labels = domain.split(".")
+        if not local or len(labels) < 2 or not all(labels) or any(map(str.isspace, v)):
+            raise ValueError("email must look like user@domain.tld")
+        return v
+
+
+def parse_personas(
+    raw: str | list | None,
+    *,
+    language_default: str = "en",
+) -> list[EmailPersona]:
+    """Parse the JSON-or-list ``email_personas`` value into models.
+
+    Bad JSON, non-list payloads and invalid entries are logged and
+    skipped, never raised.  An unset or blank ``language`` resolves to
+    *language_default* so consumers never handle fallback themselves.
+    """
+    if not raw:
+        return []
+    if isinstance(raw, str):
+        try:
+            raw = json.loads(raw)
+        except json.JSONDecodeError as exc:
+            logger.warning("emailgen personas: invalid JSON, skipping: %s", exc)
+            return []
+    if not isinstance(raw, list):
+        logger.warning(
+            "emailgen personas: expected list, got %s", type(raw).__name__
+        )
+        return []
+    out: list[EmailPersona] = []
+    for i, entry in enumerate(raw):
+        try:
+            persona = EmailPersona.model_validate(entry)
+        except ValidationError as exc:
+            logger.warning(
+                "emailgen personas: dropping invalid entry index=%d: %s",
+                i, exc.errors(include_url=False),
+            )
+            continue
+        if not (persona.language or "").strip():  # blank counts as unset
+            persona = persona.model_copy(update={"language": language_default})
+        out.append(persona)
+    return out
+
+
+def in_active_hours(persona: EmailPersona, now_hour: int) -> bool:
+    """Return True if *now_hour* (0–23) falls in the persona's window.
+
+    Format: ``"HH:MM-HH:MM"``; only the hour parts are compared, and
+    wrap-around windows (``"22:00-06:00"``) are supported.  Malformed,
+    out-of-range (hour outside 0–24) or zero-length windows treat the
+    persona as always-on so a config typo never silences the fleet.
+    """
+    try:
+        start_s, end_s = persona.active_hours.split("-")
+        start_h, end_h = int(start_s.split(":")[0]), int(end_s.split(":")[0])
+    except (ValueError, IndexError):
+        return True
+    # Hours such as 25 or 90 are typos too; "24:00" is kept as end-of-day.
+    if start_h == end_h or not (0 <= start_h <= 24 and 0 <= end_h <= 24):
+        return True
+    if start_h < end_h:
+        return start_h <= now_hour < end_h
+    return now_hour >= start_h or now_hour < end_h  # wraps past midnight