Second orchestrator worker (decnet emailgen) that drips persona-driven, threaded, multi-language fake emails into running mail deckies. Personas live on Topology.email_personas; topology-wide language_default falls through to any persona that doesn't pin its own. Em-dashes are suppressed at the prompt layer by default and only lifted for personas explicitly marked uses_llms_heavily — em-dashes are an LLM tell and a flat corpus of em-dashed mail is a giveaway. EML delivery writes into /var/spool/decnet-emails/<thread>/<msg>.eml on the mail decky via docker exec; wiring the IMAP/POP3 templates to read from that spool (replacing the hardcoded _BAIT_EMAILS) is the next step.
120 lines
4.3 KiB
Python
120 lines
4.3 KiB
Python
"""Persona schema for the emailgen worker.
|
||
|
||
Stored as a JSON list on :attr:`Topology.email_personas`. Each persona
|
||
describes one fictional employee whose mailbox lives on the topology's
|
||
IMAP/POP3 decky. The schema deliberately stays narrow: the LLM gets
|
||
*enough* differentiation to write distinct voices, no more.
|
||
|
||
Invalid entries are dropped with a logged warning rather than raising —
a single typo in one persona must not stall the entire emailgen tick.
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
from typing import Literal, Optional
|
||
|
||
from pydantic import BaseModel, Field, ValidationError, field_validator
|
||
|
||
from decnet.logging import get_logger
|
||
|
||
# Module-level logger; persona parsing reports dropped or malformed
# entries through it instead of raising.
logger = get_logger("orchestrator.emailgen")

# Closed vocabularies for the persona fields ``tone`` and ``reply_latency``.
# Values outside these literals fail validation of the persona entry.
Tone = Literal["formal", "direct", "casual", "technical"]
ReplyLatency = Literal["fast", "normal", "slow"]
|
||
|
||
|
||
class EmailPersona(BaseModel):
    """One fake mailbox owner.

    ``language`` is ISO 639-1 (``en``, ``es``, ``pt``…); when unset on the
    persona it falls back to the topology's ``language_default``.
    ``uses_llms_heavily`` lifts the prompt-layer em-dash suppression for
    that persona — em-dashes are an LLM tell, but a persona explicitly
    pegged as a heavy LLM user should *naturally* produce them.
    """

    # Identity of the fictional employee.
    name: str = Field(min_length=1, max_length=128)
    email: str = Field(min_length=3, max_length=255)
    role: str = Field(min_length=1, max_length=128)

    # Voice controls: overall register plus up to 12 free-form quirks.
    tone: Tone = "formal"
    mannerisms: list[str] = Field(default_factory=list, max_length=12)

    # ``None`` means "not pinned on this persona".
    language: Optional[str] = Field(default=None, max_length=8)
    signature: Optional[str] = Field(default=None, max_length=512)

    # Scheduling hints: activity window string and reply speed bucket.
    active_hours: str = Field(default="09:00-18:00", max_length=32)
    reply_latency: ReplyLatency = "normal"

    uses_llms_heavily: bool = False

    @field_validator("email")
    @classmethod
    def _email_shape(cls, v: str) -> str:
        """Reject addresses that are not shaped like ``user@domain.tld``.

        Raises:
            ValueError: if there is no ``@``, the local part is empty, or
                the domain does not consist of at least two non-empty
                dot-separated labels.
        """
        # Cheap structural check — full RFC 5322 isn't worth the
        # dependency. We only need ``user@domain`` with non-empty parts
        # for the prompt builder + Message-ID generator.
        if "@" not in v:
            raise ValueError("email must contain '@'")
        local, _, domain = v.rpartition("@")
        # A bare ``"." in domain`` test would let ``user@.``, ``user@.com``
        # and ``user@domain.`` through; require every label to be non-empty.
        labels = domain.split(".")
        if not local or len(labels) < 2 or not all(labels):
            raise ValueError("email must look like user@domain.tld")
        return v
|
||
|
||
|
||
def parse_personas(
    raw: str | list | None,
    *,
    language_default: str = "en",
) -> list[EmailPersona]:
    """Turn the stored ``email_personas`` value into validated models.

    *raw* may be a JSON string, an already-decoded list, or empty/``None``.
    Anything that cannot be decoded into a list yields ``[]`` with a
    logged warning; individual entries that fail validation are logged
    and skipped. Personas without a ``language`` get *language_default*
    filled in here, so callers never deal with the fallback themselves.
    """
    if not raw:
        return []

    entries = raw
    if isinstance(entries, str):
        try:
            entries = json.loads(entries)
        except json.JSONDecodeError as exc:
            logger.warning("emailgen personas: invalid JSON, skipping: %s", exc)
            return []

    if not isinstance(entries, list):
        logger.warning(
            "emailgen personas: expected list, got %s", type(entries).__name__
        )
        return []

    personas: list[EmailPersona] = []
    for index, candidate in enumerate(entries):
        try:
            parsed = EmailPersona.model_validate(candidate)
        except ValidationError as exc:
            # One bad entry must not take the rest of the list down.
            logger.warning(
                "emailgen personas: dropping invalid entry index=%d: %s",
                index, exc.errors(include_url=False),
            )
            continue
        personas.append(
            parsed
            if parsed.language is not None
            else parsed.model_copy(update={"language": language_default})
        )
    return personas
|
||
|
||
|
||
def in_active_hours(persona: EmailPersona, now_hour: int) -> bool:
    """Return True if *now_hour* (0–23) falls in the persona's window.

    Format: ``"HH:MM-HH:MM"``. Only the hour components are compared, so
    the window has hour granularity; the start hour is inclusive and the
    end hour exclusive. Wrap-around windows (``"22:00-06:00"``) are
    supported, and a window whose start and end hour coincide means
    "always on".

    Invalid windows — unparseable text *or* hours outside 0–24 — treat
    the persona as always-on so a config typo never silences the whole
    fleet.
    """
    try:
        start_s, end_s = persona.active_hours.split("-")
        start_h = int(start_s.split(":")[0])
        end_h = int(end_s.split(":")[0])
    except (ValueError, IndexError):
        return True
    # Hours that parse as integers but are not real clock hours (e.g.
    # "30:00-40:00") would otherwise build a window that can never match.
    if not (0 <= start_h <= 24 and 0 <= end_h <= 24):
        return True
    if start_h == end_h:
        return True
    if start_h < end_h:
        return start_h <= now_hour < end_h
    # Wrap-around (e.g. 22:00-06:00).
    return now_hour >= start_h or now_hour < end_h
|