Files
DECNET/decnet/orchestrator/emailgen/scheduler.py
anti 3ee55ec341 feat(emailgen): Ollama-driven fake email worker for IMAP/POP3 deckies
Second orchestrator worker (decnet emailgen) that drips persona-driven,
threaded, multi-language fake emails into running mail deckies.  Personas
live on Topology.email_personas; topology-wide language_default falls
through to any persona that doesn't pin its own.  Em-dashes are
suppressed at the prompt layer by default and only lifted for personas
explicitly marked uses_llms_heavily — em-dashes are an LLM tell and a
flat corpus of em-dashed mail is a giveaway.

EML delivery writes into /var/spool/decnet-emails/<thread>/<msg>.eml on
the mail decky via docker exec; wiring the IMAP/POP3 templates to read
from that spool (replacing the hardcoded _BAIT_EMAILS) is the next step.
2026-04-26 22:16:19 -04:00

229 lines
7.5 KiB
Python

"""Action picker for the emailgen worker.
One tick = one (mail-decky, sender, recipient, [thread]) decision.
Scope (v1):
- Only TopologyDeckies are eligible mail hosts. Fleet / SWARM-shard
mail-deckies are out of scope per the plan; they get covered when the
forwarder pattern lands for emailgen.
- Mail decky = a running TopologyDecky whose ``services`` includes
``imap`` or ``pop3``.
- Personas come from ``Topology.email_personas`` (JSON list of
:class:`EmailPersona`). Topology-wide ``language_default`` fills in
any persona that didn't set its own.
Returns ``None`` (skip tick) when:
- no running mail decky,
- the chosen mail decky has no ``topology_id`` or its topology row is missing,
- the mail decky's topology has fewer than two valid personas,
- fewer than two personas are in their ``active_hours`` window right now.
"""
from __future__ import annotations
import secrets
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Optional
from decnet.logging import get_logger
from decnet.orchestrator.emailgen.personas import (
EmailPersona,
in_active_hours,
parse_personas,
)
from decnet.orchestrator.emailgen.threads import (
ThreadChain,
new_thread_id,
references_for_reply,
reply_subject,
)
# Shared logger for the emailgen orchestrator worker.
logger = get_logger("orchestrator.emailgen")

# A decky counts as a mail host when its ``services`` collection contains
# at least one of these names (see ``_is_mail_decky``).
_MAIL_SERVICES = ("imap", "pop3")

# Probability of replying on an existing thread when one exists.  The
# inverse starts a fresh thread.  0.6 mirrors what mailbox studies find
# for active corporate inboxes — most messages are replies, but not
# overwhelmingly so.
# The roll happens before the thread lookup, so a successful roll with no
# open thread for the pair still falls through to a new thread.
_REPLY_PROBABILITY = 0.6

# Generic context hints fed to the LLM when starting a new thread.
# Deliberately broad — the persona's tone + role is what shapes the
# email; the hint just gives the model a topic to riff on.
# One entry is drawn at random per new thread; replies reuse the parent
# subject as their hint instead.
_CONTEXT_HINTS: tuple[str, ...] = (
    "Q3 budget review and approval",
    "Client presentation feedback",
    "Project deadline extension request",
    "Team building event planning",
    "IT system maintenance notification",
    "Quarterly performance review",
    "Vendor onboarding process",
    "Holiday schedule announcement",
    "Training session invitation",
    "Department restructuring update",
    "Client contract negotiation",
    "Security audit findings",
    "Sales strategy meeting",
    "Product launch timeline",
    "Office relocation update",
    "Travel reimbursement policy change",
)
@dataclass(frozen=True)
class EmailAction:
    """One emailgen tick's decision.

    ``thread_id`` is always populated: a reply carries the id of the
    thread being continued, a new thread carries a freshly generated id.
    The worker writes it back to the DB so future ticks can chain further
    replies.  ``parent_message_id`` / ``references`` mirror the RFC 2822
    ``In-Reply-To`` / ``References`` headers we'll set on the EML.

    ``mail_decky_name`` / ``mail_decky_services`` are denormalised onto
    the action so the driver doesn't need a second repo round-trip just
    to resolve the container name.
    """

    # Identity of the mail decky that receives the message.
    mail_decky_uuid: str
    mail_decky_name: str  # empty string when the decky row has no name
    mail_decky_services: tuple[str, ...]
    # The two personas involved; ``pick`` chooses them with differing
    # ``email`` values.
    sender: EmailPersona
    recipient: EmailPersona
    # Thread this message belongs to (existing id on replies, new id otherwise).
    thread_id: str
    # Message-ID being replied to; ``None`` on a new thread.
    parent_message_id: Optional[str]
    # References header value; empty string on a new thread.
    references: str
    subject_hint: Optional[str]  # used as parent subject when replying; None on new threads
    parent_excerpt: Optional[str]  # excerpt from the parent body; currently always None from ``pick``
    context_hint: str  # only meaningful on new threads; replies carry the parent subject here
    is_reply: bool
    # Label for this action kind — NOTE(review): consumer not visible in
    # this module; presumably used for logging/auditing.
    description: str = "email:send"
def _is_mail_decky(decky: dict[str, Any]) -> bool:
    """Return True when *decky* advertises at least one mail service.

    A missing or empty ``services`` value never matches.  A bare string is
    rejected outright so that substring hits (``"imap" in "imaps-proxy"``)
    cannot be mistaken for service membership.
    """
    advertised = decky.get("services") or []
    if not isinstance(advertised, str):
        for wanted in _MAIL_SERVICES:
            if wanted in advertised:
                return True
    return False
async def pick(
    repo: Any,
    *,
    rand: Optional[secrets.SystemRandom] = None,
    now: Optional[datetime] = None,
) -> Optional[EmailAction]:
    """Pick one email action against the running fleet.

    *repo* is a :class:`BaseRepository`; we fetch running topology
    deckies + their parent topology row directly.  *rand* is the source
    of randomness (a fresh ``secrets.SystemRandom`` when omitted).  *now*
    is the wall-clock used for ``active_hours`` filtering — injected so
    tests can pin the hour deterministically; it defaults to the naive
    local ``datetime.now()`` and only its ``hour`` is consulted.

    Returns an :class:`EmailAction` describing either a reply on an
    existing thread or the start of a new one, or ``None`` to skip the
    tick when:

    - no running mail decky exists,
    - the chosen mail decky has no ``topology_id`` or its topology row
      is missing,
    - the topology has fewer than two valid personas,
    - fewer than two personas are inside their ``active_hours`` window,
    - no in-hours persona has an address different from the sender's.
    """
    rng = rand or secrets.SystemRandom()
    now_dt = now or datetime.now()

    deckies = await repo.list_running_topology_deckies()
    mail_deckies = [d for d in deckies if _is_mail_decky(d)]
    if not mail_deckies:
        logger.debug("emailgen pick: no running mail decky")
        return None

    mail_decky = rng.choice(mail_deckies)
    topology_id = mail_decky.get("topology_id")
    if not topology_id:
        logger.debug("emailgen pick: mail decky has no topology_id")
        return None

    topology = await repo.get_topology(topology_id)
    if not topology:
        logger.debug("emailgen pick: topology %s not found", topology_id)
        return None

    personas = parse_personas(
        topology.get("email_personas"),
        language_default=topology.get("language_default") or "en",
    )
    if len(personas) < 2:
        logger.debug(
            "emailgen pick: topology=%s has only %d personas; need >=2",
            topology_id, len(personas),
        )
        return None

    active = [p for p in personas if in_active_hours(p, now_dt.hour)]
    if len(active) < 2:
        logger.debug(
            "emailgen pick: topology=%s only %d personas in-hours",
            topology_id, len(active),
        )
        return None

    sender = rng.choice(active)
    # Two in-hours personas do not guarantee two distinct addresses: if
    # every active persona shares the sender's email, the candidate list
    # is empty and ``rng.choice`` would raise IndexError.  Skip the tick.
    candidates = [p for p in active if p.email != sender.email]
    if not candidates:
        logger.debug(
            "emailgen pick: topology=%s has no in-hours recipient distinct "
            "from the sender",
            topology_id,
        )
        return None
    recipient = rng.choice(candidates)

    # Look up open threads between this pair on this mail decky.
    chain = await _maybe_pick_chain(
        repo, mail_decky["uuid"], sender, recipient, rng=rng,
    )
    services = tuple(mail_decky.get("services") or ())
    decky_name = mail_decky.get("name") or ""

    if chain is not None:
        # Reply: continue the existing thread and point at its parent.
        return EmailAction(
            mail_decky_uuid=mail_decky["uuid"],
            mail_decky_name=decky_name,
            mail_decky_services=services,
            sender=sender,
            recipient=recipient,
            thread_id=chain.thread_id,
            parent_message_id=chain.parent_message_id,
            references=references_for_reply(chain),
            subject_hint=chain.parent_subject,
            parent_excerpt=None,  # repo can populate later if useful
            context_hint=chain.parent_subject,
            is_reply=True,
        )

    # New thread: fresh id, no ancestry, random topic for the LLM.
    return EmailAction(
        mail_decky_uuid=mail_decky["uuid"],
        mail_decky_name=decky_name,
        mail_decky_services=services,
        sender=sender,
        recipient=recipient,
        thread_id=new_thread_id(),
        parent_message_id=None,
        references="",
        subject_hint=None,
        parent_excerpt=None,
        context_hint=rng.choice(_CONTEXT_HINTS),
        is_reply=False,
    )
async def _maybe_pick_chain(
    repo: Any,
    mail_decky_uuid: str,
    sender: EmailPersona,
    recipient: EmailPersona,
    *,
    rng: secrets.SystemRandom,
) -> Optional[ThreadChain]:
    """Decide whether this tick replies on an existing thread.

    Rolls against ``_REPLY_PROBABILITY`` first; only on a winning roll is
    the repository queried for threads between the pair on this decky.
    Returns a :class:`ThreadChain` built from the first row returned, or
    ``None`` when the roll fails or no thread exists.
    """
    roll = rng.random()
    if roll < _REPLY_PROBABILITY:
        rows = await repo.list_orchestrator_email_threads(
            mail_decky_uuid, sender.email, recipient.email, limit=20,
        )
        if rows:
            latest = rows[0]
            chain_id = latest["thread_id"]
            parent_id = latest["message_id"]
            # Full ancestry is not rebuilt from row history here — that
            # would need the parent's References plus its Message-ID.
            # For v1 a single-step reference suffices; mail clients still
            # group correctly by (Subject + In-Reply-To).
            ancestry = ()
            return ThreadChain(
                thread_id=chain_id,
                parent_message_id=parent_id,
                references=ancestry,
                parent_subject=reply_subject(latest["subject"]),
            )
    return None