merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
20
decnet/orchestrator/emailgen/__init__.py
Normal file
20
decnet/orchestrator/emailgen/__init__.py
Normal file
@@ -0,0 +1,20 @@
|
||||
"""Emailgen — email-specific delivery, scheduling, and threading.
|
||||
|
||||
After stage 5 of the realism migration, ``emailgen`` is no longer a
|
||||
separate worker / systemd unit / CLI subcommand. It exposes:
|
||||
|
||||
* :mod:`decnet.orchestrator.emailgen.scheduler` — the
|
||||
``EmailAction`` shape and the ``pick(repo)`` policy that decides
|
||||
which mail decky / sender / recipient / thread an email belongs to.
|
||||
* :mod:`decnet.orchestrator.emailgen.threads` — RFC 2822 thread chain
|
||||
helpers (Message-ID generation, Re: / In-Reply-To bookkeeping).
|
||||
* :mod:`decnet.orchestrator.emailgen.events` — DB-row + bus-topic
|
||||
builders for email events.
|
||||
|
||||
The orchestrator's main worker (:mod:`decnet.orchestrator.worker`)
|
||||
calls into these modules per tick. LLM glue, persona schema, prompt
|
||||
builder, and the global persona pool moved to :mod:`decnet.realism`
|
||||
in stage 2 of the migration; this package keeps only the
|
||||
email-specific delivery surface.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
49
decnet/orchestrator/emailgen/events.py
Normal file
49
decnet/orchestrator/emailgen/events.py
Normal file
@@ -0,0 +1,49 @@
|
||||
"""DB-row + bus-topic helpers for the emailgen worker.
|
||||
|
||||
Mirror of :mod:`decnet.orchestrator.events` for the email action class.
|
||||
Kept in its own module so the SSH-flavoured orchestrator and the
|
||||
emailgen worker don't accumulate cross-imports of each other's action
|
||||
types.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from decnet.bus import topics as _topics
|
||||
from decnet.orchestrator.drivers.base import ActivityResult
|
||||
from decnet.orchestrator.emailgen.scheduler import EmailAction
|
||||
|
||||
|
||||
def to_row(action: EmailAction, result: ActivityResult) -> dict[str, Any]:
    """Assemble the keyword arguments for ``OrchestratorEmail(**...)``.

    ``message_id``, ``subject``, ``language`` and ``eml_path`` are read
    from the driver's ``payload`` instead of the action: the driver
    generates the Message-ID after the LLM call, so the payload is the
    value that matches what landed on disk. Missing payload keys fall
    back to ``""`` (or, for ``language``, to the sender's language and
    finally ``"en"``).

    Args:
        action: The scheduled email decision.
        result: The driver outcome; a falsy ``payload`` is treated as ``{}``.

    Returns:
        A dict keyed by the row's column names, stamped with the current
        UTC time under ``"ts"``.
    """
    data = result.payload
    if not data:
        data = {}

    row: dict[str, Any] = {"ts": datetime.now(timezone.utc)}
    row["mail_decky_uuid"] = action.mail_decky_uuid
    row["thread_id"] = action.thread_id
    row["message_id"] = data.get("message_id", "")
    row["in_reply_to"] = action.parent_message_id
    row["sender_email"] = action.sender.email
    row["recipient_email"] = action.recipient.email
    row["subject"] = data.get("subject", "")
    # Payload wins; otherwise the sender's own language, then English.
    row["language"] = data.get("language", action.sender.language or "en")
    row["eml_path"] = data.get("eml_path", "")
    row["success"] = result.success
    row["payload"] = data  # repo serialises dict→json
    return row
|
||||
|
||||
|
||||
def topic_for(action: EmailAction) -> str:
    """Return the bus topic for *action*, scoped to its mail decky's UUID."""
    decky_uuid = action.mail_decky_uuid
    return _topics.orchestrator(_topics.ORCHESTRATOR_EMAIL, decky_uuid)
|
||||
|
||||
|
||||
def event_type_for(action: EmailAction) -> str:  # noqa: ARG001 — symmetry
    """Return the event type for an email action.

    *action* is ignored; every email action yields
    ``_topics.ORCHESTRATOR_EMAIL``. The parameter is kept for symmetry
    (see the ``noqa`` note on the signature).
    """
    event_type = _topics.ORCHESTRATOR_EMAIL
    return event_type
|
||||
255
decnet/orchestrator/emailgen/scheduler.py
Normal file
255
decnet/orchestrator/emailgen/scheduler.py
Normal file
@@ -0,0 +1,255 @@
|
||||
"""Action picker for the emailgen worker.
|
||||
|
||||
One tick = one (mail-decky, sender, recipient, [thread]) decision.
|
||||
|
||||
Scope (v1):
- Mail decky = any running decky from the ``list_running_deckies`` union
view (topology, fleet, or SWARM shard) whose ``services`` includes
``imap`` or ``pop3``.
- Topology deckies take their personas from ``Topology.email_personas``
(JSON list of :class:`EmailPersona`). Topology-wide ``language_default``
fills in any persona that didn't set its own.
- Fleet / SWARM-shard deckies have no parent topology row, so they share
the host-wide persona pool (:mod:`decnet.realism.personas_pool`).
|
||||
|
||||
Returns ``None`` (skip tick) when:
|
||||
- no running mail decky,
|
||||
- the mail decky's topology has fewer than two valid personas,
|
||||
- nobody is in their ``active_hours`` window right now.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import secrets
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Any, Optional
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.orchestrator.emailgen.threads import (
|
||||
ThreadChain,
|
||||
new_thread_id,
|
||||
references_for_reply,
|
||||
reply_subject,
|
||||
)
|
||||
from decnet.realism import personas_pool as global_pool
|
||||
from decnet.realism.personas import (
|
||||
EmailPersona,
|
||||
in_active_hours,
|
||||
parse_personas,
|
||||
)
|
||||
|
||||
logger = get_logger("orchestrator.emailgen")

# Service names that mark a decky as a mail host; a decky qualifies when
# its ``services`` list contains at least one of them.
_MAIL_SERVICES = ("imap", "pop3")

# Probability of replying on an existing thread when one exists. The
# inverse starts a fresh thread. 0.6 mirrors what mailbox studies find
# for active corporate inboxes — most messages are replies, but not
# overwhelmingly so.
_REPLY_PROBABILITY = 0.6

# Generic context hints fed to the LLM when starting a new thread.
# Deliberately broad — the persona's tone + role is what shapes the
# email; the hint just gives the model a topic to riff on.
_CONTEXT_HINTS: tuple[str, ...] = (
    "Q3 budget review and approval",
    "Client presentation feedback",
    "Project deadline extension request",
    "Team building event planning",
    "IT system maintenance notification",
    "Quarterly performance review",
    "Vendor onboarding process",
    "Holiday schedule announcement",
    "Training session invitation",
    "Department restructuring update",
    "Client contract negotiation",
    "Security audit findings",
    "Sales strategy meeting",
    "Product launch timeline",
    "Office relocation update",
    "Travel reimbursement policy change",
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class EmailAction:
    """One emailgen tick's decision.

    ``thread_id`` is always set: a reply reuses the id of the thread it
    continues, and a new thread gets a freshly generated id. The worker
    writes it back to the DB so future ticks can chain further replies.
    ``parent_message_id`` / ``references`` mirror the RFC 2822
    ``In-Reply-To`` / ``References`` headers we'll set on the EML.

    ``mail_decky_name`` / ``mail_decky_services`` are denormalised onto
    the action so the driver doesn't need a second repo round-trip just
    to resolve the container name.
    """
    mail_decky_uuid: str
    mail_decky_name: str
    mail_decky_services: tuple[str, ...]
    sender: EmailPersona
    recipient: EmailPersona
    thread_id: str
    parent_message_id: Optional[str]  # None on the first message of a thread
    references: str  # space-separated message-ids; "" on a new thread
    subject_hint: Optional[str]  # used as parent subject when replying
    parent_excerpt: Optional[str]  # excerpt from the parent body
    context_hint: str  # only meaningful on new threads
    is_reply: bool
    description: str = "email:send"
|
||||
|
||||
|
||||
def _is_mail_decky(decky: dict[str, Any]) -> bool:
    """Tell whether *decky* advertises at least one mail service.

    A missing or falsy ``services`` value counts as "no services". A bare
    string is rejected outright so that ``in`` cannot degrade into a
    substring match.
    """
    offered = decky.get("services") or []
    if isinstance(offered, str):
        return False
    for wanted in _MAIL_SERVICES:
        if wanted in offered:
            return True
    return False
|
||||
|
||||
|
||||
async def _resolve_personas(
|
||||
repo: Any, mail_decky: dict[str, Any],
|
||||
) -> tuple[list[EmailPersona], str]:
|
||||
"""Pick the right persona source for *mail_decky* and return the list.
|
||||
|
||||
Returns ``(personas, source_label)`` so logs can disambiguate why a
|
||||
tick was skipped. Source label is the same string ``list_running_deckies``
|
||||
sets on the row (``"topology" | "fleet" | "shard"``) so the logger
|
||||
reads consistently against the rest of the orchestrator.
|
||||
|
||||
Resolution rules (matches the design discussion):
|
||||
* **topology** source → walk to ``Topology.email_personas``; the
|
||||
topology owns its own list. Each topology can have different
|
||||
personas.
|
||||
* **fleet** / **shard** source → unihost MACVLAN/IPVLAN deckies and
|
||||
SWARM shards have no parent topology row, so they share a single
|
||||
host-wide pool loaded from disk by :mod:`global_pool`.
|
||||
"""
|
||||
source = mail_decky.get("source") or "unknown"
|
||||
if source == "topology":
|
||||
topology_id = mail_decky.get("topology_id")
|
||||
if not topology_id:
|
||||
return [], source
|
||||
topology = await repo.get_topology(topology_id)
|
||||
if not topology:
|
||||
return [], source
|
||||
return (
|
||||
parse_personas(
|
||||
topology.get("email_personas"),
|
||||
language_default=topology.get("language_default") or "en",
|
||||
),
|
||||
source,
|
||||
)
|
||||
# Fleet / shard / anything else → global pool.
|
||||
return global_pool.load(), source
|
||||
|
||||
|
||||
async def pick(
    repo: Any,
    *,
    rand: Optional[secrets.SystemRandom] = None,
    now: Optional[datetime] = None,
) -> Optional[EmailAction]:
    """Pick one email action against any running mail decky.

    Mail-decky discovery uses the **union view** (``list_running_deckies``):
    MazeNET topology deckies, unihost fleet deckies, and SWARM shards are
    all eligible. Persona source is per-decky-source; see
    :func:`_resolve_personas`.

    Args:
        repo: Repository exposing ``list_running_deckies`` plus whatever
            :func:`_resolve_personas` and :func:`_maybe_pick_chain` call.
        rand: Random source; defaults to a fresh ``secrets.SystemRandom``.
            Injected so tests can make the choices deterministic.
        now: Wall-clock used for ``active_hours`` filtering (only its
            ``hour`` is read); defaults to ``datetime.now()``. Injected so
            tests can pin the hour deterministically.

    Returns:
        The chosen :class:`EmailAction`, or ``None`` (skip this tick) when
        there is no running mail decky, fewer than two personas are
        available or in their active hours, or no active persona has an
        email address different from the chosen sender's.
    """
    rng = rand or secrets.SystemRandom()
    now_dt = now or datetime.now()

    deckies = await repo.list_running_deckies()
    mail_deckies = [d for d in deckies if _is_mail_decky(d)]
    if not mail_deckies:
        logger.debug("emailgen pick: no running mail decky")
        return None

    mail_decky = rng.choice(mail_deckies)
    personas, source = await _resolve_personas(repo, mail_decky)
    if len(personas) < 2:
        logger.debug(
            "emailgen pick: source=%s mail_decky=%s only %d personas; need >=2",
            source, mail_decky.get("uuid"), len(personas),
        )
        return None

    active = [p for p in personas if in_active_hours(p, now_dt.hour)]
    if len(active) < 2:
        logger.debug(
            "emailgen pick: source=%s mail_decky=%s only %d personas in-hours",
            source, mail_decky.get("uuid"), len(active),
        )
        return None

    sender = rng.choice(active)
    # The >=2 guard above counts personas, not distinct addresses: if every
    # active persona shares the sender's email, the candidate list is empty
    # and ``rng.choice`` would raise IndexError. Skip the tick instead.
    recipient_candidates = [p for p in active if p.email != sender.email]
    if not recipient_candidates:
        logger.debug(
            "emailgen pick: source=%s mail_decky=%s no in-hours recipient "
            "with an email distinct from the sender's",
            source, mail_decky.get("uuid"),
        )
        return None
    recipient = rng.choice(recipient_candidates)

    # Look up open threads between this pair on this mail decky.
    chain = await _maybe_pick_chain(
        repo, mail_decky["uuid"], sender, recipient, rng=rng,
    )

    services = tuple(mail_decky.get("services") or ())
    decky_name = mail_decky.get("name") or ""

    if chain is not None:
        return EmailAction(
            mail_decky_uuid=mail_decky["uuid"],
            mail_decky_name=decky_name,
            mail_decky_services=services,
            sender=sender,
            recipient=recipient,
            thread_id=chain.thread_id,
            parent_message_id=chain.parent_message_id,
            references=references_for_reply(chain),
            subject_hint=chain.parent_subject,
            parent_excerpt=None,  # repo can populate later if useful
            context_hint=chain.parent_subject,
            is_reply=True,
        )

    return EmailAction(
        mail_decky_uuid=mail_decky["uuid"],
        mail_decky_name=decky_name,
        mail_decky_services=services,
        sender=sender,
        recipient=recipient,
        thread_id=new_thread_id(),
        parent_message_id=None,
        references="",
        subject_hint=None,
        parent_excerpt=None,
        context_hint=rng.choice(_CONTEXT_HINTS),
        is_reply=False,
    )
|
||||
|
||||
|
||||
async def _maybe_pick_chain(
    repo: Any,
    mail_decky_uuid: str,
    sender: EmailPersona,
    recipient: EmailPersona,
    *,
    rng: secrets.SystemRandom,
) -> Optional[ThreadChain]:
    """Decide whether this tick replies on an existing thread.

    One draw from *rng* is taken first; a draw at or above
    ``_REPLY_PROBABILITY`` means "start a new thread" and returns ``None``
    without touching the repo. Otherwise up to 20 threads for the pair on
    this mail decky are fetched and the first row becomes the parent.
    Returns ``None`` as well when the repo has no thread for the pair.
    """
    roll = rng.random()
    if roll >= _REPLY_PROBABILITY:
        return None

    rows = await repo.list_orchestrator_email_threads(
        mail_decky_uuid, sender.email, recipient.email, limit=20,
    )
    if not rows:
        return None

    # NOTE(review): presumably the repo orders rows newest-first, making
    # this the most recent message — confirm against the repository.
    parent = rows[0]
    chain_thread_id = parent["thread_id"]
    chain_parent_id = parent["message_id"]
    # We don't reconstruct the full ancestry from row history here —
    # the parent's References + parent's Message-ID would do that.
    # For v1, single-step references is fine; mail clients still
    # group correctly by (Subject + In-Reply-To).
    ancestry: tuple[str, ...] = ()
    subject = reply_subject(parent["subject"])
    return ThreadChain(
        thread_id=chain_thread_id,
        parent_message_id=chain_parent_id,
        references=ancestry,
        parent_subject=subject,
    )
|
||||
75
decnet/orchestrator/emailgen/threads.py
Normal file
75
decnet/orchestrator/emailgen/threads.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""RFC 2822 thread-chain bookkeeping.
|
||||
|
||||
A thread is a worker-side UUID that groups one or more emails between
|
||||
the same two personas. ``In-Reply-To`` carries the immediate parent's
|
||||
``Message-ID``; ``References`` carries the full ancestry chain.
|
||||
|
||||
The emailgen scheduler queries the repository for the most recent email
|
||||
in any thread between (sender, recipient); if it finds one, it emits a
|
||||
reply (continuing the chain). Otherwise it starts a new thread.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ThreadChain:
    """Immutable view of a thread's chain at a point in time.

    ``thread_id`` is opaque (UUID). ``parent_message_id`` is the most
    recent message in the chain — the new reply's ``In-Reply-To`` field.
    ``references`` is the ancestry as a tuple of message-ids
    (oldest-first per RFC 2822 §3.6.4); :func:`references_for_reply`
    joins it with single spaces to form the ``References:`` header.
    ``parent_subject`` carries the subject we're replying to, so the
    reply can prepend ``Re:`` correctly.
    """
    thread_id: str
    parent_message_id: str
    references: tuple[str, ...]  # excludes the parent; it is appended on render
    parent_subject: str
|
||||
|
||||
|
||||
def new_thread_id() -> str:
    """Return a fresh random (version 4) UUID string to identify a thread."""
    fresh = uuid.uuid4()
    return f"{fresh}"
|
||||
|
||||
|
||||
def reply_subject(parent_subject: str) -> str:
    """Return *parent_subject* normalised to carry exactly one ``Re:``.

    Surrounding whitespace is trimmed and any run of leading ``Re:``
    markers (matched case-insensitively) is collapsed, so
    ``Re: Re: Re:`` never piles up. Outlook and Thunderbird both fold
    repeats this way, and an attacker reading the maildir would notice
    a corpus that didn't.
    """
    remainder = parent_subject.strip()
    # Peel one marker per pass until the subject no longer starts with it.
    while remainder[:3].lower() == "re:":
        remainder = remainder[3:].lstrip()
    return "Re: " + remainder
|
||||
|
||||
|
||||
def references_for_reply(chain: Optional[ThreadChain]) -> str:
    """Render the ``References:`` header value for a reply.

    The result is the chain's message-ids in stored (oldest-first) order
    followed by the parent's Message-ID, separated by single spaces. A
    ``None`` chain means a root message and yields the empty string.
    """
    if chain is None:
        return ""
    return " ".join([*chain.references, chain.parent_message_id])
|
||||
|
||||
|
||||
def new_message_id(domain: str) -> str:
    """Return a new RFC 2822 ``Message-ID`` value, angle brackets included.

    The left-hand side is a random UUID in hex; the right-hand side is
    *domain* with surrounding whitespace removed, or ``localhost`` when
    nothing is left. The value is generated worker-side and also stored
    in the DB so a later reply can be threaded against it. The domain
    mirrors the sender's email domain so an attacker grepping for tells
    doesn't find every fake-corp email tagged with ``@example.com``.
    """
    host = domain.strip()
    if not host:
        host = "localhost"
    local_part = uuid.uuid4().hex
    return "<" + local_part + "@" + host + ">"
|
||||
Reference in New Issue
Block a user