merge: testing → main (reconcile 2-week divergence)

This commit is contained in:
2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions

153
decnet/realism/personas.py Normal file
View File

@@ -0,0 +1,153 @@
"""Persona schema for realism content generation.
Stored as a JSON list on :attr:`Topology.email_personas`. Each persona
describes one fictional employee — sender of email *and* author of
files (notes, TODOs, drafts, scripts) on the deckies they're sampled
onto. The schema deliberately stays narrow: the LLM gets *enough*
differentiation to write distinct voices, no more.
The class is still named :class:`EmailPersona` because every persona
in the pool today carries a mandatory email address (used for IMAP/
POP3 spool delivery). Future per-decky personas without mailboxes
would justify a rename / superclass; not in scope for the realism
migration.
Invalid entries are dropped with a logged warning rather than raising —
a single typo in one persona must not stall the entire realism tick.
"""
from __future__ import annotations
import json
from typing import Literal, Optional
from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator
from decnet.logging import get_logger
# Module-level logger; ``parse_personas`` reports malformed input through it.
logger = get_logger("realism.personas")
# Allowed values for ``EmailPersona.tone``. ``"custom"`` additionally requires
# a non-blank ``EmailPersona.tone_custom`` (enforced by a model validator).
Tone = Literal["formal", "direct", "casual", "technical", "custom"]
# Allowed values for ``EmailPersona.reply_latency``.
ReplyLatency = Literal["fast", "normal", "slow"]
class EmailPersona(BaseModel):
    """Schema for a single fictional mailbox owner.

    ``language`` holds an ISO 639-1 code (``en``, ``es``, ``pt``…). A
    persona that leaves it unset inherits the topology's
    ``language_default``.

    ``uses_llms_heavily`` lifts the prompt-layer em-dash suppression for
    this persona: em-dashes are an LLM tell, yet a persona explicitly
    pegged as a heavy LLM user should *naturally* produce them.
    """

    name: str = Field(min_length=1, max_length=128)
    email: str = Field(min_length=3, max_length=255)
    role: str = Field(min_length=1, max_length=128)
    tone: Tone = "formal"
    tone_custom: Optional[str] = Field(default=None, max_length=128)
    mannerisms: list[str] = Field(default_factory=list, max_length=12)
    language: Optional[str] = Field(default=None, max_length=8)
    signature: Optional[str] = Field(default=None, max_length=512)
    active_hours: str = Field(default="09:00-18:00", max_length=32)
    reply_latency: ReplyLatency = "normal"
    uses_llms_heavily: bool = False

    @field_validator("email")
    @classmethod
    def _email_shape(cls, v: str) -> str:
        """Accept only addresses shaped like ``user@domain.tld``.

        This is a deliberately cheap structural check: full RFC 5322
        parsing isn't worth a dependency when the prompt builder and the
        Message-ID generator only need two non-empty halves.

        Raises:
            ValueError: if there is no ``@``, either side of the last
                ``@`` is empty, or the domain part has no dot.
        """
        at = v.rfind("@")
        if at < 0:
            raise ValueError("email must contain '@'")
        # Split on the *last* "@" so the domain is whatever follows it.
        local, domain = v[:at], v[at + 1:]
        if not (local and domain and "." in domain):
            raise ValueError("email must look like user@domain.tld")
        return v

    @model_validator(mode="after")
    def _custom_tone_requires_text(self) -> "EmailPersona":
        """Insist on a description whenever ``tone`` is ``"custom"``.

        ``tone="custom"`` exists for voices the four canned tones don't
        capture (sarcastic, deadpan, terse, etc.). The free text is
        interpolated into the prompt verbatim, so a missing or blank
        value would leave the LLM with only the literal word "custom".
        Failing loudly beats silently producing a useless prompt.

        Raises:
            ValueError: if ``tone`` is ``"custom"`` and ``tone_custom``
                is ``None``, empty, or whitespace-only.
        """
        if self.tone != "custom":
            return self
        description = (self.tone_custom or "").strip()
        if not description:
            raise ValueError("tone_custom is required when tone is 'custom'")
        return self
def parse_personas(
    raw: str | list | None,
    *,
    language_default: str = "en",
) -> list[EmailPersona]:
    """Turn the stored ``email_personas`` value into validated models.

    *raw* may be a JSON string, an already-decoded list, or ``None``.
    Nothing here raises on bad input: undecodable JSON or a non-list
    payload yields an empty list, and an entry that fails validation is
    skipped. Each of those cases is reported through a logged warning.

    Personas without an explicit ``language`` receive *language_default*,
    so downstream consumers (prompt builder, scheduler) never need to
    know about the fallback.

    Args:
        raw: JSON text, a list of persona mappings, or ``None``.
        language_default: language assigned to personas that omit one.

    Returns:
        The personas that validated, in input order.
    """
    if not raw:
        return []

    entries = raw
    if isinstance(entries, str):
        try:
            entries = json.loads(entries)
        except json.JSONDecodeError as exc:
            logger.warning("realism personas: invalid JSON, skipping: %s", exc)
            return []

    if not isinstance(entries, list):
        logger.warning(
            "realism personas: expected list, got %s", type(entries).__name__
        )
        return []

    personas: list[EmailPersona] = []
    for index, item in enumerate(entries):
        try:
            parsed = EmailPersona.model_validate(item)
        except ValidationError as exc:
            # One broken entry must not take the rest of the pool with it.
            logger.warning(
                "realism personas: dropping invalid entry index=%d: %s",
                index, exc.errors(include_url=False),
            )
        else:
            if parsed.language is None:
                # Resolve the fallback on a copy of the validated model.
                parsed = parsed.model_copy(update={"language": language_default})
            personas.append(parsed)
    return personas
def login_for(persona: str) -> str:
    """Derive a linux login from a persona's display name.

    The name is lowercased and its spaces are removed. If what remains
    is not a plausible POSIX login (non-empty, ASCII, alphanumeric
    only), the generic ``user`` is returned instead so the path doesn't
    leak the persona's display name onto the decky filesystem.

    Shared by realism path naming (``decnet/realism/naming.py``) and
    canary cultivation (``decnet/canary/cultivator.py``).
    """
    squashed = "".join(persona.lower().split(" "))
    plausible = bool(squashed) and squashed.isascii() and squashed.isalnum()
    return squashed if plausible else "user"
def in_active_hours(persona: EmailPersona, now_hour: int) -> bool:
    """Return True if *now_hour* (0–23) falls in the persona's window.

    Format: ``"HH:MM-HH:MM"``. Only the hour components are compared;
    the minutes are ignored and the end hour is exclusive. Wrap-around
    windows (``"22:00-06:00"``) are supported, and a window whose start
    and end hours are equal counts as the whole day.

    Invalid windows treat the persona as always-on so a config typo
    never silences the whole fleet. "Invalid" covers both text that
    cannot be parsed and hours outside ``0..24``.

    Args:
        persona: object whose ``active_hours`` string defines the window.
        now_hour: current hour of day.

    Returns:
        Whether the persona should be considered active at *now_hour*.
    """
    try:
        start_s, end_s = persona.active_hours.split("-")
        start_h = int(start_s.split(":")[0])
        end_h = int(end_s.split(":")[0])
    except (ValueError, IndexError):
        # Wrong number of "-"-separated parts, or a non-numeric hour.
        return True
    # An hour past 24 parses fine but can never line up with a 0-23
    # clock: "25:00-30:00" would otherwise match nothing and mute the
    # persona. Treat it like any other typo. 24 stays legal so that
    # "09:00-24:00" keeps meaning "until midnight".
    if not (0 <= start_h <= 24 and 0 <= end_h <= 24):
        return True
    if start_h == end_h:
        return True
    if start_h < end_h:
        return start_h <= now_hour < end_h
    # Wrap-around (e.g. 22:00-06:00).
    return now_hour >= start_h or now_hour < end_h