refactor(realism): move emailgen LLM/personas/prompt into shared library

Lift the format-agnostic pieces from decnet/orchestrator/emailgen/
into the new decnet/realism/ library so file-class content generation
(stage 3 of the realism migration) can reuse them. Email-specific
delivery (RFC 2822 EML, IMAP/POP3 spool, thread chains) stays in
orchestrator/.

Renames (history-preserving git mv):
  emailgen/personas.py     -> realism/personas.py
  emailgen/prompt.py       -> realism/prompts/email.py
  emailgen/global_pool.py  -> realism/personas_pool.py
  emailgen/llm/            -> realism/llm/

Env-var clean break (pre-v1, no aliases):
  DECNET_EMAILGEN_LLM      -> DECNET_REALISM_LLM
  DECNET_EMAILGEN_MODEL    -> DECNET_REALISM_MODEL
  DECNET_EMAILGEN_TIMEOUT  -> DECNET_REALISM_TIMEOUT
  DECNET_EMAILGEN_PERSONAS -> DECNET_REALISM_PERSONAS
  DECNET_EMAILGEN_FAKE_OUTPUT -> DECNET_REALISM_FAKE_OUTPUT

Importers rewritten in: orchestrator/emailgen/scheduler.py,
orchestrator/drivers/email.py, web/router/{emailgen,topology}/
api_personas.py, cli/emailgen.py. Tests for moved modules relocated
to tests/realism/; tests for stay-put modules updated in place.

API URL `/api/v1/emailgen/personas` and CLI `decnet emailgen
import-personas` keep their public names until the service-collapse
commit (stage 5).
This commit is contained in:
2026-04-27 16:05:43 -04:00
parent f57c621117
commit 0b9873982d
34 changed files with 455 additions and 298 deletions

View File

@@ -113,7 +113,7 @@ api_router.include_router(orchestrator_events_router)
# Emailgen — global persona pool CRUD for the dashboard's
# "Persona Generation" page. The worker reads from the same on-disk
# JSON file directly (see decnet.orchestrator.emailgen.global_pool).
# JSON file directly (see decnet.realism.personas_pool).
api_router.include_router(emailgen_personas_router)
# Observability

View File

@@ -1,10 +1,10 @@
"""GET/PUT ``/api/v1/emailgen/personas`` — global persona pool CRUD.
The "global pool" is a JSON file consumed by the emailgen worker for
fleet (MACVLAN/IPVLAN) and SWARM-shard mail deckies — see
:mod:`decnet.orchestrator.emailgen.global_pool`. MazeNET topology
mail deckies use ``Topology.email_personas`` instead and are
configured per-topology elsewhere.
The "global pool" is a JSON file consumed by the realism content
engine for fleet (MACVLAN/IPVLAN) and SWARM-shard deckies — see
:mod:`decnet.realism.personas_pool`. MazeNET topology deckies use
``Topology.email_personas`` instead and are configured per-topology
elsewhere.
This endpoint is the API surface behind the dashboard's "Persona
Generation" page. Reads accept admin or viewer; writes are admin-only
@@ -22,8 +22,8 @@ from typing import Any
from fastapi import APIRouter, Depends, HTTPException
from decnet.logging import get_logger
from decnet.orchestrator.emailgen import global_pool
from decnet.orchestrator.emailgen.personas import EmailPersona, parse_personas
from decnet.realism import personas_pool as global_pool
from decnet.realism.personas import EmailPersona, parse_personas
from decnet.telemetry import traced as _traced
from decnet.web.dependencies import require_admin, require_viewer
from decnet.web.db.models.common import MessageResponse # noqa: F401 - response shape
@@ -110,11 +110,28 @@ async def replace_personas(
)
dest = global_pool.resolve_path()
dest.parent.mkdir(parents=True, exist_ok=True)
dest.write_text(
json.dumps(_serialize(parsed), indent=2, ensure_ascii=False),
encoding="utf-8",
)
try:
    # Create the parent directory on demand, then write the validated
    # pool as pretty-printed UTF-8 JSON.
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.write_text(
        json.dumps(_serialize(parsed), indent=2, ensure_ascii=False),
        encoding="utf-8",
    )
except OSError as exc:
    # Most common cause on dev boxes: ``/etc/decnet`` exists but is
    # not writable by the API process. Surface a 500 with the
    # actionable hint instead of leaking a traceback.
    log.warning(
        "api.emailgen.replace_personas write failed path=%s err=%s",
        dest, exc,
    )
    # The override env var was renamed from DECNET_EMAILGEN_PERSONAS to
    # DECNET_REALISM_PERSONAS with no backward-compatible alias, so the
    # hint must name the new variable or the operator's fix is a no-op.
    raise HTTPException(
        status_code=500,
        detail=(
            f"Could not write persona pool at {dest}: {exc.strerror or exc}. "
            f"Set DECNET_REALISM_PERSONAS to a writable path "
            f"(e.g. ~/.decnet/email_personas.json) and restart the API."
        ),
    ) from exc
global_pool.reset_cache()
log.info(
"api.emailgen.replace_personas user=%s wrote=%d path=%s",

View File

@@ -21,6 +21,7 @@ from .api_get_topology import router as _get_router
from .api_lan_crud import router as _lan_router
from .api_list_topologies import router as _list_router
from .api_mutations import router as _mutations_router
from .api_personas import router as _personas_router
from .api_reap_orphans import router as _reap_router
from .api_teardown_topology import router as _teardown_router
@@ -44,6 +45,10 @@ topology_router.include_router(_decky_router)
topology_router.include_router(_edge_router)
topology_router.include_router(_mutations_router)
topology_router.include_router(_events_router)
# Personas use a literal-suffix path (`/{id}/personas`) — register
# before the bare `/{id}` getter: Starlette matches routes in
# registration order, so the more specific path must come first.
topology_router.include_router(_personas_router)
topology_router.include_router(_get_router)

View File

@@ -0,0 +1,131 @@
"""GET/PUT ``/topologies/{id}/personas`` — per-topology email persona pool.
The global pool (``decnet/web/router/emailgen/api_personas.py``) drives
non-MazeNET fleet/SWARM-shard mail deckies. MazeNET topology mail
deckies use ``Topology.email_personas`` instead — one JSON-serialized
list per topology, parsed by the emailgen scheduler each tick.
This endpoint is the API surface behind the dashboard's per-topology
"Personas" editor. Reads accept admin or viewer; writes are admin-only.
Concurrency: last-write-wins. The list is operator-curated and small
(typically <20 entries); no need for optimistic versioning here.
"""
from __future__ import annotations
import json
from typing import Any
from fastapi import APIRouter, Depends, HTTPException
from decnet.logging import get_logger
from decnet.realism.personas import EmailPersona, parse_personas
from decnet.telemetry import traced as _traced
from decnet.web.dependencies import repo, require_admin, require_viewer
# Router carrying the per-topology persona endpoints defined in this module.
router = APIRouter()
# Module logger, registered under the name "api.topology.personas".
log = get_logger("api.topology.personas")
def _serialize(personas: list[EmailPersona]) -> list[dict[str, Any]]:
    """Dump each persona model to a plain dict, keeping ``None`` fields."""
    return [
        persona.model_dump(exclude_none=False)
        for persona in personas
    ]
@router.get(
    "/{topology_id}/personas",
    tags=["MazeNET Topologies"],
    responses={
        401: {"description": "Could not validate credentials"},
        403: {"description": "Insufficient permissions"},
        404: {"description": "Topology not found"},
    },
)
@_traced("api.topology.list_personas")
async def list_topology_personas(
    topology_id: str,
    _viewer: dict = Depends(require_viewer),
) -> dict[str, Any]:
    """Fetch a topology's persona list together with its language default.

    The response carries ``language_default`` so the editor can display
    the language that entries without an explicit one fall back to. When
    the topology row has no (or an empty) ``language_default``, ``"en"``
    is used.

    Raises:
        HTTPException: 404 when no topology exists for ``topology_id``.
    """
    record = await repo.get_topology(topology_id)
    if record is None:
        raise HTTPException(status_code=404, detail="Topology not found")

    # An empty/missing default collapses to English.
    fallback_lang = record.get("language_default") or "en"
    stored = record.get("email_personas")
    pool = parse_personas(stored, language_default=fallback_lang)

    response: dict[str, Any] = {
        "topology_id": topology_id,
        "topology_name": record.get("name", ""),
        "language_default": fallback_lang,
        "personas": _serialize(pool),
    }
    return response
@router.put(
    "/{topology_id}/personas",
    tags=["MazeNET Topologies"],
    responses={
        400: {"description": "Invalid persona payload"},
        401: {"description": "Could not validate credentials"},
        403: {"description": "Insufficient permissions"},
        404: {"description": "Topology not found"},
    },
)
@_traced("api.topology.replace_personas")
async def replace_topology_personas(
    topology_id: str,
    body: dict[str, Any],
    user: dict = Depends(require_admin),
) -> dict[str, Any]:
    """Overwrite the persona list stored on a topology.

    Expected body: ``{"personas": [<EmailPersona>, ...]}``.

    Entries are run through ``parse_personas``; whatever it returns is
    what gets stored. If the caller sent a non-empty list and nothing
    survived parsing, the request is rejected with 400 so that a schema
    mistake cannot silently wipe the stored list. An explicitly empty
    list is accepted and clears the personas.

    Raises:
        HTTPException: 400 when ``body.personas`` is not a list or every
            entry failed validation; 404 when the topology is missing
            (either up front or by the time the update runs).
    """
    candidates = body.get("personas")
    if not isinstance(candidates, list):
        raise HTTPException(
            status_code=400, detail="body.personas must be a list",
        )

    record = await repo.get_topology(topology_id)
    if record is None:
        raise HTTPException(status_code=404, detail="Topology not found")

    fallback_lang = record.get("language_default") or "en"
    accepted = parse_personas(candidates, language_default=fallback_lang)
    if candidates and not accepted:
        all_rejected = (
            "All persona entries failed validation. Required fields: "
            "name, email (user@host.tld), role, tone, mannerisms."
        )
        raise HTTPException(status_code=400, detail=all_rejected)

    persona_dicts = _serialize(accepted)
    stored = await repo.set_topology_email_personas(
        topology_id,
        json.dumps(persona_dicts, ensure_ascii=False),
    )
    if not stored:
        # Race: row vanished between the get and the update.
        raise HTTPException(status_code=404, detail="Topology not found")

    # Prefer the username for the audit line; fall back to the uuid.
    actor = user.get("username", user.get("uuid"))
    log.info(
        "api.topology.replace_personas user=%s topology=%s wrote=%d",
        actor, topology_id, len(accepted),
    )

    response: dict[str, Any] = {
        "topology_id": topology_id,
        "topology_name": record.get("name", ""),
        "language_default": fallback_lang,
        "personas": persona_dicts,
    }
    return response