Stage 3 of the realism migration. Replaces orchestrator/scheduler.py's
hardcoded _FILE_TEMPLATES/_USERS (3 templates emitting epoch-suffixed
filenames like notes-1777315854.txt with identical bodies per
template) with a persona-driven realism engine.
New surface:
- SyntheticFile SQLModel (synthetic_files table, UNIQUE on
decky_uuid+path) — per-(decky, path) state for the future
edit-in-place flow. Pre-v1, no _migrate_* helper.
- BaseRepository methods: record_synthetic_file,
update_synthetic_file, list_synthetic_files,
pick_random_synthetic_file_for_edit (used by stage 3b).
- realism/naming.py: per-content-class filename templates,
persona-conditioned. /var/log/cron.log + logrotate skeleton for
system-class; /home/<persona>/TODO.md, scratch.md, etc. for
user-class. Anti-regression test pins "no 8+ digit decimals in
basenames" (the realism failure today).
- realism/bodies.py: deterministic body templates per content_class.
TODO body uses checkbox markdown, script body has a shebang, cron
body matches syslog cron shape ("CRON[PID]: (user) CMD (...)").
- realism/planner.py: pick(deckies, now, rng) returns a Plan.
Diurnal-gated, weighted user/system content split (70/30 user
bias). Create-only in stage 3; edit branch lands in stage 3b.
Scheduler split:
- scheduler.pick is now traffic-only (sync).
- scheduler.pick_file is async, takes a repo, resolves personas
(Topology.email_personas for topology-source deckies; global
realism.personas_pool otherwise), and maps Plan -> FileAction.
- FileAction gains persona/content_class/mtime fields.
Worker:
- _one_tick rolls 50/50 between traffic and file each tick. After a
successful FileAction plant, _record_synthetic_file persists or
patches the synthetic_files row (catching the unique-constraint
collision on re-plant of the same path).
- SSHDriver._run_file passes action.mtime through to plant_file so
files don't all stamp at wall-clock-now.
193 lines
7.1 KiB
Python
193 lines
7.1 KiB
Python
"""Per-content-class filename generators.
|
|
|
|
The pre-realism orchestrator emitted ``notes-1777315854.txt``
|
|
(unix-epoch suffix) — a tell on first glance. Real users name files
|
|
``notes.txt``, ``TODO.md``, ``backup-2025-04.sql.gz``. Real systems
|
|
write ``cron.log``, ``cron.log.1``, ``cron.log.2.gz`` (logrotate
|
|
shape, no epoch).
|
|
|
|
Stage 3 ships **deterministic templates only**, persona-conditioned.
|
|
Stage 6 wires LLM enrichment for the user-classes (``note``, ``todo``,
|
|
``draft``, ``script``); the deterministic templates remain the
|
|
fallback when LLM is disabled or times out.
|
|
|
|
The factory mirrors :func:`decnet.canary.factory.get_generator`:
|
|
caller passes a :class:`~decnet.realism.taxonomy.ContentClass`; we
|
|
return the namer registered for it. Renaming a content_class is a
|
|
schema change and would invalidate ``synthetic_files.path`` lookups,
|
|
so the dispatch is exhaustive — no silent fallbacks for unknown
|
|
classes.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import secrets
|
|
import string
|
|
from typing import Callable, Optional
|
|
|
|
from decnet.realism.taxonomy import ContentClass
|
|
|
|
|
|
# Persona → home-dir convention. Most personas are linux-style; the
|
|
# rare "windows" persona would get ``C:\\Users\\<persona>\\Documents`` style
|
|
# paths (out of scope until per-OS personas land). For now everything
|
|
# is POSIX.
|
|
def _home(persona: str) -> str:
|
|
"""Return the canonical home directory for *persona*.
|
|
|
|
The persona's ``name`` is used as the linux username when it's a
|
|
plausible login (lowercase, no spaces); otherwise we fall back to
|
|
a generic ``user`` so the path doesn't reveal a persona display
|
|
name on the decky filesystem.
|
|
"""
|
|
candidate = persona.lower().replace(" ", "")
|
|
if candidate.isalnum() and candidate.isascii() and candidate:
|
|
return f"/home/{candidate}"
|
|
return "/home/user"
|
|
|
|
|
|
def _random_token(rng: secrets.SystemRandom, length: int = 6) -> str:
|
|
"""Lowercase-alphanum token of length *length* — like ``mkstemp``."""
|
|
return "".join(rng.choice(string.ascii_lowercase + string.digits) for _ in range(length))
|
|
|
|
|
|
# ── User-class namers ──────────────────────────────────────────────────────
|
|
|
|
|
|
_NOTE_NAMES: tuple[str, ...] = (
|
|
"notes.txt", "scratch.md", "ideas.txt", "Untitled-3.txt",
|
|
"draft.md", "keys.txt", "passwords.txt", "TODO.md",
|
|
)
|
|
|
|
_TODO_NAMES: tuple[str, ...] = (
|
|
"TODO.md", "todo.txt", "things.md", "tasks.txt", "punchlist.md",
|
|
)
|
|
|
|
_DRAFT_NAMES: tuple[str, ...] = (
|
|
"Q3-budget-DRAFT.md", "proposal.md", "letter.txt",
|
|
"rfc-internal.md", "memo.txt", "1on1-notes.md",
|
|
)
|
|
|
|
_SCRIPT_NAMES: tuple[str, ...] = (
|
|
"backup.sh", "deploy.sh", "cleanup.sh", "rotate.sh",
|
|
"fix.py", "tmp.py", "scratch.py",
|
|
)
|
|
|
|
|
|
def _name_user(
    persona: str,
    names: tuple[str, ...],
    rng: secrets.SystemRandom,
) -> str:
    """Pick one basename from *names* and place it in *persona*'s home.

    Shared implementation behind the user-class namers; the result is
    ``<home>/<basename>`` where ``<home>`` comes from :func:`_home`.
    """
    home_dir = _home(persona)
    basename = rng.choice(names)
    return "/".join((home_dir, basename))
|
|
|
|
|
|
def _name_note(persona: str, rng: secrets.SystemRandom) -> str:
    """Return a note-class path: a ``_NOTE_NAMES`` entry in *persona*'s home."""
    return _name_user(persona=persona, names=_NOTE_NAMES, rng=rng)
|
|
|
|
|
|
def _name_todo(persona: str, rng: secrets.SystemRandom) -> str:
    """Return a todo-class path: a ``_TODO_NAMES`` entry in *persona*'s home."""
    return _name_user(persona=persona, names=_TODO_NAMES, rng=rng)
|
|
|
|
|
|
def _name_draft(persona: str, rng: secrets.SystemRandom) -> str:
    """Return a draft-class path: a ``_DRAFT_NAMES`` entry in *persona*'s home."""
    return _name_user(persona=persona, names=_DRAFT_NAMES, rng=rng)
|
|
|
|
|
|
def _name_script(persona: str, rng: secrets.SystemRandom) -> str:
    """Return a script-class path: a ``_SCRIPT_NAMES`` entry in *persona*'s home."""
    return _name_user(persona=persona, names=_SCRIPT_NAMES, rng=rng)
|
|
|
|
|
|
# ── System-class namers ────────────────────────────────────────────────────
|
|
|
|
|
|
# logrotate skeleton: cron.log, cron.log.1, cron.log.2.gz. No epoch
|
|
# suffix — the realism failure today is `cron-1777317867.log`.
|
|
_CRON_LOGROTATE: tuple[str, ...] = (
|
|
"/var/log/cron.log", "/var/log/cron.log.1", "/var/log/cron.log.2.gz",
|
|
)
|
|
_DAEMON_LOGROTATE: tuple[str, ...] = (
|
|
"/var/log/daemon.log", "/var/log/syslog", "/var/log/messages",
|
|
"/var/log/auth.log", "/var/log/auth.log.1",
|
|
)
|
|
|
|
|
|
def _name_log_cron(persona: str, rng: secrets.SystemRandom) -> str:
    """Return one of the ``_CRON_LOGROTATE`` paths, chosen by *rng*.

    *persona* is not used; it is present so every namer shares one
    call signature.
    """
    chosen = rng.choice(_CRON_LOGROTATE)
    return chosen
|
|
|
|
|
|
def _name_log_daemon(persona: str, rng: secrets.SystemRandom) -> str:
    """Return one of the ``_DAEMON_LOGROTATE`` paths, chosen by *rng*.

    *persona* is not used; it is present so every namer shares one
    call signature.
    """
    chosen = rng.choice(_DAEMON_LOGROTATE)
    return chosen
|
|
|
|
|
|
def _name_cache_tmp(persona: str, rng: secrets.SystemRandom) -> str:
    """Return a hidden ``mkstemp``-shaped path: ``/tmp/.cache-XXXXXX``.

    The six-character suffix is random lowercase alphanumerics. The
    leading dot keeps the file out of a default ``ls`` listing.
    *persona* is not used; it is present so every namer shares one
    call signature.
    """
    suffix = _random_token(rng, 6)
    # Bandit B108 flags the literal "/tmp/" prefix. It is suppressed
    # here because the string is a path generated for a target decky,
    # not a file opened on the host.
    return "/tmp/.cache-" + suffix  # nosec B108
|
|
|
|
|
|
# ── Email + canary placeholders ────────────────────────────────────────────
|
|
# Email "names" (paths) are produced by the email driver's spool logic,
|
|
# not by realism naming. Canary paths are advisory — operators usually
|
|
# specify ``placement_path`` directly. Stage 7 of the realism migration
|
|
# refines canary placement based on persona + content_class.
|
|
|
|
|
|
def _name_email(persona: str, rng: secrets.SystemRandom) -> str:
|
|
raise NotImplementedError(
|
|
"email paths come from the email driver's spool logic, not "
|
|
"realism.naming"
|
|
)
|
|
|
|
|
|
def _name_canary(persona: str, rng: secrets.SystemRandom) -> str:
|
|
raise NotImplementedError(
|
|
"canary placement is set by the canary cultivator (stage 7), "
|
|
"not realism.naming"
|
|
)
|
|
|
|
|
|
# ── Dispatch ───────────────────────────────────────────────────────────────
|
|
|
|
|
|
# Dispatch table: content class -> namer callable taking (persona, rng).
_NAMERS: dict[ContentClass, Callable[[str, secrets.SystemRandom], str]] = {
    # User classes: a basename placed in the persona's home directory.
    ContentClass.NOTE: _name_note,
    ContentClass.TODO: _name_todo,
    ContentClass.DRAFT: _name_draft,
    ContentClass.SCRIPT: _name_script,
    # System classes: fixed log paths and a /tmp scratch path.
    ContentClass.LOG_CRON: _name_log_cron,
    ContentClass.LOG_DAEMON: _name_log_daemon,
    ContentClass.CACHE_TMP: _name_cache_tmp,
    # Placeholder: the registered namer raises NotImplementedError.
    ContentClass.EMAIL: _name_email,
    # Every canary class shares the same raising placeholder.
    **dict.fromkeys(
        (
            ContentClass.CANARY_AWS_CREDS,
            ContentClass.CANARY_ENV_FILE,
            ContentClass.CANARY_GIT_CONFIG,
            ContentClass.CANARY_SSH_KEY,
            ContentClass.CANARY_HONEYDOC,
            ContentClass.CANARY_HONEYDOC_DOCX,
            ContentClass.CANARY_HONEYDOC_PDF,
            ContentClass.CANARY_MYSQL_DUMP,
        ),
        _name_canary,
    ),
}
|
|
|
|
|
|
def make_path(
    content_class: ContentClass,
    persona: str,
    *,
    rand: Optional[secrets.SystemRandom] = None,
) -> str:
    """Produce a plausible absolute, container-side path for *content_class*.

    The namer registered for *content_class* in ``_NAMERS`` is looked up
    and called with *persona* and a random source. User classes yield
    ``/home/<persona>/…`` paths; system classes ignore *persona* and
    draw from fixed log/tmp shapes.

    Args:
        content_class: The class of content the path is for.
        persona: Persona name, used by the user-class namers.
        rand: Random source to draw from; a fresh
            ``secrets.SystemRandom`` is created when omitted.

    Raises:
        KeyError: no namer is registered for *content_class*.
        NotImplementedError: *content_class* is the email class or a
            canary class, whose registered namers always raise.
    """
    namer = _NAMERS.get(content_class)
    if namer is not None:
        return namer(persona, rand or secrets.SystemRandom())
    raise KeyError(
        f"no namer registered for content_class={content_class!r}"
    )
|