feat(realism): synthetic_files table + planner wiring + scheduler swap

Stage 3 of the realism migration. Replaces orchestrator/scheduler.py's
hardcoded _FILE_TEMPLATES/_USERS (3 templates emitting epoch-suffixed
filenames like notes-1777315854.txt with identical bodies per
template) with a persona-driven realism engine.

New surface:

- SyntheticFile SQLModel (synthetic_files table, UNIQUE on
  decky_uuid+path) — per-(decky, path) state for the future
  edit-in-place flow. Pre-v1, no _migrate_* helper.
- BaseRepository methods: record_synthetic_file,
  update_synthetic_file, list_synthetic_files,
  pick_random_synthetic_file_for_edit (used by stage 3b).
- realism/naming.py: per-content-class filename templates,
  persona-conditioned. /var/log/cron.log + logrotate skeleton for
  system-class; /home/<persona>/TODO.md, scratch.md, etc. for
  user-class. Anti-regression test pins "no 8+ digit decimals in
  basenames" (the realism failure today).
- realism/bodies.py: deterministic body templates per content_class.
  TODO body uses checkbox markdown, script body has a shebang, cron
  body matches syslog cron shape ("CRON[PID]: (user) CMD (...)").
- realism/planner.py: pick(deckies, now, rng) returns a Plan.
  Diurnal-gated, weighted user/system content split (70/30 user
  bias). Create-only in stage 3; edit branch lands in stage 3b.

Scheduler split:

- scheduler.pick is now traffic-only (sync).
- scheduler.pick_file is async, takes a repo, resolves personas
  (Topology.email_personas for topology-source deckies; global
  realism.personas_pool otherwise), and maps Plan -> FileAction.
- FileAction gains persona/content_class/mtime fields.

Worker:

- _one_tick rolls 50/50 between traffic and file each tick. After a
  successful FileAction plant, _record_synthetic_file persists or
  patches the synthetic_files row (catching the unique-constraint
  collision on re-plant of the same path).
- SSHDriver._run_file passes action.mtime through to plant_file so
  files don't all stamp at wall-clock-now.
This commit is contained in:
2026-04-27 16:22:07 -04:00
parent 636c057cc5
commit cb1872c52f
15 changed files with 1541 additions and 105 deletions

View File

@@ -77,6 +77,10 @@ from .orchestrator import (
OrchestratorEvent,
OrchestratorEventsResponse,
)
from .realism import (
SyntheticFile,
SyntheticFilesResponse,
)
from .logs import (
Bounty,
BountyResponse,
@@ -226,6 +230,9 @@ __all__ = [
"OrchestratorEmailsResponse",
"OrchestratorEvent",
"OrchestratorEventsResponse",
# realism
"SyntheticFile",
"SyntheticFilesResponse",
# logs
"Bounty",
"BountyResponse",

View File

@@ -0,0 +1,72 @@
"""Realism — synthetic-file state across orchestrator ticks.
The orchestrator's pre-realism file generator forgot every file the
moment it was planted: each tick wrote a brand-new ``notes-{ts}.txt``
with a literal unix-epoch suffix. No edits, no rotation, no diurnal
shape — three of the realism failures the migration is fixing.
:class:`SyntheticFile` is the per-(decky, path) memory that lets the
realism engine read back yesterday's ``TODO.md``, mutate it, write
back the new body, and let the dashboard inspect the lineage.
Pre-v1: schema lives directly in the SQLModel; no ``_migrate_*``
helper (per the project's "no new migrations pre-v1" rule —
``feedback_no_new_migrations_prev1.md``). Alembic lands at v1.
"""
from datetime import datetime, timezone
from typing import Any, List
from uuid import uuid4
from pydantic import BaseModel
from sqlalchemy import Column, Index, Text, UniqueConstraint
from sqlmodel import Field, SQLModel
class SyntheticFile(SQLModel, table=True):
"""One realism-planted file on one decky.
The unique key is ``(decky_uuid, path)`` — there's at most one
realism record per location, even if the planter has rotated the
file (rotation updates ``edit_count`` and ``last_modified``, not
a new row).
``last_body`` is capped — large blobs (DOCX/PDF, future canary
artifacts) are truncated at write time. The edit-in-place flow
(stage 3b) only needs the body when the content class supports
body-level mutation (``note``, ``todo``, ``draft``, ``script``),
so storing the canonical bytes for binary blobs would be wasted.
``content_hash`` is sha256 of the *body bytes only* — never of
metadata or wrapper headers — so a hash compare is a cheap
"did the body change?" check across edits.
"""
__tablename__ = "synthetic_files"
__table_args__ = (
UniqueConstraint(
"decky_uuid", "path", name="uq_synthetic_files_decky_path",
),
Index("ix_synthetic_files_decky_modified", "decky_uuid", "last_modified"),
)
uuid: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)
decky_uuid: str = Field(index=True, max_length=64)
path: str = Field(max_length=1024)
persona: str = Field(max_length=128) # EmailPersona.name
content_class: str = Field(max_length=32, index=True) # ContentClass enum value
created_at: datetime = Field(
default_factory=lambda: datetime.now(timezone.utc), index=True,
)
last_modified: datetime = Field(
default_factory=lambda: datetime.now(timezone.utc),
)
edit_count: int = Field(default=0)
content_hash: str = Field(max_length=64) # sha256 hex
last_body: str = Field(
sa_column=Column("last_body", Text, nullable=False, default="")
)
class SyntheticFilesResponse(BaseModel):
total: int
limit: int
offset: int
data: List[dict[str, Any]]

View File

@@ -1100,3 +1100,60 @@ class BaseRepository(ABC):
this on a periodic tick.
"""
raise NotImplementedError
# ------------------------------------------------------------- realism
async def record_synthetic_file(self, data: dict[str, Any]) -> str:
"""Insert a new synthetic_files row, returning its uuid.
The ``(decky_uuid, path)`` pair has a UNIQUE constraint, so two
creates for the same target raise — callers either use this for
first-time plants and :meth:`update_synthetic_file` for edits,
or wrap in a transaction that catches the conflict.
"""
raise NotImplementedError
async def update_synthetic_file(
self, uuid: str, data: dict[str, Any],
) -> None:
"""Patch an existing synthetic_files row.
Used by the realism edit-in-place flow (stage 3b): bumps
``last_body``, ``content_hash``, ``last_modified``, and
``edit_count``. No-op when *uuid* doesn't exist (the row may
have been pruned between pick and apply).
"""
raise NotImplementedError
async def list_synthetic_files(
self,
*,
decky_uuid: Optional[str] = None,
persona: Optional[str] = None,
limit: int = 100,
offset: int = 0,
) -> list[dict[str, Any]]:
"""Paginated synthetic_files newest-first.
Optional filters narrow to one decky and/or one persona, used by
the dashboard's "files this decky has grown" view.
"""
raise NotImplementedError
async def pick_random_synthetic_file_for_edit(
self,
decky_uuid: str,
*,
max_age_days: int = 30,
) -> Optional[dict[str, Any]]:
"""Return a random eligible synthetic_files row for re-edit.
"Eligible" = belongs to *decky_uuid*, last_modified within
*max_age_days*, content_class supports body-level mutation
(``note``, ``todo``, ``draft``, ``script``, ``log_*``).
Returns ``None`` when nothing matches.
Used by the realism planner's ``action="edit"`` branch
(stage 3b).
"""
raise NotImplementedError

View File

@@ -53,6 +53,7 @@ from decnet.web.db.models import (
TopologyMutation,
OrchestratorEmail,
OrchestratorEvent,
SyntheticFile,
WebhookSubscription,
CanaryBlob,
CanaryToken,
@@ -3330,3 +3331,80 @@ class SQLModelRepository(BaseRepository):
deleted += res.rowcount or 0
await session.commit()
return deleted
# ------------------------------------------------------------ realism
async def record_synthetic_file(self, data: dict[str, Any]) -> str:
async with self._session() as session:
row = SyntheticFile(**data)
session.add(row)
await session.commit()
await session.refresh(row)
return row.uuid
async def update_synthetic_file(
self, row_uuid: str, data: dict[str, Any],
) -> None:
async with self._session() as session:
stmt = (
update(SyntheticFile)
.where(SyntheticFile.uuid == row_uuid)
.values(**data)
)
await session.execute(stmt)
await session.commit()
async def list_synthetic_files(
self,
*,
decky_uuid: Optional[str] = None,
persona: Optional[str] = None,
limit: int = 100,
offset: int = 0,
) -> list[dict[str, Any]]:
async with self._session() as session:
stmt = select(SyntheticFile)
if decky_uuid is not None:
stmt = stmt.where(SyntheticFile.decky_uuid == decky_uuid)
if persona is not None:
stmt = stmt.where(SyntheticFile.persona == persona)
stmt = (
stmt.order_by(desc(SyntheticFile.last_modified))
.offset(offset)
.limit(limit)
)
result = await session.execute(stmt)
return [r.model_dump(mode="json") for r in result.scalars().all()]
async def pick_random_synthetic_file_for_edit(
self,
decky_uuid: str,
*,
max_age_days: int = 30,
) -> Optional[dict[str, Any]]:
# Editable classes: anything whose body is plain text we can
# mutate idempotently. Binary canary artifacts are out — they
# rotate via a fresh plant, not an edit.
editable = (
"note", "todo", "draft", "script", "log_cron", "log_daemon",
)
from datetime import timedelta
cutoff = datetime.now(timezone.utc) - timedelta(days=max_age_days)
async with self._session() as session:
stmt = (
select(SyntheticFile)
.where(
SyntheticFile.decky_uuid == decky_uuid,
SyntheticFile.content_class.in_(editable), # type: ignore[attr-defined]
SyntheticFile.last_modified >= cutoff,
)
# SQLite + MySQL both support func.random() / RAND() —
# SQLAlchemy's func.random() compiles per-dialect.
.order_by(func.random())
.limit(1)
)
result = await session.execute(stmt)
row = result.scalars().first()
if row is None:
return None
return row.model_dump(mode="json")