feat(realism): synthetic_files table + planner wiring + scheduler swap
Stage 3 of the realism migration. Replaces orchestrator/scheduler.py's
hardcoded _FILE_TEMPLATES/_USERS (3 templates emitting epoch-suffixed
filenames like notes-1777315854.txt with identical bodies per
template) with a persona-driven realism engine.
New surface:
- SyntheticFile SQLModel (synthetic_files table, UNIQUE on
decky_uuid+path) — per-(decky, path) state for the future
edit-in-place flow. Pre-v1, no _migrate_* helper.
- BaseRepository methods: record_synthetic_file,
update_synthetic_file, list_synthetic_files,
pick_random_synthetic_file_for_edit (used by stage 3b).
- realism/naming.py: per-content-class filename templates,
persona-conditioned. /var/log/cron.log + logrotate skeleton for
system-class; /home/<persona>/TODO.md, scratch.md, etc. for
user-class. Anti-regression test pins "no 8+ digit decimals in
basenames" (the realism failure today).
- realism/bodies.py: deterministic body templates per content_class.
TODO body uses checkbox markdown, script body has a shebang, cron
body matches syslog cron shape ("CRON[PID]: (user) CMD (...)").
- realism/planner.py: pick(deckies, now, rng) returns a Plan.
Diurnal-gated, weighted user/system content split (70/30 user
bias). Create-only in stage 3; edit branch lands in stage 3b.
Scheduler split:
- scheduler.pick is now traffic-only (sync).
- scheduler.pick_file is async, takes a repo, resolves personas
(Topology.email_personas for topology-source deckies; global
realism.personas_pool otherwise), and maps Plan -> FileAction.
- FileAction gains persona/content_class/mtime fields.
Worker:
- _one_tick rolls 50/50 between traffic and file each tick. After a
successful FileAction plant, _record_synthetic_file persists or
patches the synthetic_files row (catching the unique-constraint
collision on re-plant of the same path).
- SSHDriver._run_file passes action.mtime through to plant_file so
files don't all stamp at wall-clock-now.
This commit is contained in:
@@ -77,6 +77,10 @@ from .orchestrator import (
|
||||
OrchestratorEvent,
|
||||
OrchestratorEventsResponse,
|
||||
)
|
||||
from .realism import (
|
||||
SyntheticFile,
|
||||
SyntheticFilesResponse,
|
||||
)
|
||||
from .logs import (
|
||||
Bounty,
|
||||
BountyResponse,
|
||||
@@ -226,6 +230,9 @@ __all__ = [
|
||||
"OrchestratorEmailsResponse",
|
||||
"OrchestratorEvent",
|
||||
"OrchestratorEventsResponse",
|
||||
# realism
|
||||
"SyntheticFile",
|
||||
"SyntheticFilesResponse",
|
||||
# logs
|
||||
"Bounty",
|
||||
"BountyResponse",
|
||||
|
||||
72
decnet/web/db/models/realism.py
Normal file
72
decnet/web/db/models/realism.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""Realism — synthetic-file state across orchestrator ticks.
|
||||
|
||||
The orchestrator's pre-realism file generator forgot every file the
|
||||
moment it was planted: each tick wrote a brand-new ``notes-{ts}.txt``
|
||||
with a literal unix-epoch suffix. No edits, no rotation, no diurnal
|
||||
shape — three of the realism failures the migration is fixing.
|
||||
|
||||
:class:`SyntheticFile` is the per-(decky, path) memory that lets the
|
||||
realism engine read back yesterday's ``TODO.md``, mutate it, write
|
||||
back the new body, and let the dashboard inspect the lineage.
|
||||
|
||||
Pre-v1: schema lives directly in the SQLModel; no ``_migrate_*``
|
||||
helper (per the project's "no new migrations pre-v1" rule —
|
||||
``feedback_no_new_migrations_prev1.md``). Alembic lands at v1.
|
||||
"""
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, List
|
||||
from uuid import uuid4
|
||||
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import Column, Index, Text, UniqueConstraint
|
||||
from sqlmodel import Field, SQLModel
|
||||
|
||||
|
||||
def _utc_now() -> datetime:
    """Return the current moment as a timezone-aware UTC ``datetime``."""
    return datetime.now(timezone.utc)


def _new_uuid() -> str:
    """Return a freshly generated UUID4 rendered as a string."""
    return str(uuid4())


class SyntheticFile(SQLModel, table=True):
    """Persistent record of one realism-planted file on one decky.

    Besides the ``uuid`` primary key, rows are unique per
    ``(decky_uuid, path)``: a location on a decky maps to at most one
    record. A rotation of the file is meant to bump ``edit_count`` and
    ``last_modified`` on the existing row instead of inserting a new one.

    ``last_body`` is intended to hold a capped copy of the body. The
    column itself is an unbounded ``Text``, so any truncation of large
    blobs (DOCX/PDF, future canary artifacts) has to happen in the
    writer. Only content classes that support body-level mutation
    (``note``, ``todo``, ``draft``, ``script``) need the body for the
    stage 3b edit-in-place flow, so keeping canonical bytes for binary
    blobs would be wasted storage.

    ``content_hash`` is meant to be the sha256 of the *body bytes only*
    (no metadata, no wrapper headers), which makes a hash comparison a
    cheap "did the body change?" test between edits.
    """

    __tablename__ = "synthetic_files"
    # NOTE(review): if MySQL with a 4-byte charset is a target backend, a
    # unique key over VARCHAR(64) + VARCHAR(1024) may exceed InnoDB's
    # index key-length limit — confirm before relying on it there.
    __table_args__ = (
        UniqueConstraint(
            "decky_uuid",
            "path",
            name="uq_synthetic_files_decky_path",
        ),
        Index(
            "ix_synthetic_files_decky_modified",
            "decky_uuid",
            "last_modified",
        ),
    )

    # Client-generated primary key (UUID4 string).
    uuid: str = Field(default_factory=_new_uuid, primary_key=True)
    # Owning decky and the file's location on it (the unique pair).
    decky_uuid: str = Field(index=True, max_length=64)
    path: str = Field(max_length=1024)
    # EmailPersona.name
    persona: str = Field(max_length=128)
    # ContentClass enum value
    content_class: str = Field(max_length=32, index=True)
    # Both timestamps default to "now" in UTC at construction time.
    created_at: datetime = Field(default_factory=_utc_now, index=True)
    last_modified: datetime = Field(default_factory=_utc_now)
    # Starts at zero for a freshly recorded file.
    edit_count: int = Field(default=0)
    # sha256 hex
    content_hash: str = Field(max_length=64)
    # Explicit SQLAlchemy column: non-null Text defaulting to "".
    last_body: str = Field(
        sa_column=Column("last_body", Text, nullable=False, default=""),
    )
|
||||
|
||||
|
||||
class SyntheticFilesResponse(BaseModel):
    """Paginated response envelope for a synthetic-files listing.

    ``data`` carries the rows as plain dictionaries. ``total``,
    ``limit`` and ``offset`` are the paging counters; presumably
    ``total`` is the overall match count and the other two echo the
    request window — confirm against the endpoint that builds this.
    """

    # Paging counters.
    total: int
    limit: int
    offset: int
    # One dictionary per synthetic_files row.
    data: List[dict[str, Any]]
|
||||
@@ -1100,3 +1100,60 @@ class BaseRepository(ABC):
|
||||
this on a periodic tick.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
# ------------------------------------------------------------- realism
|
||||
|
||||
async def record_synthetic_file(self, data: dict[str, Any]) -> str:
    """Create a ``synthetic_files`` row from *data* and return its uuid.

    A UNIQUE constraint covers ``(decky_uuid, path)``, so a second
    insert for the same target raises. Callers should either reserve
    this for the first plant of a path and route later changes through
    :meth:`update_synthetic_file`, or wrap the call in a transaction
    that catches the conflict.

    This base implementation always raises
    :class:`NotImplementedError`; concrete repositories override it.
    """
    raise NotImplementedError
|
||||
|
||||
async def update_synthetic_file(
    self,
    uuid: str,
    data: dict[str, Any],
) -> None:
    """Apply the column patch *data* to the row identified by *uuid*.

    This is the write half of the realism edit-in-place flow
    (stage 3b), which bumps ``last_body``, ``content_hash``,
    ``last_modified`` and ``edit_count``. A *uuid* that matches no row
    is a silent no-op, because the row may have been pruned between
    pick and apply.

    This base implementation always raises
    :class:`NotImplementedError`; concrete repositories override it.

    NOTE(review): the SQLModel override names its first parameter
    ``row_uuid``, so keyword calls with ``uuid=`` would not reach it —
    pass the identifier positionally.
    """
    raise NotImplementedError
|
||||
|
||||
async def list_synthetic_files(
    self,
    *,
    decky_uuid: Optional[str] = None,
    persona: Optional[str] = None,
    limit: int = 100,
    offset: int = 0,
) -> list[dict[str, Any]]:
    """Return one page of ``synthetic_files`` rows, newest first.

    *decky_uuid* and *persona* are optional filters; when given, the
    listing is narrowed to that decky and/or that persona. This backs
    the dashboard's "files this decky has grown" view. *limit* and
    *offset* select the page.

    This base implementation always raises
    :class:`NotImplementedError`; concrete repositories override it.
    """
    raise NotImplementedError
|
||||
|
||||
async def pick_random_synthetic_file_for_edit(
    self,
    decky_uuid: str,
    *,
    max_age_days: int = 30,
) -> Optional[dict[str, Any]]:
    """Pick one random row that is eligible for a re-edit, if any.

    A row is eligible when all of the following hold:

    * it belongs to *decky_uuid*;
    * its ``last_modified`` lies within the last *max_age_days* days;
    * its ``content_class`` supports body-level mutation (``note``,
      ``todo``, ``draft``, ``script``, ``log_*``).

    ``None`` is returned when no row qualifies. The realism planner's
    ``action="edit"`` branch (stage 3b) is the consumer.

    This base implementation always raises
    :class:`NotImplementedError`; concrete repositories override it.
    """
    raise NotImplementedError
|
||||
|
||||
@@ -53,6 +53,7 @@ from decnet.web.db.models import (
|
||||
TopologyMutation,
|
||||
OrchestratorEmail,
|
||||
OrchestratorEvent,
|
||||
SyntheticFile,
|
||||
WebhookSubscription,
|
||||
CanaryBlob,
|
||||
CanaryToken,
|
||||
@@ -3330,3 +3331,80 @@ class SQLModelRepository(BaseRepository):
|
||||
deleted += res.rowcount or 0
|
||||
await session.commit()
|
||||
return deleted
|
||||
|
||||
# ------------------------------------------------------------ realism
|
||||
|
||||
async def record_synthetic_file(self, data: dict[str, Any]) -> str:
    """Insert one ``synthetic_files`` row built from *data*; return its uuid.

    *data* is expanded into :class:`SyntheticFile` keyword arguments.

    Returns:
        The primary key of the inserted row.

    Raises:
        Whatever the session raises on commit. In particular, a second
        insert for an existing ``(decky_uuid, path)`` pair violates the
        table's UNIQUE constraint.
    """
    async with self._session() as session:
        row = SyntheticFile(**data)
        # The primary key is assigned client-side when the model is
        # built (default factory, or an explicit "uuid" in *data*).
        # Reading it before the commit avoids a post-commit refresh
        # whose only purpose was to make this attribute readable again.
        new_uuid = row.uuid
        session.add(row)
        await session.commit()
        return new_uuid
|
||||
|
||||
async def update_synthetic_file(
    self, row_uuid: str, data: dict[str, Any],
) -> None:
    """Patch the ``synthetic_files`` row whose primary key is *row_uuid*.

    Args:
        row_uuid: Primary key of the row to patch.
        data: Column name -> new value mapping passed to the UPDATE.

    An empty *data* is treated as "nothing to change" and returns
    without touching the database. The affected-row count is not
    inspected, so a *row_uuid* that matches no row is a silent no-op.
    """
    if not data:
        # An UPDATE needs at least one SET column; an empty patch is a
        # no-op rather than an error.
        return
    async with self._session() as session:
        stmt = (
            update(SyntheticFile)
            .where(SyntheticFile.uuid == row_uuid)
            .values(**data)
        )
        await session.execute(stmt)
        await session.commit()
|
||||
|
||||
async def list_synthetic_files(
    self,
    *,
    decky_uuid: Optional[str] = None,
    persona: Optional[str] = None,
    limit: int = 100,
    offset: int = 0,
) -> list[dict[str, Any]]:
    """Return one page of ``synthetic_files`` rows, most recently modified first.

    Args:
        decky_uuid: When not ``None``, keep only rows for this decky.
        persona: When not ``None``, keep only rows for this persona.
        limit: Maximum number of rows to return.
        offset: Number of leading rows to skip.

    Returns:
        The rows as JSON-mode dictionaries (``model_dump(mode="json")``).
    """
    async with self._session() as session:
        stmt = select(SyntheticFile)
        if decky_uuid is not None:
            stmt = stmt.where(SyntheticFile.decky_uuid == decky_uuid)
        if persona is not None:
            stmt = stmt.where(SyntheticFile.persona == persona)
        stmt = (
            stmt.order_by(
                desc(SyntheticFile.last_modified),
                # Tiebreaker: rows sharing a timestamp would otherwise
                # come back in an unspecified order, letting a row be
                # repeated or skipped across OFFSET/LIMIT pages.
                desc(SyntheticFile.uuid),
            )
            .offset(offset)
            .limit(limit)
        )
        result = await session.execute(stmt)
        return [r.model_dump(mode="json") for r in result.scalars().all()]
|
||||
|
||||
async def pick_random_synthetic_file_for_edit(
    self,
    decky_uuid: str,
    *,
    max_age_days: int = 30,
) -> Optional[dict[str, Any]]:
    """Return one randomly chosen editable row for *decky_uuid*, or ``None``.

    Candidates must belong to *decky_uuid*, have a ``content_class``
    in the editable set below, and have been modified within the last
    *max_age_days* days (measured from the current UTC time). The
    chosen row is returned as a JSON-mode dictionary.
    """
    # Only plain-text bodies can be mutated idempotently, so only those
    # classes are editable. Binary canary artifacts are excluded: they
    # rotate through a fresh plant rather than an edit.
    editable_classes = (
        "note", "todo", "draft", "script", "log_cron", "log_daemon",
    )
    from datetime import timedelta

    oldest_allowed = datetime.now(timezone.utc) - timedelta(days=max_age_days)
    async with self._session() as session:
        query = (
            select(SyntheticFile)
            .where(SyntheticFile.decky_uuid == decky_uuid)
            .where(SyntheticFile.content_class.in_(editable_classes))  # type: ignore[attr-defined]
            .where(SyntheticFile.last_modified >= oldest_allowed)
            # The random ordering happens in the database; SQLAlchemy
            # renders func.random() per dialect (random() on SQLite,
            # RAND() on MySQL).
            .order_by(func.random())
            .limit(1)
        )
        picked = (await session.execute(query)).scalars().first()
        return None if picked is None else picked.model_dump(mode="json")
|
||||
|
||||
Reference in New Issue
Block a user