feat(realism): synthetic_files table + planner wiring + scheduler swap
Stage 3 of the realism migration. Replaces orchestrator/scheduler.py's
hardcoded _FILE_TEMPLATES/_USERS (3 templates emitting epoch-suffixed
filenames like notes-1777315854.txt with identical bodies per
template) with a persona-driven realism engine.
New surface:
- SyntheticFile SQLModel (synthetic_files table, UNIQUE on
decky_uuid+path) — per-(decky, path) state for the future
edit-in-place flow. Pre-v1, no _migrate_* helper.
- BaseRepository methods: record_synthetic_file,
update_synthetic_file, list_synthetic_files,
pick_random_synthetic_file_for_edit (used by stage 3b).
- realism/naming.py: per-content-class filename templates,
persona-conditioned. /var/log/cron.log + logrotate skeleton for
system-class; /home/<persona>/TODO.md, scratch.md, etc. for
user-class. Anti-regression test pins "no 8+ digit decimals in
basenames" (the realism failure today).
- realism/bodies.py: deterministic body templates per content_class.
TODO body uses checkbox markdown, script body has a shebang, cron
body matches syslog cron shape ("CRON[PID]: (user) CMD (...)").
- realism/planner.py: pick(deckies, now, rng) returns a Plan.
Diurnal-gated, weighted user/system content split (70/30 user
bias). Create-only in stage 3; edit branch lands in stage 3b.
Scheduler split:
- scheduler.pick is now traffic-only (sync).
- scheduler.pick_file is async, takes a repo, resolves personas
(Topology.email_personas for topology-source deckies; global
realism.personas_pool otherwise), and maps Plan -> FileAction.
- FileAction gains persona/content_class/mtime fields.
Worker:
- _one_tick rolls 50/50 between traffic and file each tick. After a
successful FileAction plant, _record_synthetic_file persists or
patches the synthetic_files row (catching the unique-constraint
collision on re-plant of the same path).
- SSHDriver._run_file passes action.mtime through to plant_file so
files don't all stamp at wall-clock-now.
This commit is contained in:
@@ -98,11 +98,29 @@ async def orchestrator_worker(
|
||||
|
||||
|
||||
async def _one_tick(repo: BaseRepository, driver, bus) -> None:
|
||||
import secrets as _secrets
|
||||
|
||||
# Union view: MazeNET topology + unihost fleet + SWARM shards. Pre-fleet
|
||||
# this only saw topology_deckies and was permanently blind to MACVLAN /
|
||||
# IPVLAN unihost decoys.
|
||||
deckies = await repo.list_running_deckies()
|
||||
action = scheduler.pick(deckies)
|
||||
rng = _secrets.SystemRandom()
|
||||
|
||||
# Action-kind roll: 50/50 traffic vs file. Stage 5 of the realism
|
||||
# migration adds an email branch (when emailgen folds in). When a
|
||||
# roll yields nothing actionable (e.g. file branch with no personas
|
||||
# in any persona's work hours), we fall through to the other side
|
||||
# so a quiet half doesn't silence the whole tick.
|
||||
action = None
|
||||
if rng.random() < 0.5:
|
||||
action = scheduler.pick(deckies, rand=rng)
|
||||
if action is None:
|
||||
action = await scheduler.pick_file(deckies, repo, rand=rng)
|
||||
else:
|
||||
action = await scheduler.pick_file(deckies, repo, rand=rng)
|
||||
if action is None:
|
||||
action = scheduler.pick(deckies, rand=rng)
|
||||
|
||||
if action is None:
|
||||
# Report the actual SSH-eligible count (what the scheduler filters
|
||||
# to), not just len(deckies) — the old "running+ssh count=N" line
|
||||
@@ -128,6 +146,18 @@ async def _one_tick(repo: BaseRepository, driver, bus) -> None:
|
||||
result = await driver.run(action)
|
||||
row = events.to_row(action, result)
|
||||
await repo.record_orchestrator_event(row)
|
||||
# Persist realism state for FileAction so stage 3b's edit-in-place
|
||||
# has something to read back. Failure here is logged but doesn't
|
||||
# tank the tick — the orchestrator event is the source of truth
|
||||
# for "this action happened."
|
||||
if isinstance(action, scheduler.FileAction) and result.success:
|
||||
try:
|
||||
await _record_synthetic_file(repo, action, result)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.warning(
|
||||
"orchestrator: synthetic_files write failed dst=%s path=%s: %s",
|
||||
action.dst_uuid, action.path, exc,
|
||||
)
|
||||
|
||||
if bus is not None:
|
||||
topic = events.topic_for(action)
|
||||
@@ -151,3 +181,57 @@ async def _one_tick(repo: BaseRepository, driver, bus) -> None:
|
||||
"orchestrator tick kind=%s success=%s dst=%s",
|
||||
row["kind"], row["success"], row["dst_decky_uuid"],
|
||||
)
|
||||
|
||||
|
||||
async def _record_synthetic_file(repo, action, result) -> None:
    """Persist a synthetic_files row after a successful FileAction plant.

    Idempotent on ``(decky_uuid, path)``: the insert is attempted first;
    if it raises (most likely the unique constraint, i.e. the file was
    planted before), the existing row is looked up and patched instead —
    ``last_modified`` / ``content_hash`` / ``last_body`` are refreshed
    and ``edit_count`` is bumped — so the dashboard's "files this decky
    has grown" view stays accurate when the orchestrator re-plants the
    same location.

    Args:
        repo: Repository exposing ``record_synthetic_file``,
            ``list_synthetic_files`` and ``update_synthetic_file``.
        action: The planted action; ``dst_uuid``, ``path``, ``persona``,
            ``content_class`` and ``content`` are read from it.
        result: Driver result for the plant. Not read here; kept for
            interface compatibility with the caller.

    Raises:
        Exception: The original insert error is re-raised when no
            existing row for ``action.path`` is found to patch.
    """
    import hashlib
    from datetime import datetime, timezone

    # Cap the persisted body — large blobs (DOCX/PDF/canary artifacts
    # in stage 7) are wasted disk on this side; the decky filesystem
    # holds the canonical bytes. The cap is applied in characters.
    max_body_chars = 65536

    body = action.content or ""
    # The hash covers the full body, not the truncated copy stored below.
    content_hash = hashlib.sha256(body.encode("utf-8")).hexdigest()
    stored_body = body[:max_body_chars]
    now = datetime.now(timezone.utc)
    row = {
        "decky_uuid": action.dst_uuid,
        "path": action.path,
        "persona": action.persona,
        "content_class": action.content_class,
        "created_at": now,
        "last_modified": now,
        "edit_count": 0,
        "content_hash": content_hash,
        "last_body": stored_body,
    }
    try:
        await repo.record_synthetic_file(row)
    except Exception:  # noqa: BLE001
        # Most likely the unique constraint on (decky_uuid, path)
        # fired — flip to update mode by looking up the existing row.
        # NOTE(review): only the first 200 rows returned for this decky
        # are searched; a matching row outside that window falls
        # through to the re-raise below — confirm whether the
        # repository can filter by path instead.
        existing = await repo.list_synthetic_files(
            decky_uuid=action.dst_uuid, limit=200,
        )
        match = next(
            (r for r in existing if r.get("path") == action.path), None,
        )
        if match is None:
            # Not a collision we can resolve: surface the insert error.
            raise
        # ``or 0`` also covers a row whose edit_count is stored as
        # NULL/None, which ``.get("edit_count", 0)`` alone would pass
        # straight into int() and crash on.
        previous_edits = int(match.get("edit_count") or 0)
        await repo.update_synthetic_file(
            match["uuid"],
            {
                "last_modified": now,
                "content_hash": content_hash,
                "last_body": stored_body,
                "edit_count": previous_edits + 1,
            },
        )
|
||||
|
||||
Reference in New Issue
Block a user