feat(realism): synthetic_files table + planner wiring + scheduler swap
Stage 3 of the realism migration. Replaces orchestrator/scheduler.py's
hardcoded _FILE_TEMPLATES/_USERS (3 templates emitting epoch-suffixed
filenames like notes-1777315854.txt with identical bodies per
template) with a persona-driven realism engine.
New surface:
- SyntheticFile SQLModel (synthetic_files table, UNIQUE on
decky_uuid+path) — per-(decky, path) state for the future
edit-in-place flow. Pre-v1, no _migrate_* helper.
- BaseRepository methods: record_synthetic_file,
update_synthetic_file, list_synthetic_files,
pick_random_synthetic_file_for_edit (used by stage 3b).
- realism/naming.py: per-content-class filename templates,
persona-conditioned. /var/log/cron.log + logrotate skeleton for
system-class; /home/<persona>/TODO.md, scratch.md, etc. for
user-class. Anti-regression test pins "no 8+ digit decimals in
basenames" (the realism failure today).
- realism/bodies.py: deterministic body templates per content_class.
TODO body uses checkbox markdown, script body has a shebang, cron
body matches syslog cron shape ("CRON[PID]: (user) CMD (...)").
- realism/planner.py: pick(deckies, now, rng) returns a Plan.
Diurnal-gated, weighted user/system content split (70/30 user
bias). Create-only in stage 3; edit branch lands in stage 3b.
Scheduler split:
- scheduler.pick is now traffic-only (sync).
- scheduler.pick_file is async, takes a repo, resolves personas
(Topology.email_personas for topology-source deckies; global
realism.personas_pool otherwise), and maps Plan -> FileAction.
- FileAction gains persona/content_class/mtime fields.
Worker:
- _one_tick rolls 50/50 between traffic and file each tick, falling
through to the other kind when the rolled one yields no action. After
a successful FileAction plant, _record_synthetic_file persists or
patches the synthetic_files row (catching the unique-constraint
collision on re-plant of the same path).
- SSHDriver._run_file passes action.mtime through to plant_file so
files don't all stamp at wall-clock-now.
This commit is contained in:
@@ -139,11 +139,14 @@ class SSHDriver(ActivityDriver):
|
||||
# FileAction's content is a string; the realism path uses
|
||||
# bytes-typed plant_file so binary blobs (DOCX/PDF, future
|
||||
# canary artifacts) survive the wire. Encode-once here.
|
||||
# mtime carries through from the realism planner so the file
|
||||
# doesn't stamp at wall-clock-now (the realism failure today).
|
||||
return await self.plant_file(
|
||||
action.dst_name,
|
||||
action.path,
|
||||
action.content.encode("utf-8"),
|
||||
mode=0o644,
|
||||
mtime=action.mtime,
|
||||
)
|
||||
|
||||
async def plant_file(
|
||||
|
||||
@@ -1,28 +1,30 @@
|
||||
"""Action picker for the orchestrator.
|
||||
|
||||
MVP policy: flat random — pick one (src, dst) pair where both deckies
|
||||
expose SSH, then choose one of {ssh-traffic, file-touch}. No diurnal
|
||||
shaping, no role-aware pairing — those land in v1.
|
||||
Stage-3 realism: file actions are sourced from
|
||||
:func:`decnet.realism.planner.pick`, not the old hardcoded
|
||||
``_FILE_TEMPLATES``/``_USERS`` constants. Persona resolution per
|
||||
decky still belongs here (the realism planner is pure of
|
||||
:class:`~decnet.web.db.repository.BaseRepository` knowledge) — we
|
||||
walk each decky to either ``Topology.email_personas`` or the
|
||||
``decnet.realism.personas_pool`` global pool, depending on
|
||||
``decky["source"]``, then hand the resolved set to the planner.
|
||||
|
||||
TrafficAction stays untouched: still a flat random pair-pick of
|
||||
SSH-capable deckies. Email actions land in stage 5 of the realism
|
||||
migration when the emailgen worker collapses into the orchestrator.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import secrets
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Optional, Sequence
|
||||
|
||||
# A small set of plausible filenames the orchestrator drops or refreshes.
|
||||
# Scope on purpose: the file driver is "prove the docker-exec write path
|
||||
# works", not "generate believable user activity". Realism is v2.
|
||||
# Paths target the filesystem *inside* a decoy container, not the host.
|
||||
# Bandit B108 is a host-side concern; suppressed at the data definition.
|
||||
_FILE_TEMPLATES: tuple[tuple[str, str], ...] = ( # nosec B108
|
||||
("/tmp/.cache-{ts}.tmp", "session={ts}\n"), # nosec B108
|
||||
("/var/log/cron-{ts}.log", "{ts} CRON[{n}]: ({user}) CMD (run-parts /etc/cron.daily)\n"),
|
||||
("/home/{user}/notes-{ts}.txt", "todo: rotate keys; check on backup task\n"),
|
||||
)
|
||||
|
||||
_USERS = ("admin", "ubuntu", "service")
|
||||
from decnet.realism import personas_pool
|
||||
from decnet.realism.personas import EmailPersona, parse_personas
|
||||
from decnet.realism.planner import pick as _realism_pick
|
||||
from decnet.realism.taxonomy import ContentClass, Plan
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -38,10 +40,21 @@ class TrafficAction:
|
||||
|
||||
@dataclass(frozen=True)
class FileAction:
    """One file plant request the SSH driver materialises.

    Stage-3 realism: ``persona`` / ``content_class`` / ``mtime`` are
    populated when the action came through :func:`pick_file`. Older
    direct constructions (tests, manual operator drives) leave them
    at the defaults — back-compat for the pre-realism call sites
    that haven't migrated yet.
    """
    # UUID of the decky the file is planted on.
    dst_uuid: str
    # Name of that decky; the SSH driver hands it to ``plant_file``.
    dst_name: str
    # Target path of the file on the decky.
    path: str
    # File body as text; the SSH driver encodes it as UTF-8 before planting.
    content: str
    # Persona name taken from the realism plan; empty when not realism-driven.
    persona: str = ""
    # String value of a ``ContentClass`` member; defaults to the NOTE class.
    content_class: str = ContentClass.NOTE.value
    # Timestamp forwarded to ``plant_file`` as ``mtime``; ``None`` when unset.
    mtime: Optional[datetime] = None
    # Short label for the action.
    description: str = "file:create"
|
||||
|
||||
|
||||
@@ -60,38 +73,139 @@ def pick(
|
||||
*,
|
||||
rand: Optional[secrets.SystemRandom] = None,
|
||||
) -> Optional[Action]:
|
||||
"""Pick one action against the given decky set.
|
||||
"""Pick one *traffic* action against the given decky set.
|
||||
|
||||
Returns ``None`` when no action is possible (fewer than two SSH-capable
|
||||
deckies for traffic, or no deckies at all for file ops). The worker
|
||||
treats ``None`` as "skip this tick".
|
||||
Returns ``None`` when no SSH-capable pair is available. File
|
||||
actions are produced by :func:`pick_file` (async — needs the repo
|
||||
for persona resolution). The orchestrator worker calls one or the
|
||||
other per tick, weighted 50/50.
|
||||
"""
|
||||
rng = rand or secrets.SystemRandom()
|
||||
ssh_deckies = [d for d in deckies if _has_ssh(d) and d.get("ip")]
|
||||
if not ssh_deckies:
|
||||
if len(ssh_deckies) < 2:
|
||||
return None
|
||||
|
||||
kind = "traffic" if (len(ssh_deckies) >= 2 and rng.random() < 0.5) else "file"
|
||||
|
||||
if kind == "traffic":
|
||||
src, dst = rng.sample(ssh_deckies, 2)
|
||||
return TrafficAction(
|
||||
src_uuid=src["uuid"],
|
||||
src_name=src["name"],
|
||||
dst_uuid=dst["uuid"],
|
||||
dst_name=dst["name"],
|
||||
dst_ip=dst["ip"],
|
||||
)
|
||||
|
||||
dst = rng.choice(ssh_deckies)
|
||||
template, content_template = rng.choice(_FILE_TEMPLATES)
|
||||
ts = int(datetime.now(timezone.utc).timestamp())
|
||||
user = rng.choice(_USERS)
|
||||
path = template.format(ts=ts, user=user)
|
||||
content = content_template.format(ts=ts, user=user, n=rng.randint(1000, 99999))
|
||||
return FileAction(
|
||||
src, dst = rng.sample(ssh_deckies, 2)
|
||||
return TrafficAction(
|
||||
src_uuid=src["uuid"],
|
||||
src_name=src["name"],
|
||||
dst_uuid=dst["uuid"],
|
||||
dst_name=dst["name"],
|
||||
path=path,
|
||||
content=content,
|
||||
dst_ip=dst["ip"],
|
||||
)
|
||||
|
||||
|
||||
async def pick_file(
    deckies: Sequence[dict[str, Any]],
    repo: Any,
    *,
    now: Optional[datetime] = None,
    rand: Optional[secrets.SystemRandom] = None,
) -> Optional[FileAction]:
    """Build one realism-planned file action, or ``None`` if none is possible.

    Each decky first gets its persona list attached by
    :func:`_resolve_personas` (the only step that touches ``repo``). The
    enriched set is then handed to :func:`decnet.realism.planner.pick`,
    and the returned :class:`Plan` is translated field-by-field into a
    :class:`FileAction` for the SSH driver.

    Args:
        deckies: Candidate decky dicts.
        repo: Repository used for persona resolution.
        now: Planning instant; defaults to the current UTC time.
        rand: Random source; defaults to a fresh ``SystemRandom``.

    Returns:
        The planned :class:`FileAction`, or ``None`` when the planner
        declines to produce a plan.
    """
    rng = rand or secrets.SystemRandom()
    # Capture the instant before the awaited lookups so planning uses the
    # time the tick started, not the time persona resolution finished.
    moment = datetime.now(timezone.utc) if now is None else now

    candidates = await _resolve_personas(deckies, repo)
    plan = _realism_pick(candidates, moment, rand=rng)

    if plan is not None:
        return FileAction(
            dst_uuid=plan.decky_uuid,
            dst_name=plan.decky_name,
            path=plan.target_path,
            # A plan without a body hint still plants an (empty) file.
            content=plan.body_hint or "",
            persona=plan.persona,
            content_class=plan.content_class.value,
            mtime=plan.mtime,
        )
    return None
|
||||
|
||||
|
||||
async def _resolve_personas(
    deckies: Sequence[dict[str, Any]],
    repo: Any,
) -> list[dict[str, Any]]:
    """Return copies of the SSH-capable deckies with personas attached.

    Every returned dict is the input decky plus a ``_realism_personas``
    key holding a list of :class:`EmailPersona`. Repo lookups happen here
    so the realism planner never needs database access.

    Resolution rules:

    * ``source == "topology"`` with a ``topology_id``: personas come from
      that topology's ``email_personas`` (one repo fetch per topology id
      per call; a failed fetch counts as "no personas").
    * anything else, including an unknown source: the global pool from
      ``personas_pool.load()``, loaded at most once per call.

    Deckies without SSH, or whose resolved persona list is empty, are
    left out of the result.
    """

    async def _fetch_topology(topology_id: Any) -> Any:
        # Best-effort: a broken lookup must not abort the whole tick.
        try:
            return await repo.get_topology(topology_id)
        except Exception:  # noqa: BLE001
            return None

    per_topology: dict[str, list[EmailPersona]] = {}
    pool: Optional[list[EmailPersona]] = None
    result: list[dict[str, Any]] = []

    for decky in deckies:
        # Files are planted over SSH, so a decky without it cannot take
        # a file action at all.
        if not _has_ssh(decky):
            continue

        origin = (decky.get("source") or "").lower()
        topo_id = decky.get("topology_id")

        if origin == "topology" and topo_id:
            if topo_id not in per_topology:
                per_topology[topo_id] = _topology_personas(
                    await _fetch_topology(topo_id)
                )
            resolved = per_topology[topo_id]
        else:
            if pool is None:
                # Deferred until the first decky that actually needs it.
                pool = personas_pool.load()
            resolved = pool

        if resolved:
            result.append({**decky, "_realism_personas": resolved})

    return result
|
||||
|
||||
|
||||
def _topology_personas(topology: Optional[dict[str, Any]]) -> list[EmailPersona]:
    """Extract the persona list stored on a topology row.

    ``email_personas`` may be a list or a JSON-encoded string. A string is
    decoded first; after that both forms go through the same list check,
    so a JSON document that decodes to ``null``, an object or a scalar is
    rejected instead of being forwarded to ``parse_personas``.

    Args:
        topology: Topology row as a dict, or ``None`` when the lookup
            failed or found nothing.

    Returns:
        The parsed personas, or an empty list when the topology is
        missing/empty, the field is absent, the JSON is undecodable, or
        the value is not a list.
    """
    if not topology:
        return []

    raw = topology.get("email_personas")
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except json.JSONDecodeError:
            return []

    # Covers a missing field (None) as well as any non-list payload.
    if not isinstance(raw, list):
        return []

    return parse_personas(
        raw,
        language_default=topology.get("language_default") or "en",
    )
|
||||
|
||||
|
||||
# ``Plan`` is re-exported via ``__all__`` so external callers that already
|
||||
# import it from the scheduler keep working through the migration.
|
||||
__all__ = [
|
||||
"Action",
|
||||
"FileAction",
|
||||
"Plan",
|
||||
"TrafficAction",
|
||||
"pick",
|
||||
"pick_file",
|
||||
]
|
||||
|
||||
@@ -98,11 +98,29 @@ async def orchestrator_worker(
|
||||
|
||||
|
||||
async def _one_tick(repo: BaseRepository, driver, bus) -> None:
|
||||
import secrets as _secrets
|
||||
|
||||
# Union view: MazeNET topology + unihost fleet + SWARM shards. Pre-fleet
|
||||
# this only saw topology_deckies and was permanently blind to MACVLAN /
|
||||
# IPVLAN unihost decoys.
|
||||
deckies = await repo.list_running_deckies()
|
||||
action = scheduler.pick(deckies)
|
||||
rng = _secrets.SystemRandom()
|
||||
|
||||
# Action-kind roll: 50/50 traffic vs file. Stage 5 of the realism
|
||||
# migration adds an email branch (when emailgen folds in). When a
|
||||
# roll yields nothing actionable (e.g. file branch with no personas
|
||||
# in any persona's work hours), we fall through to the other side
|
||||
# so a quiet half doesn't silence the whole tick.
|
||||
action = None
|
||||
if rng.random() < 0.5:
|
||||
action = scheduler.pick(deckies, rand=rng)
|
||||
if action is None:
|
||||
action = await scheduler.pick_file(deckies, repo, rand=rng)
|
||||
else:
|
||||
action = await scheduler.pick_file(deckies, repo, rand=rng)
|
||||
if action is None:
|
||||
action = scheduler.pick(deckies, rand=rng)
|
||||
|
||||
if action is None:
|
||||
# Report the actual SSH-eligible count (what the scheduler filters
|
||||
# to), not just len(deckies) — the old "running+ssh count=N" line
|
||||
@@ -128,6 +146,18 @@ async def _one_tick(repo: BaseRepository, driver, bus) -> None:
|
||||
result = await driver.run(action)
|
||||
row = events.to_row(action, result)
|
||||
await repo.record_orchestrator_event(row)
|
||||
# Persist realism state for FileAction so stage 3b's edit-in-place
|
||||
# has something to read back. Failure here is logged but doesn't
|
||||
# tank the tick — the orchestrator event is the source of truth
|
||||
# for "this action happened."
|
||||
if isinstance(action, scheduler.FileAction) and result.success:
|
||||
try:
|
||||
await _record_synthetic_file(repo, action, result)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.warning(
|
||||
"orchestrator: synthetic_files write failed dst=%s path=%s: %s",
|
||||
action.dst_uuid, action.path, exc,
|
||||
)
|
||||
|
||||
if bus is not None:
|
||||
topic = events.topic_for(action)
|
||||
@@ -151,3 +181,57 @@ async def _one_tick(repo: BaseRepository, driver, bus) -> None:
|
||||
"orchestrator tick kind=%s success=%s dst=%s",
|
||||
row["kind"], row["success"], row["dst_decky_uuid"],
|
||||
)
|
||||
|
||||
|
||||
async def _record_synthetic_file(repo, action, result) -> None:
    """Persist a synthetic_files row after a successful FileAction plant.

    First tries to insert a fresh row for ``(decky_uuid, path)``. If the
    insert raises, the failure is assumed to be the unique constraint on
    that pair: the existing row is looked up and patched instead
    (``last_modified``, ``content_hash``, ``last_body``, and
    ``edit_count`` bumped by one).

    Args:
        repo: Repository exposing ``record_synthetic_file``,
            ``list_synthetic_files`` and ``update_synthetic_file``.
        action: The planted action; ``dst_uuid``, ``path``, ``persona``,
            ``content_class`` and ``content`` are read.
        result: Driver result for the plant. Currently unused; kept so
            the call site's signature stays stable.

    Raises:
        Exception: The original insert error, when no existing row with
            the same path is found among the rows returned by the lookup.
    """
    import hashlib
    from datetime import datetime, timezone

    body = action.content or ""
    # The hash covers the full body even though only a prefix is stored.
    content_hash = hashlib.sha256(body.encode("utf-8")).hexdigest()
    # Cap the persisted body — large blobs (DOCX/PDF/canary artifacts in
    # stage 7) are wasted disk on this side; the decky filesystem holds
    # the canonical bytes. The cap counts characters, not encoded bytes.
    stored_body = body[:65536]
    now = datetime.now(timezone.utc)

    row = {
        "decky_uuid": action.dst_uuid,
        "path": action.path,
        "persona": action.persona,
        "content_class": action.content_class,
        "created_at": now,
        "last_modified": now,
        "edit_count": 0,
        "content_hash": content_hash,
        "last_body": stored_body,
    }
    try:
        await repo.record_synthetic_file(row)
    except Exception:  # noqa: BLE001
        # Most likely the unique constraint on (decky_uuid, path) fired —
        # flip to update mode by looking up the existing row.
        # NOTE(review): the lookup asks for limit=200; if a decky can hold
        # more rows than that, the match may be missed and the insert
        # error re-raised. Confirm whether the repo offers a path filter.
        existing = await repo.list_synthetic_files(
            decky_uuid=action.dst_uuid, limit=200,
        )
        match = next(
            (r for r in existing if r.get("path") == action.path), None,
        )
        if match is None:
            # Not a path collision after all: surface the insert failure.
            raise
        await repo.update_synthetic_file(
            match["uuid"],
            {
                "last_modified": now,
                "content_hash": content_hash,
                "last_body": stored_body,
                # ``or 0`` tolerates a stored NULL/None counter, which
                # int(None) would otherwise turn into a TypeError.
                "edit_count": int(match.get("edit_count") or 0) + 1,
            },
        )
|
||||
|
||||
Reference in New Issue
Block a user