diff --git a/decnet/realism/__init__.py b/decnet/realism/__init__.py new file mode 100644 index 00000000..61be3052 --- /dev/null +++ b/decnet/realism/__init__.py @@ -0,0 +1,27 @@ +"""Realism library — synthetic content + scheduling primitives. + +A shared, importable library that produces *plausible* artifacts (file +names, file bodies, email content) and the diurnal/persona machinery +that decides *when* and *for whom* to produce them. + +Workers (orchestrator, canary cultivator, future-emailgen-equivalents) +import from here. This package owns: + +* :mod:`decnet.realism.taxonomy` — :class:`ContentClass` enum and the + :class:`Plan` dataclass that planners emit. +* :mod:`decnet.realism.diurnal` — work-hours gating and a backdated + ``mtime`` sampler so planted files don't all stamp at wall-clock-now. +* :mod:`decnet.realism.planner` — picks ``(decky, persona, class, + action, mtime)`` tuples for the orchestrator's tick loop. +* :mod:`decnet.realism.personas` — persona schema (lifted from + ``orchestrator.emailgen.personas`` in stage 2 of the migration). +* :mod:`decnet.realism.prompts` — prompt builders, one per content + class, sharing an em-dash-suppression style helper. +* :mod:`decnet.realism.llm` — :class:`LLMBackend` ABC + factory + impl + subpackage; pluggable text-generation backend. + +The library has **no worker, no systemd unit, no CLI of its own** — +it's plain Python that consumers import. The CLI surface that does +exist (``decnet realism import-personas``) is registered by +:mod:`decnet.cli.realism` after stage 5 of the migration. +""" diff --git a/decnet/realism/diurnal.py b/decnet/realism/diurnal.py new file mode 100644 index 00000000..c81a3318 --- /dev/null +++ b/decnet/realism/diurnal.py @@ -0,0 +1,153 @@ +"""Work-hours gating and backdated mtime sampling. 
class _ClockLike(Protocol):
    """Structural type for an injectable clock: a zero-arg callable returning a ``datetime``."""

    def __call__(self) -> datetime: ...


class _RandLike(Protocol):
    """Structural type for the RNG surface this module uses.

    Any object exposing ``random()`` and ``randint(a, b)`` qualifies, which
    includes :class:`random.Random` and :class:`secrets.SystemRandom`.
    """

    def random(self) -> float: ...
    def randint(self, a: int, b: int) -> int: ...


# Scale, in hours, of the exponential backdate drawn by ``sample_mtime``
# (``-ln(u) * scale``); the mean backdate before clipping is therefore 12 h.
_BACKDATE_SCALE_HOURS = 12.0
_MINUTES_PER_DAY = 24 * 60


def _parse_window(window: str) -> tuple[int, int, int, int] | None:
    """Parse ``"HH:MM-HH:MM"`` into ``(start_h, start_m, end_h, end_m)``.

    Returns ``None`` for malformed input — callers treat that as
    "always-on" so a single config typo never silences the whole fleet
    (mirrors :func:`decnet.orchestrator.emailgen.personas.in_active_hours`
    semantics). A non-string *window* (e.g. ``None`` from a missing
    config key) counts as malformed rather than raising.
    """
    try:
        start_s, end_s = window.split("-")
        start_h, start_m = (int(p) for p in start_s.split(":"))
        end_h, end_m = (int(p) for p in end_s.split(":"))
    except (ValueError, IndexError, AttributeError, TypeError):
        # ValueError: wrong number of parts or non-numeric field.
        # AttributeError/TypeError: *window* is not a ``str`` at all.
        return None
    if not (0 <= start_h < 24 and 0 <= end_h < 24):
        return None
    if not (0 <= start_m < 60 and 0 <= end_m < 60):
        return None
    return start_h, start_m, end_h, end_m


def _minute_in_window(cur: int, start: int, end: int) -> bool:
    """Return whether minute-of-day *cur* lies in the half-open ``[start, end)``.

    ``start > end`` is read as a window that spans midnight. With
    ``start == end`` the expression is true for every *cur* (always-on).
    """
    if start < end:
        return start <= cur < end
    # Wrap-around (e.g. 22:00-06:00).
    return cur >= start or cur < end


def in_work_hours(window: str, now: datetime) -> bool:
    """Return ``True`` when *now* falls inside the persona window.

    *window* is ``"HH:MM-HH:MM"`` with an exclusive end. Wrap-around
    (``start > end``) means "spans midnight." Equal ``start`` and ``end``
    means always-on. Malformed windows return ``True`` — fail-open so a
    typo doesn't silence the fleet. Only ``now.hour`` and ``now.minute``
    are consulted.
    """
    parsed = _parse_window(window)
    if parsed is None:
        return True
    start_h, start_m, end_h, end_m = parsed
    start = start_h * 60 + start_m
    end = end_h * 60 + end_m
    if start == end:
        return True
    return _minute_in_window(now.hour * 60 + now.minute, start, end)


def sample_mtime(
    window: str,
    now: datetime,
    *,
    rand: _RandLike | None = None,
    backdate_min_hours: float = 0.5,
    backdate_max_days: float = 14.0,
) -> datetime:
    """Return a backdated ``datetime`` for ``touch -d`` after a write.

    The backdate is drawn as ``-ln(u) * 12`` hours — an exponential with
    a 12-hour mean, so most results look "edited recently" — and then
    clipped to ``[backdate_min_hours, backdate_max_days * 24]`` hours
    before *now*.

    If *window* parses and the candidate's clock time falls outside it,
    the hour/minute are re-drawn inside the window on the candidate's
    calendar date (for wrap-around windows, inside the longer of the two
    segments) so an ``admin`` persona's ``TODO.md`` doesn't carry an
    mtime of 03:14:22. The snapped value is then moved by whole days so
    that it is no newer than ``now - backdate_min_hours`` and, whenever
    that floor allows it, no older than ``now - backdate_max_days``. The
    floor takes precedence if both cannot be met.

    Malformed and always-on (``start == end``) windows skip the snap and
    return the clipped exponential backdate unchanged.

    Parameters
    ----------
    window :
        Persona window, ``"HH:MM-HH:MM"``.
    now :
        Reference "current" time; the result carries the same tzinfo.
    rand :
        RNG exposing ``random()`` and ``randint()``. Defaults to a fresh
        :class:`secrets.SystemRandom`.
    backdate_min_hours, backdate_max_days :
        Bounds of the backdate relative to *now*.
    """
    import math  # function-scope, as in the original module

    rng = rand if rand is not None else secrets.SystemRandom()

    # Exponential-ish backdate via -ln(u): heavier mass near "recent".
    # Cap by clipping; cheap and good enough for realism.
    u = max(rng.random(), 1e-6)  # avoid log(0)
    span_hours = max(
        backdate_min_hours,
        min(backdate_max_days * 24, -math.log(u) * _BACKDATE_SCALE_HOURS),
    )
    candidate = now - timedelta(hours=span_hours)

    parsed = _parse_window(window)
    if parsed is None:
        return candidate
    start_h, start_m, end_h, end_m = parsed
    start = start_h * 60 + start_m
    end = end_h * 60 + end_m
    if start == end:
        return candidate
    if _minute_in_window(candidate.hour * 60 + candidate.minute, start, end):
        return candidate

    # Candidate is outside the window: draw a replacement minute-of-day.
    # The draw happens only on this path, so in-window candidates do not
    # consume extra values from the RNG.
    if start < end:
        snap_minutes = rng.randint(start, end - 1)
    elif _MINUTES_PER_DAY - start >= end:
        # Wrap-around, pre-midnight segment is at least as long.
        snap_minutes = rng.randint(start, _MINUTES_PER_DAY - 1)
    else:
        # Wrap-around, post-midnight segment is longer.
        snap_minutes = rng.randint(0, end - 1)

    snapped = candidate.replace(
        hour=snap_minutes // 60,
        minute=snap_minutes % 60,
        second=rng.randint(0, 59),
        microsecond=0,
    )

    one_day = timedelta(days=1)
    # If the snap landed too close to (or after) *now*, shift back whole
    # days until the min-backdate floor is honoured.
    floor = now - timedelta(hours=backdate_min_hours)
    while snapped > floor:
        snapped -= one_day
    # Snapping to an earlier minute on the candidate's date can overshoot
    # the max-backdate cap by up to a day; pull forward one day when that
    # still respects the floor. The clock face is unchanged by the shift.
    oldest = now - timedelta(days=backdate_max_days)
    if snapped < oldest and snapped + one_day <= floor:
        snapped += one_day
    return snapped
import random


def pick(
    deckies: Sequence[dict[str, Any]],
    now: datetime,
    *,
    repo: Any = None,
    rand: Optional[random.Random] = None,
) -> Optional[Plan]:
    """Return the next :class:`Plan` for the orchestrator's tick.

    Stage-1 stub: always returns ``None`` ("nothing to do this tick") so
    the orchestrator can import this function before the real
    implementation lands. The full policy (diurnal gate, action
    distribution 60/30/10 create/edit/leave, content-class weights,
    canary rate-limit) lands in stage 3 of the realism migration.

    Parameters
    ----------
    deckies :
        Output of :meth:`BaseRepository.list_running_deckies`. Each
        entry must carry ``uuid``, ``name``, ``services``,
        ``email_personas`` (topology-pool JSON or list).
    now :
        Tick timestamp. Injected so tests don't need to monkey-patch
        the system clock.
    repo :
        :class:`BaseRepository` for synthetic_files lookup (edit
        action). Optional in stage 1; required from stage 3 onward.
    rand :
        RNG for sampling. Typed as :class:`random.Random` so tests can
        inject a seeded generator; :class:`secrets.SystemRandom` is a
        subclass and remains accepted. Defaults to a fresh
        :class:`secrets.SystemRandom`.

    Returns
    -------
    ``None`` unconditionally in this stage.
    """
    del deckies, now, repo, rand  # intentionally unused until stage 3
    return None
+ +Categories: + +* **User content** (LLM-eligible): ``note``, ``todo``, ``draft``, + ``script``. Created by humans on workstations; LLM enrichment makes + them feel lived-in. +* **System content** (deterministic only): ``log_cron``, ``log_daemon``, + ``cache_tmp``. These are *supposed* to look formulaic — that's how + cron/journald actually write them. LLM here would harm realism. +* **Email** (LLM-eligible): one persona writing to another. Owned by + the email driver, not the file driver. +* **Canary** (deterministic, callback-bearing): one ``canary_*`` member + per :mod:`decnet.canary.factory.KNOWN_GENERATORS` entry. Picked + rarely and rate-limited per-decky by the planner. +""" +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import datetime +from enum import StrEnum +from typing import Literal, Optional + + +class ContentClass(StrEnum): + """The kind of artifact a planner has decided to produce. + + Values are stable over the wire — they're persisted on + ``synthetic_files.content_class`` and used as bus-event discriminants + so renaming a member is a schema change. Add new members at the + bottom; never reorder. + """ + + # User-generated, LLM-enrichable + NOTE = "note" + TODO = "todo" + DRAFT = "draft" + SCRIPT = "script" + + # System-generated, template-only (LLM would harm realism) + LOG_CRON = "log_cron" + LOG_DAEMON = "log_daemon" + CACHE_TMP = "cache_tmp" + + # Email — owned by the email driver, planner picks the action shape + EMAIL = "email" + + # Callback-bearing — provided by decnet.canary.cultivator at + # dispatch time, not by realism.bodies. One member per generator + # in decnet.canary.factory.KNOWN_GENERATORS. 
+ CANARY_AWS_CREDS = "canary_aws_creds" + CANARY_ENV_FILE = "canary_env_file" + CANARY_GIT_CONFIG = "canary_git_config" + CANARY_SSH_KEY = "canary_ssh_key" + CANARY_HONEYDOC = "canary_honeydoc" + CANARY_HONEYDOC_DOCX = "canary_honeydoc_docx" + CANARY_HONEYDOC_PDF = "canary_honeydoc_pdf" + CANARY_MYSQL_DUMP = "canary_mysql_dump" + + def is_canary(self) -> bool: + return self.value.startswith("canary_") + + def is_user_class(self) -> bool: + return self in ( + ContentClass.NOTE, + ContentClass.TODO, + ContentClass.DRAFT, + ContentClass.SCRIPT, + ) + + def is_system_class(self) -> bool: + return self in ( + ContentClass.LOG_CRON, + ContentClass.LOG_DAEMON, + ContentClass.CACHE_TMP, + ) + + +PlanAction = Literal["create", "edit", "rotate"] + + +@dataclass(frozen=True) +class Plan: + """One realism decision: what to do, where, as whom, when. + + Frozen so the planner can return the same instance to multiple + consumers (e.g. orchestrator dispatcher + canary cultivator) without + them stomping each other's view of the schedule. + + Attributes + ---------- + decky_uuid, decky_name : + Target decky. Both carried so drivers don't need a repo + round-trip to map UUID → container name. + persona : + Persona name (``EmailPersona.name``) — this is the user the + action is "performed by." Sampled from the topology's persona + pool at plan time. + content_class : + :class:`ContentClass` member. Drives namer/body dispatch. + action : + ``"create"`` mints a new artifact; ``"edit"`` mutates a + previously-planted one (read-modify-write — requires + :attr:`previous_body`); ``"rotate"`` is the log-rotation shape + (``cron.log`` → ``cron.log.1``). + target_path : + Absolute container-side path the driver should write. Already + persona-aware (e.g. ``/home/admin/TODO.md`` not + ``/home/{user}/TODO.md``). + mtime : + Backdated wall-clock the driver should ``touch -d`` after + writing. 
Sampled by :func:`decnet.realism.diurnal.sample_mtime` + so files don't all stamp at the moment they were created. + body_hint : + Deterministic body the engine has *already* committed to. LLM + enrichment, when enabled, may replace it but on timeout/failure + the driver falls back to this — so the tick never blocks + unboundedly. + previous_body : + Required for ``action="edit"``. The bytes the driver read back + from the decky before mutating; passed to + :func:`decnet.realism.bodies.next_iteration`. + """ + + decky_uuid: str + decky_name: str + persona: str + content_class: ContentClass + action: PlanAction + target_path: str + mtime: datetime + body_hint: Optional[str] = None + previous_body: Optional[str] = None + notes: tuple[str, ...] = field(default_factory=tuple) + + def __post_init__(self) -> None: + if self.action == "edit" and self.previous_body is None: + # Belt-and-braces: the planner produced an edit Plan without + # the prior body. The driver would either have to make a + # second docker exec to re-read or silently degrade to + # create. Both bad. Fail loudly at construction. + raise ValueError( + "Plan.action='edit' requires previous_body; got None" + ) diff --git a/tests/realism/__init__.py b/tests/realism/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/realism/test_diurnal.py b/tests/realism/test_diurnal.py new file mode 100644 index 00000000..77c4bfc7 --- /dev/null +++ b/tests/realism/test_diurnal.py @@ -0,0 +1,120 @@ +"""Coverage for :mod:`decnet.realism.diurnal`. + +Two functions to exercise: + +* :func:`in_work_hours` — straightforward window membership including + the wrap-around (``22:00-06:00``) case and the fail-open behaviour + on malformed windows. +* :func:`sample_mtime` — must (a) return a ``datetime`` strictly in + the past, (b) clip to the configured backdate cap, and (c) snap the + hour-of-day into the persona's window when the unconstrained + candidate would land outside. 
# Fixed 'now' for reproducible tests — Monday 2026-04-27 14:00 UTC.
_NOW = datetime(2026, 4, 27, 14, 0, tzinfo=timezone.utc)


# ---- in_work_hours -----------------------------------------------------

@pytest.mark.parametrize(
    "now_hour,now_min,window,expected",
    [
        (10, 0, "09:00-18:00", True),
        (8, 59, "09:00-18:00", False),
        (9, 0, "09:00-18:00", True),
        (18, 0, "09:00-18:00", False),  # exclusive end
        (17, 59, "09:00-18:00", True),
        (23, 30, "22:00-06:00", True),  # wrap-around: late
        (3, 0, "22:00-06:00", True),  # wrap-around: early
        (12, 0, "22:00-06:00", False),  # wrap-around: middle of day
    ],
)
def test_in_work_hours_window_membership(
    now_hour: int, now_min: int, window: str, expected: bool,
) -> None:
    now = _NOW.replace(hour=now_hour, minute=now_min)
    assert in_work_hours(window, now) is expected


def test_in_work_hours_equal_start_end_means_always_on() -> None:
    # A persona pegged "00:00-00:00" should never be silenced by the
    # diurnal gate — interpreted as "no schedule".
    assert in_work_hours("00:00-00:00", _NOW) is True


@pytest.mark.parametrize(
    "garbage",
    ["not-a-window", "9-18", "09:00", "25:00-26:00", "09:00-18:99", ""],
)
def test_malformed_window_fails_open(garbage: str) -> None:
    # The fleet must not silence on a typo — same fail-open semantics
    # as decnet.orchestrator.emailgen.personas.in_active_hours.
    assert in_work_hours(garbage, _NOW) is True


# ---- sample_mtime ------------------------------------------------------

def test_sample_mtime_is_in_the_past() -> None:
    rng = random.Random(0)
    for _ in range(20):
        mt = sample_mtime("09:00-18:00", _NOW, rand=rng)
        assert mt < _NOW, f"sample_mtime returned future: {mt} >= {_NOW}"


def test_sample_mtime_respects_backdate_cap() -> None:
    rng = random.Random(0)
    cap_days = 7.0
    for _ in range(50):
        mt = sample_mtime(
            "09:00-18:00", _NOW, rand=rng,
            backdate_max_days=cap_days, backdate_min_hours=0.5,
        )
        assert _NOW - mt <= timedelta(days=cap_days) + timedelta(hours=1)
        assert _NOW - mt >= timedelta(hours=0.5) - timedelta(seconds=1)


def test_sample_mtime_snaps_hour_into_window() -> None:
    # Sample repeatedly and assert the hour-of-day is always inside the
    # persona window, whether or not the raw candidate needed snapping.
    rng = random.Random(42)
    window = "09:00-18:00"
    for _ in range(60):
        mt = sample_mtime(window, _NOW, rand=rng)
        assert 9 <= mt.hour < 18, (
            f"hour {mt.hour} fell outside {window} on {mt.isoformat()}"
        )


def test_sample_mtime_handles_wrap_around_window() -> None:
    rng = random.Random(123)
    for _ in range(40):
        mt = sample_mtime("22:00-06:00", _NOW, rand=rng)
        assert mt.hour >= 22 or mt.hour < 6, (
            f"hour {mt.hour} fell outside wrap window on {mt.isoformat()}"
        )


def test_sample_mtime_malformed_window_does_not_snap() -> None:
    # When the window can't be parsed, just return the unconstrained
    # backdate. Belt-and-braces: shouldn't crash, shouldn't future-stamp.
    mt = sample_mtime("garbage", _NOW, rand=random.Random(0))
    assert mt < _NOW
    # An always-on window ("00:00-00:00") also skips the snap, so with
    # the same seed it yields the raw backdate. Equality proves the
    # malformed path applied no snapping of its own.
    unsnapped = sample_mtime("00:00-00:00", _NOW, rand=random.Random(0))
    assert mt == unsnapped


def test_sample_mtime_is_deterministic_per_seed() -> None:
    # The diurnal sampler accepts a Random — pinning the seed must
    # produce stable output, otherwise tests can't assert anything
    # tighter than "returns a datetime in the past."
    a = sample_mtime("09:00-18:00", _NOW, rand=random.Random(7))
    b = sample_mtime("09:00-18:00", _NOW, rand=random.Random(7))
    assert a == b
def test_content_class_values_are_stable() -> None:
    # If anyone renames or reorders, the assertion explodes — the
    # enum is wire-visible (synthetic_files.content_class column,
    # bus event payloads) so changes need a schema bump elsewhere.
    # Compared as a list, not a set, so declaration order is pinned too.
    assert [c.value for c in ContentClass] == [
        "note", "todo", "draft", "script",
        "log_cron", "log_daemon", "cache_tmp",
        "email",
        "canary_aws_creds", "canary_env_file", "canary_git_config",
        "canary_ssh_key", "canary_honeydoc", "canary_honeydoc_docx",
        "canary_honeydoc_pdf", "canary_mysql_dump",
    ]


@pytest.mark.parametrize("name", ["NOTE", "TODO", "DRAFT", "SCRIPT"])
def test_user_classes_classified(name: str) -> None:
    cls = ContentClass[name]
    assert cls.is_user_class()
    assert not cls.is_system_class()
    assert not cls.is_canary()


@pytest.mark.parametrize("name", ["LOG_CRON", "LOG_DAEMON", "CACHE_TMP"])
def test_system_classes_classified(name: str) -> None:
    cls = ContentClass[name]
    assert cls.is_system_class()
    assert not cls.is_user_class()
    assert not cls.is_canary()


def test_canary_members_all_classified() -> None:
    canaries = [c for c in ContentClass if c.value.startswith("canary_")]
    assert canaries, "expected at least one canary content_class"
    for c in canaries:
        assert c.is_canary()
        assert not c.is_user_class()
        assert not c.is_system_class()


def test_email_is_neither_user_nor_system_nor_canary() -> None:
    # Email lives on its own track — same content engine but a
    # different driver and a different table. Classification helpers
    # must not falsely group it into file-class buckets.
    assert ContentClass.EMAIL.value == "email"
    assert not ContentClass.EMAIL.is_user_class()
    assert not ContentClass.EMAIL.is_system_class()
    assert not ContentClass.EMAIL.is_canary()


def _plan(**kw: object) -> Plan:
    """Build a valid ``create`` Plan, with any field overridable via *kw*."""
    defaults = dict(
        decky_uuid="d-1",
        decky_name="alpha",
        persona="admin",
        content_class=ContentClass.NOTE,
        action="create",
        target_path="/home/admin/notes.txt",
        mtime=datetime(2026, 4, 25, 11, 30, tzinfo=timezone.utc),
        body_hint="todo: rotate keys",
    )
    defaults.update(kw)
    return Plan(**defaults)


def test_plan_is_frozen() -> None:
    # Pin the exact exception: a bare ``Exception`` would also accept an
    # unrelated failure (e.g. a TypeError from a broken fixture).
    from dataclasses import FrozenInstanceError

    p = _plan()
    with pytest.raises(FrozenInstanceError):
        p.persona = "ubuntu"  # type: ignore[misc]


def test_edit_plan_requires_previous_body() -> None:
    with pytest.raises(ValueError, match="previous_body"):
        _plan(action="edit", previous_body=None)


def test_edit_plan_with_previous_body_succeeds() -> None:
    p = _plan(action="edit", previous_body="- [ ] rotate keys\n")
    assert p.action == "edit"
    assert p.previous_body == "- [ ] rotate keys\n"


def test_create_plan_does_not_need_previous_body() -> None:
    p = _plan(action="create", previous_body=None)
    assert p.action == "create"
    assert p.previous_body is None