Files
DECNET/decnet/realism/naming.py
anti 907ade9142 feat(realism): wire fingerprint_html/svg through taxonomy + UI
The two new fingerprint canary generators existed at the API level
since f64e78f but weren't visible to the realism engine or the
operator-facing dashboard. Threads them through every place that
enumerates canary content classes.

Backend:
* realism/taxonomy.py - two new ContentClass members
  (CANARY_FINGERPRINT_HTML, CANARY_FINGERPRINT_SVG); enum is
  wire-visible (synthetic_files.content_class column + bus discrim)
  so we add at the bottom, never reorder.
* canary/cultivator.py - class-to-generator dispatch, kind mapping
  (both http), and default placement paths
  (~/Documents/asset_directory.html and network_topology.svg).
* realism/naming.py + bodies.py - _name_canary / _body_canary entries.
* realism/planner.py - added to _DEFAULT_CANARY_CLASS_WEIGHTS and
  the _CANARY_CLASSES classification set.

Frontend:
* decnet_web/src/realism/labels.ts - display labels.
* decnet_web/src/components/RealismConfig/RealismConfig.tsx - default
  canary weight rows so operators see them in the realism config UI.
* decnet_web/src/components/SyntheticFiles/SyntheticFiles.tsx - added
  to the CONTENT_CLASSES allow-list so filter dropdowns show them.

Also: re-applied the nosec B404/B603 markers on canary/obfuscator.py;
the first commit's pre-commit autoformatter stripped them.

Tests: extended tests/realism/test_taxonomy.py's stability assertion
to include the two new values. Full canary + realism suites pass
(362 / 2 skipped).
2026-04-29 16:44:03 -04:00

187 lines
6.9 KiB
Python

"""Per-content-class filename generators.
The pre-realism orchestrator emitted ``notes-1777315854.txt``
(unix-epoch suffix) — a tell on first glance. Real users name
``notes.txt``, ``TODO.md``, ``backup-2025-04.sql.gz``. Real systems
write ``cron.log``, ``cron.log.1``, ``cron.log.2.gz`` (logrotate
shape, no epoch).
Stage 3 ships **deterministic templates only**, persona-conditioned.
Stage 6 wires LLM enrichment for the user-classes (``note``, ``todo``,
``draft``, ``script``); the deterministic templates remain the
fallback when LLM is disabled or times out.
The factory mirrors :func:`decnet.canary.factory.get_generator`:
caller passes a :class:`~decnet.realism.taxonomy.ContentClass`; we
return the namer registered for it. Renaming a content_class is a
schema change and would invalidate ``synthetic_files.path`` lookups,
so the dispatch is exhaustive — no silent fallbacks for unknown
classes.
"""
from __future__ import annotations
import secrets
import string
from typing import Callable, Optional
from decnet.realism.personas import login_for
from decnet.realism.taxonomy import ContentClass
# Persona → home-dir convention. Most personas are linux-style; the
# rare "windows" persona gets ``C:\\Users\\<persona>\\Documents`` style
# paths (out of scope until per-OS personas land). For now everything
# is POSIX.
def _home(persona: str) -> str:
"""Return the canonical home directory for *persona*."""
return f"/home/{login_for(persona)}"
def _random_token(rng: secrets.SystemRandom, length: int = 6) -> str:
"""Lowercase-alphanum token of length *length* — like ``mkstemp``."""
return "".join(rng.choice(string.ascii_lowercase + string.digits) for _ in range(length))
# ── User-class namers ──────────────────────────────────────────────────────
_NOTE_NAMES: tuple[str, ...] = (
"notes.txt", "scratch.md", "ideas.txt", "Untitled-3.txt",
"draft.md", "keys.txt", "passwords.txt", "TODO.md",
)
_TODO_NAMES: tuple[str, ...] = (
"TODO.md", "todo.txt", "things.md", "tasks.txt", "punchlist.md",
)
_DRAFT_NAMES: tuple[str, ...] = (
"Q3-budget-DRAFT.md", "proposal.md", "letter.txt",
"rfc-internal.md", "memo.txt", "1on1-notes.md",
)
_SCRIPT_NAMES: tuple[str, ...] = (
"backup.sh", "deploy.sh", "cleanup.sh", "rotate.sh",
"fix.py", "tmp.py", "scratch.py",
)
def _name_user(
persona: str, names: tuple[str, ...], rng: secrets.SystemRandom,
) -> str:
return f"{_home(persona)}/{rng.choice(names)}"
def _name_note(persona: str, rng: secrets.SystemRandom) -> str:
return _name_user(persona, _NOTE_NAMES, rng)
def _name_todo(persona: str, rng: secrets.SystemRandom) -> str:
return _name_user(persona, _TODO_NAMES, rng)
def _name_draft(persona: str, rng: secrets.SystemRandom) -> str:
return _name_user(persona, _DRAFT_NAMES, rng)
def _name_script(persona: str, rng: secrets.SystemRandom) -> str:
return _name_user(persona, _SCRIPT_NAMES, rng)
# ── System-class namers ────────────────────────────────────────────────────
# logrotate skeleton: cron.log, cron.log.1, cron.log.2.gz. No epoch
# suffix — the realism failure today is `cron-1777317867.log`.
_CRON_LOGROTATE: tuple[str, ...] = (
"/var/log/cron.log", "/var/log/cron.log.1", "/var/log/cron.log.2.gz",
)
_DAEMON_LOGROTATE: tuple[str, ...] = (
"/var/log/daemon.log", "/var/log/syslog", "/var/log/messages",
"/var/log/auth.log", "/var/log/auth.log.1",
)
def _name_log_cron(persona: str, rng: secrets.SystemRandom) -> str:
return rng.choice(_CRON_LOGROTATE)
def _name_log_daemon(persona: str, rng: secrets.SystemRandom) -> str:
return rng.choice(_DAEMON_LOGROTATE)
def _name_cache_tmp(persona: str, rng: secrets.SystemRandom) -> str:
# mkstemp shape: /tmp/.cache-XXXXXX with random alphanumerics.
# Hidden dot keeps it out of `ls` by default — same as glibc/python.
# Bandit B108 fires on the literal "/tmp/" path; suppressed at the
# site because this is a path we are *generating for a target
# decky*, not a file we are opening on the host.
return f"/tmp/.cache-{_random_token(rng, 6)}" # nosec B108
# ── Email + canary placeholders ────────────────────────────────────────────
# Email "names" (paths) are produced by the email driver's spool logic,
# not by realism naming. Canary paths are advisory — operators usually
# specify ``placement_path`` directly. Stage 7 of the realism migration
# refines canary placement based on persona + content_class.
def _name_email(persona: str, rng: secrets.SystemRandom) -> str:
raise NotImplementedError(
"email paths come from the email driver's spool logic, not "
"realism.naming"
)
def _name_canary(persona: str, rng: secrets.SystemRandom) -> str:
raise NotImplementedError(
"canary placement is set by the canary cultivator (stage 7), "
"not realism.naming"
)
# ── Dispatch ───────────────────────────────────────────────────────────────
_NAMERS: dict[ContentClass, Callable[[str, secrets.SystemRandom], str]] = {
ContentClass.NOTE: _name_note,
ContentClass.TODO: _name_todo,
ContentClass.DRAFT: _name_draft,
ContentClass.SCRIPT: _name_script,
ContentClass.LOG_CRON: _name_log_cron,
ContentClass.LOG_DAEMON: _name_log_daemon,
ContentClass.CACHE_TMP: _name_cache_tmp,
ContentClass.EMAIL: _name_email,
ContentClass.CANARY_AWS_CREDS: _name_canary,
ContentClass.CANARY_ENV_FILE: _name_canary,
ContentClass.CANARY_GIT_CONFIG: _name_canary,
ContentClass.CANARY_SSH_KEY: _name_canary,
ContentClass.CANARY_HONEYDOC: _name_canary,
ContentClass.CANARY_HONEYDOC_DOCX: _name_canary,
ContentClass.CANARY_HONEYDOC_PDF: _name_canary,
ContentClass.CANARY_MYSQL_DUMP: _name_canary,
ContentClass.CANARY_FINGERPRINT_HTML: _name_canary,
ContentClass.CANARY_FINGERPRINT_SVG: _name_canary,
}
def make_path(
content_class: ContentClass,
persona: str,
*,
rand: Optional[secrets.SystemRandom] = None,
) -> str:
"""Return a plausible absolute container-side path for *content_class*.
Persona-conditioned for user-classes (``/home/<persona>/…``).
System-classes ignore persona and pick from a logrotate-shaped
skeleton. Email and canary classes raise — those paths come
from the respective drivers, not from realism naming.
"""
rng = rand or secrets.SystemRandom()
namer = _NAMERS.get(content_class)
if namer is None:
raise KeyError(
f"no namer registered for content_class={content_class!r}"
)
return namer(persona, rng)