Files
DECNET/decnet/canary/cultivator.py
anti b26dd8f529 feat(canary): API-trashing defense — 4-layer fingerprint validation
Adds per-mint nonce gating, structural shape validation, mint UUID
consistency checks, and a per-(token, IP) rate limiter to the canary
worker so attackers who extract a canary from a decky filesystem cannot
poison fingerprint forensics by replaying or forging ?d= submissions.

Changes:

base.py
  fingerprint_nonce: Optional[str] added to CanaryArtifact so generators
  can surface the nonce to the cultivator without coupling the generator
  directly to DB code.

obfuscator.py
  nonce_for(callback_token, mint_uuid): HMAC-SHA256 keyed on
  DECNET_CANARY_FINGERPRINT_SECRET, truncated to 16 hex chars.
  FingerprintSecretMissing raised at mint time if env var is unset.
  render_fingerprint_js() now accepts nonce= and substitutes MINT_NONCE.

fingerprint_payload.js
  New MINT_NONCE placeholder. Appended as &k= on all beacon URLs (bare-open,
  single-shot, chunked). Using &k= avoids colliding with &n= (chunk total).

fingerprint_html.py / fingerprint_svg.py
  Derive nonce via nonce_for() and pass to render_fingerprint_js(). Set
  artifact.fingerprint_nonce so the cultivator can persist it.

cultivator.py
  Passes fingerprint_nonce into create_canary_token() when present on the
  artifact; NULL for all non-fingerprint generators.

canary.py (model)
  fingerprint_nonce: Optional[str] = Field(default=None, max_length=16)
  added to CanaryToken. None for non-fingerprint tokens.

worker.py
  _extract_fingerprint now returns (meta_dict, parsed_fp) tuple.
  _record_hit accepts parsed_fp + raw_nonce and runs 4 layers after
  token lookup: nonce match, shape check, mint UUID consistency, rate limit.
  Each failure sets _fp_invalid_* flag and drops structured _fp.
  Trigger row always lands regardless.

tests/canary/conftest.py
  Session-scoped autouse fixture sets DECNET_CANARY_FINGERPRINT_SECRET so
  fingerprint generator and worker tests work offline.

tests
  5 new worker HTTP tests and 2 new generator tests covering each
  validation layer.
2026-04-29 17:41:04 -04:00

191 lines
7.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Realism contract adapter for canary generators.
Stage 7 of the realism migration. The orchestrator's planner picks a
``canary_*`` :class:`~decnet.realism.taxonomy.ContentClass` 13% of
the time on file ticks; this module turns that pick into a
:class:`~decnet.canary.base.CanaryArtifact` (bytes the SSH driver
plants) plus a persisted :class:`~decnet.web.db.models.CanaryToken`
row so the canary worker recognises the slug when an attacker trips
it.
What this is NOT: it doesn't pick *when* canaries fire — that's the
realism planner's job. It doesn't decide *where* on the filesystem
the canary lands beyond what realism naming + persona conventions
already produce. It's a thin bytes-and-row factory bolted onto the
realism contract.
Stealth (per ``feedback_stealth.md``): we never leak the
``DECNET`` literal into anything that survives to the planted file.
The underlying generators are already stealth-clean; this wrapper
must not undo that.
"""
from __future__ import annotations
import os
import secrets as _secrets
from datetime import datetime, timezone
from typing import Any, Optional
from decnet.canary.base import CanaryArtifact, CanaryContext
from decnet.canary.factory import get_generator
from decnet.logging import get_logger
from decnet.realism.personas import login_for
from decnet.realism.taxonomy import ContentClass, Plan
log = get_logger("canary.cultivator")
# realism content_class → canary generator name. Mirrors
# :data:`decnet.canary.factory.KNOWN_GENERATORS`.
_CLASS_TO_GENERATOR: dict[ContentClass, str] = {
ContentClass.CANARY_AWS_CREDS: "aws_creds",
ContentClass.CANARY_ENV_FILE: "env_file",
ContentClass.CANARY_GIT_CONFIG: "git_config",
ContentClass.CANARY_SSH_KEY: "ssh_key",
ContentClass.CANARY_HONEYDOC: "honeydoc",
ContentClass.CANARY_HONEYDOC_DOCX: "honeydoc_docx",
ContentClass.CANARY_HONEYDOC_PDF: "honeydoc_pdf",
ContentClass.CANARY_MYSQL_DUMP: "mysql_dump",
ContentClass.CANARY_FINGERPRINT_HTML: "fingerprint_html",
ContentClass.CANARY_FINGERPRINT_SVG: "fingerprint_svg",
}
# Generator → CanaryKind. The trip surface (HTTP slug callback / DNS
# resolution / passive bait) determines how the canary worker matches
# an attacker callback to this token. Aligned with
# :data:`decnet.web.db.models.canary.CanaryKind`.
_GENERATOR_TO_KIND: dict[str, str] = {
"aws_creds": "aws_passive", # no embedded callback; passive bait
"env_file": "http",
"git_config": "http",
"honeydoc": "http",
"honeydoc_docx": "http",
"honeydoc_pdf": "http",
"ssh_key": "dns", # trip is DNS resolution of host comment
"mysql_dump": "dns", # trip is DNS resolution of subdomain
"fingerprint_html": "http", # obfuscated JS beacons GET /c/<slug>
"fingerprint_svg": "http", # same, embedded inside SVG <script>
}
# Path conventions per generator. The realism planner doesn't know
# about decoy-realistic credential locations (``~/.aws/credentials``,
# ``~/.git/config``); we map them per-class here so the planted
# artifact lands somewhere an attacker would actually look.
_DEFAULT_PATH: dict[ContentClass, str] = {
ContentClass.CANARY_AWS_CREDS: "/home/{persona}/.aws/credentials",
ContentClass.CANARY_ENV_FILE: "/home/{persona}/app/.env",
ContentClass.CANARY_GIT_CONFIG: "/home/{persona}/.git/config",
ContentClass.CANARY_SSH_KEY: "/home/{persona}/.ssh/id_rsa",
ContentClass.CANARY_HONEYDOC: "/home/{persona}/Documents/notes.html",
ContentClass.CANARY_HONEYDOC_DOCX: "/home/{persona}/Documents/Q3-Operations-Review.docx",
ContentClass.CANARY_HONEYDOC_PDF: "/home/{persona}/Documents/Q3-Operations-Review.pdf",
ContentClass.CANARY_MYSQL_DUMP: "/var/backups/db_backup.sql",
ContentClass.CANARY_FINGERPRINT_HTML: "/home/{persona}/Documents/asset_directory.html",
ContentClass.CANARY_FINGERPRINT_SVG: "/home/{persona}/Documents/network_topology.svg",
}
def _path_for(plan: Plan) -> str:
"""Produce the canary placement path for *plan*.
The realism planner already filled in ``plan.target_path`` from
the namer, but canary placements have stronger conventions
(``~/.aws/credentials``, ``~/.ssh/id_rsa``) than the realism
namer's vocabulary. When :data:`_DEFAULT_PATH` has an entry,
that wins.
"""
template = _DEFAULT_PATH.get(plan.content_class)
if template is None:
return plan.target_path
return template.format(persona=login_for(plan.persona))
def _new_callback_token() -> str:
"""16 url-safe bytes — same shape canary slug fields use elsewhere."""
return _secrets.token_urlsafe(16)
async def cultivate(
plan: Plan,
repo: Any,
*,
http_base: Optional[str] = None,
dns_zone: Optional[str] = None,
created_by: str = "system",
) -> CanaryArtifact:
"""Realism-driven canary plant.
Build a :class:`CanaryContext`, ask the right generator for bytes,
persist a ``canary_tokens`` row so the canary worker can attribute
callbacks to this token, and return the artifact for the SSH
driver to plant.
*http_base* and *dns_zone* default to ``DECNET_CANARY_HTTP_BASE``
and ``DECNET_CANARY_DNS_ZONE`` env vars respectively — same
pattern the canary worker uses. When both are empty, generators
that need a callback host (``ssh_key`` DNS, ``mysql_dump``)
raise; the planner's caller logs and falls back to a non-canary
plan.
"""
if not plan.content_class.is_canary():
raise ValueError(
f"cultivate() called with non-canary content_class="
f"{plan.content_class!r}"
)
gen_name = _CLASS_TO_GENERATOR.get(plan.content_class)
if gen_name is None:
raise KeyError(
f"no canary generator mapped for content_class="
f"{plan.content_class!r}"
)
callback_token = _new_callback_token()
ctx = CanaryContext(
callback_token=callback_token,
http_base=http_base or os.environ.get("DECNET_CANARY_HTTP_BASE", ""),
dns_zone=dns_zone or os.environ.get("DECNET_CANARY_DNS_ZONE", ""),
persona="linux", # all our deckies are POSIX in MVP
)
generator = get_generator(gen_name)
artifact = generator.generate(ctx)
# The generator returns ``path=""`` (planter fills it normally).
# We have a realism-derived path on hand; stuff it in for the SSH
# driver's plant_file call AND the canary_tokens row.
placement_path = _path_for(plan)
# Persist the token row before planting so the canary worker can
# attribute a callback if the artifact trips during the plant
# itself (improbable but possible — DOCX viewers can preview
# autoplay-style).
token_data: dict = {
"kind": _GENERATOR_TO_KIND.get(gen_name, "http"),
"decky_name": plan.decky_name,
"instrumenter": None,
"generator": gen_name,
"placement_path": placement_path,
"callback_token": callback_token,
"secret_seed": callback_token, # deterministic re-seed compatible
"placed_at": datetime.now(timezone.utc),
"created_by": created_by,
"state": "planted",
}
if artifact.fingerprint_nonce is not None:
token_data["fingerprint_nonce"] = artifact.fingerprint_nonce
await repo.create_canary_token(token_data)
# Carry the placement_path on the artifact so the orchestrator's
# plant_file call uses it. We don't mutate the generator's
# original — copy with the new path.
return CanaryArtifact(
path=placement_path,
content=artifact.content,
mode=artifact.mode,
mtime_offset=artifact.mtime_offset,
instrumenter=artifact.instrumenter,
generator=artifact.generator,
notes=list(artifact.notes),
)