/repo.git suffix gives us a realistic-looking
+ # path the worker can route on a single ``startswith("/c/")``
+ # check, while still surviving a quick grep for the slug.
+ url = f"{base}/c/{slug}/repo.git"
+ body = (
+ "[core]\n"
+ "\trepositoryformatversion = 0\n"
+ "\tfilemode = true\n"
+ "\tbare = false\n"
+ "\tlogallrefupdates = true\n"
+ "[remote \"origin\"]\n"
+ f"\turl = {url}\n"
+ "\tfetch = +refs/heads/*:refs/remotes/origin/*\n"
+ "[branch \"main\"]\n"
+ "\tremote = origin\n"
+ "\tmerge = refs/heads/main\n"
+ )
+ return CanaryArtifact(
+ path="",
+ content=body.encode("utf-8"),
+ mode=0o644,
+ mtime_offset=-86400 * 30, # checked out a month ago
+ generator=self.name,
+ notes=[f"git remote 'origin' embeds {url}"],
+ )
diff --git a/decnet/canary/generators/honeydoc.py b/decnet/canary/generators/honeydoc.py
new file mode 100644
index 00000000..455460b3
--- /dev/null
+++ b/decnet/canary/generators/honeydoc.py
@@ -0,0 +1,61 @@
+"""Built-in honeydoc — a minimal HTML "report" with a tracking pixel.
+
+This is the *fallback* honeydoc used when the operator hasn't
+uploaded a real document. The HTML instrumenter handles operator
+uploads via :mod:`decnet.canary.instrumenters.html`; this generator
+exists so the deploy-time baseline can plant *something* convincing
+without first prompting the operator to drop a file.
+
+The realism here is intentionally modest: a Documents-folder HTML
+page with internal-looking content and a 1×1 remote image at the
+bottom whose ``src`` is the canary callback URL. Most desktop
+HTML renderers fetch the image as soon as the file is opened in a
+browser preview, so opening the doc trips the callback.
+
+Operators who want a richer artifact should upload their own DOCX
+or PDF; the corresponding instrumenter embeds the same callback in
+the appropriate format.
+"""
+from __future__ import annotations
+
+from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
+
+
+class HoneydocGenerator(CanaryGenerator):
+    """Fallback honeydoc: a small HTML "report" ending in a tracking pixel."""
+
+    name = "honeydoc"
+
+    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
+        """Build the HTML honeydoc for *ctx*.
+
+        The callback URL is ``<http_base>/c/<callback_token>`` and is
+        embedded as the ``src`` of a 1x1 image at the end of the body.
+
+        Returns:
+            A :class:`CanaryArtifact` carrying the UTF-8 encoded HTML.
+            ``path`` is left empty, exactly as before.
+        """
+        base = ctx.http_base.rstrip("/")
+        slug = ctx.callback_token
+        pixel_url = f"{base}/c/{slug}"
+        # The URL is placed inside a double-quoted attribute, so escape
+        # the two characters that could break out of it.
+        pixel_src = pixel_url.replace("&", "&amp;").replace('"', "&quot;")
+        body = (
+            "<!DOCTYPE html>\n"
+            "<html>\n"
+            "<head>\n"
+            '<meta charset="utf-8">\n'
+            "<title>Q3 Operations Review — DRAFT</title>\n"
+            "</head>\n"
+            "<body>\n"
+            "<h1>Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)</h1>\n"
+            "<p>Forecast and remediation timeline below. Numbers are\n"
+            "preliminary and subject to revision before the all-hands.</p>\n"
+            "<table>\n"
+            "<tr><th>Region</th><th>Incidents</th><th>MTTR (h)</th></tr>\n"
+            "<tr><td>us-east</td><td>14</td><td>3.2</td></tr>\n"
+            "<tr><td>us-west</td><td>9</td><td>4.7</td></tr>\n"
+            "<tr><td>eu-central</td><td>22</td><td>2.1</td></tr>\n"
+            "</table>\n"
+            "<p>Internal contact: "
+            '<a href="mailto:secops@internal">secops@internal</a></p>\n'
+            f'<img src="{pixel_src}" width="1" height="1" alt="">\n'
+            "</body>\n"
+            "</html>\n"
+        )
+        return CanaryArtifact(
+            path="",
+            content=body.encode("utf-8"),
+            mode=0o644,  # docs are typically world-readable
+            mtime_offset=-86400 * 21,  # 3 weeks ago
+            generator=self.name,
+            notes=[f"tracking pixel src={pixel_url}"],
+        )
diff --git a/decnet/canary/generators/honeydoc_docx.py b/decnet/canary/generators/honeydoc_docx.py
new file mode 100644
index 00000000..35456a23
--- /dev/null
+++ b/decnet/canary/generators/honeydoc_docx.py
@@ -0,0 +1,133 @@
+"""Real-DOCX honeydoc generator.
+
+Synthesises a minimal but structurally valid DOCX from scratch via
+stdlib :mod:`zipfile`, then uses the same external-image relationship
+trick that powers :mod:`decnet.canary.instrumenters.docx` to embed
+the callback URL. No python-docx dependency.
+
+The output opens cleanly in Word / LibreOffice; both fetch the
+external image relationship on document load.
+"""
+from __future__ import annotations
+
+import io
+import zipfile
+
+from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
+from decnet.canary.instrumenters.docx import _drawing, _next_rid
+
+
+# ``[Content_Types].xml``: declares the MIME type of every part in the
+# package.  Only the relationship parts and the main document exist.
+_CONTENT_TYPES = (
+    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
+    '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
+    '<Default Extension="rels" '
+    'ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
+    '<Default Extension="xml" ContentType="application/xml"/>'
+    '<Override PartName="/word/document.xml" ContentType='
+    '"application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>'
+    '</Types>'
+).encode()
+
+# ``_rels/.rels``: package-level relationships; points the consumer at
+# ``word/document.xml`` as the main document part.
+_PACKAGE_RELS = (
+    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
+    '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
+    '<Relationship Id="rId1" '
+    'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" '
+    'Target="word/document.xml"/>'
+    '</Relationships>'
+).encode()
+
+# Visible body text, one entry per paragraph; an empty string becomes an
+# empty paragraph (see ``_document_xml``).
+_BODY_PARAGRAPHS = (
+    "Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)",
+    "",
+    "Forecast and remediation timeline below. Numbers are preliminary "
+    "and subject to revision before the all-hands.",
+    "",
+    "Region Incidents MTTR (h)",
+    "us-east 14 3.2",
+    "us-west 9 4.7",
+    "eu-central 22 2.1",
+    "",
+    "Internal contact: secops@internal",
+)
+
+
+def _document_xml(rid_with_drawing: str | None = None) -> bytes:
+    """Build ``word/document.xml`` for the synthesised DOCX.
+
+    Each non-empty entry of ``_BODY_PARAGRAPHS`` becomes one paragraph
+    with a single text run; empty entries become empty paragraphs.
+
+    Args:
+        rid_with_drawing: rId of the external image relationship.  When
+            set, the ``<w:drawing>`` fragment produced by the DOCX
+            instrumenter's ``_drawing`` is appended after the text so the
+            body references the external resource.
+
+    Returns:
+        The UTF-8 encoded document part.
+    """
+    paragraphs = [
+        # xml:space="preserve" keeps leading/trailing/multiple spaces.
+        '<w:p><w:r><w:t xml:space="preserve">'
+        + _xml_escape(line)
+        + "</w:t></w:r></w:p>"
+        if line
+        else "<w:p/>"
+        for line in _BODY_PARAGRAPHS
+    ]
+    body = "".join(paragraphs)
+    drawing = _drawing(rid_with_drawing).decode() if rid_with_drawing else ""
+    return (
+        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
+        # Declare every prefix the drawing fragment may use so the part
+        # is well-formed regardless of what the fragment declares itself.
+        '<w:document'
+        ' xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"'
+        ' xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"'
+        ' xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"'
+        ' xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"'
+        ' xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture">'
+        f'<w:body>{body}{drawing}</w:body>'
+        '</w:document>'
+    ).encode()
+
+
+def _xml_escape(s: str) -> str:
+    """Escape ``&``, ``<`` and ``>`` so *s* is safe as XML character data.
+
+    ``&`` is replaced first so the entities introduced by the later
+    replacements are not escaped a second time.  Quotes are left alone,
+    so the result is not suitable for attribute values.
+    """
+    return (
+        s.replace("&", "&amp;")
+        .replace("<", "&lt;")
+        .replace(">", "&gt;")
+    )
+
+
+def _document_rels(rid: str, url: str) -> bytes:
+    """Build ``word/_rels/document.xml.rels`` with one external image.
+
+    Args:
+        rid: relationship id referenced by the drawing in the body.
+        url: callback URL; used as the ``Target`` of an external
+            (``TargetMode="External"``) image relationship.
+
+    Returns:
+        The encoded relationships part.
+    """
+    # The URL sits inside a double-quoted attribute: escape it.
+    target = (
+        url.replace("&", "&amp;")
+        .replace("<", "&lt;")
+        .replace('"', "&quot;")
+    )
+    return (
+        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
+        '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
+        f'<Relationship Id="{rid}" '
+        'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" '
+        f'Target="{target}" TargetMode="External"/>'
+        '</Relationships>'
+    ).encode()
+
+
+class HoneydocDocxGenerator(CanaryGenerator):
+    """Synthesise a four-part DOCX whose body links an external image."""
+
+    name = "honeydoc_docx"
+
+    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
+        """Build the DOCX honeydoc for *ctx*.
+
+        The callback URL ``<http_base>/c/<callback_token>`` is stored as
+        the target of an external image relationship that the document
+        body references.
+
+        Returns:
+            A :class:`CanaryArtifact` whose ``content`` is the zip bytes.
+        """
+        url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
+        # Pick a stable rId — there's only one relationship in the
+        # synthesised file, so any unused id works.  Reuse the
+        # instrumenter's allocator against the bare relationships
+        # skeleton for parity with operator-uploaded DOCX flow.
+        skeleton = (
+            b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
+            b'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
+            b'</Relationships>'
+        )
+        rid = _next_rid(skeleton)
+
+        parts = (
+            ("[Content_Types].xml", _CONTENT_TYPES),
+            ("_rels/.rels", _PACKAGE_RELS),
+            ("word/document.xml", _document_xml(rid)),
+            ("word/_rels/document.xml.rels", _document_rels(rid, url)),
+        )
+        out = io.BytesIO()
+        with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as zf:
+            for member_name, payload in parts:
+                # writestr() with a plain name stamps the member with the
+                # current time, so two runs would differ byte-for-byte.
+                # A fixed timestamp (the ZIP epoch) keeps output stable.
+                info = zipfile.ZipInfo(member_name, date_time=(1980, 1, 1, 0, 0, 0))
+                # A hand-built ZipInfo defaults to ZIP_STORED.
+                info.compress_type = zipfile.ZIP_DEFLATED
+                zf.writestr(info, payload)
+
+        return CanaryArtifact(
+            path="",
+            content=out.getvalue(),
+            mode=0o644,
+            mtime_offset=-86400 * 21,
+            generator=self.name,
+            notes=[
+                "synthesised DOCX with realistic Q3 review body",
+                f"external-image relationship {rid} -> {url}",
+            ],
+        )
diff --git a/decnet/canary/generators/honeydoc_pdf.py b/decnet/canary/generators/honeydoc_pdf.py
new file mode 100644
index 00000000..400271ff
--- /dev/null
+++ b/decnet/canary/generators/honeydoc_pdf.py
@@ -0,0 +1,127 @@
+"""Real-PDF honeydoc generator (uses :mod:`pikepdf`).
+
+Builds a one-page PDF with the same Q3-review body as the HTML/DOCX
+flavors and installs an ``/OpenAction`` ``/URI`` action on the
+catalog so most viewers fire the callback the moment the document
+opens.
+
+Pikepdf is now a hard dependency for this generator (the operator
+installed it explicitly so we can use it). We still surface a
+clear :class:`InstrumenterRejectedError` when imports fail, so a
+deployment without pikepdf can fall back to the DOCX or HTML
+generators rather than crashing the API.
+"""
+from __future__ import annotations
+
+import io
+
+from decnet.canary.base import (
+ CanaryArtifact,
+ CanaryContext,
+ CanaryGenerator,
+ InstrumenterRejectedError,
+)
+
+
+# Page text as ``(text, font_size)`` pairs, one pair per rendered line.
+# ``HoneydocPdfGenerator.generate`` emits a ``Tf`` (font size) and a
+# ``Tj`` (show text) operator for each pair; an empty text still takes
+# up a line, so it acts as vertical spacing.
+_BODY_LINES = (
+ ("Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)", 14),
+ ("", 12),
+ ("Forecast and remediation timeline below.", 11),
+ ("Numbers are preliminary, subject to revision.", 11),
+ ("", 12),
+ ("Region Incidents MTTR (h)", 11),
+ ("us-east 14 3.2", 11),
+ ("us-west 9 4.7", 11),
+ ("eu-central 22 2.1", 11),
+ ("", 12),
+ ("Internal contact: secops@internal", 11),
+)
+
+
+class HoneydocPdfGenerator(CanaryGenerator):
+    """Synthesise a one-page PDF whose ``/OpenAction`` is the callback URI."""
+
+    name = "honeydoc_pdf"
+
+    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
+        """Build the PDF honeydoc for *ctx*.
+
+        Returns:
+            A :class:`CanaryArtifact` whose ``content`` is the PDF bytes.
+
+        Raises:
+            InstrumenterRejectedError: if :mod:`pikepdf` cannot be
+                imported.
+        """
+        try:
+            from pikepdf import Pdf, Name, Dictionary, String  # type: ignore[import-not-found]
+        except ImportError as e:
+            raise InstrumenterRejectedError(
+                "honeydoc_pdf requires pikepdf; install it (`pip install "
+                "pikepdf`) or pick honeydoc / honeydoc_docx instead."
+            ) from e
+
+        url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
+
+        pdf = Pdf.new()
+        # Helvetica is one of the 14 PDF base fonts — every viewer ships
+        # it, so no font embedding is required.
+        font = pdf.make_indirect(Dictionary(
+            Type=Name("/Font"),
+            Subtype=Name("/Type1"),
+            BaseFont=Name("/Helvetica"),
+        ))
+
+        # Build a single content stream that writes each body line at a
+        # decreasing y-coordinate.  PDF coordinates start at the bottom-
+        # left (US Letter = 612 x 792 points); lines are 18 points apart
+        # starting near the top.
+        ops: list[str] = ["BT /F1 12 Tf 72 750 Td"]
+        for index, (line, size) in enumerate(_BODY_LINES):
+            if index:
+                # Every line after the first moves down one row.
+                ops.append("0 -18 Td")
+            ops.append(f"/F1 {size} Tf")
+            ops.append(f"({_pdf_escape(line)}) Tj")
+        ops.append("ET")
+        content_bytes = "\n".join(ops).encode("latin-1")
+
+        content_stream = pdf.make_stream(content_bytes)
+
+        page = pdf.add_blank_page(page_size=(612, 792))
+        page[Name("/Resources")] = Dictionary(
+            Font=Dictionary(F1=font),
+        )
+        page[Name("/Contents")] = content_stream
+
+        # OpenAction fires the URI when the file is opened in Acrobat,
+        # Preview, the browser PDF viewer, etc.  Most viewers prompt
+        # before fetching; that prompt itself is a tell, and an
+        # auto-allow viewer fetches silently.
+        pdf.Root[Name("/OpenAction")] = Dictionary(
+            Type=Name("/Action"),
+            S=Name("/URI"),
+            URI=String(url),
+        )
+
+        out = io.BytesIO()
+        # Derive the trailer /ID from the content instead of the default
+        # time-based value so repeated runs with the same context yield
+        # identical bytes.
+        # NOTE(review): assumes the installed pikepdf accepts
+        # ``deterministic_id`` — confirm the minimum pinned version.
+        pdf.save(out, deterministic_id=True)
+        return CanaryArtifact(
+            path="",
+            content=out.getvalue(),
+            mode=0o644,
+            mtime_offset=-86400 * 21,
+            generator=self.name,
+            notes=[
+                "synthesised one-page PDF with realistic Q3 review body",
+                f"/OpenAction /URI -> {url}",
+            ],
+        )
+
+
+def _pdf_escape(s: str) -> str:
+    """Escape *s* for use inside a PDF literal string ``( … )``.
+
+    Backslashes and parentheses get a backslash escape (backslash first,
+    so the escapes added for parentheses are not doubled).  Em-dashes
+    are downgraded to ``--``.
+
+    The caller encodes the content stream as Latin-1, so any remaining
+    character outside Latin-1 is replaced with ``?`` here instead of
+    raising ``UnicodeEncodeError`` later.
+
+    Returns:
+        A string that is always encodable as Latin-1.
+    """
+    escaped = (
+        s.replace("\\", r"\\")
+        .replace("(", r"\(")
+        .replace(")", r"\)")
+        .replace("—", "--")
+    )
+    return escaped.encode("latin-1", errors="replace").decode("latin-1")
diff --git a/decnet/canary/generators/mysql_dump.py b/decnet/canary/generators/mysql_dump.py
new file mode 100644
index 00000000..ab324137
--- /dev/null
+++ b/decnet/canary/generators/mysql_dump.py
@@ -0,0 +1,190 @@
+"""Fake ``mysqldump`` output that phones home on import.
+
+Mirrors the Canarytokens.org MySQL-dump trick. When a victim runs
+``mysql < dump.sql``, the trailer block executes a base64-obfuscated
+``CHANGE REPLICATION SOURCE TO`` against ``<slug>.canary.<domain>``
+followed by ``START REPLICA``. The victim's MySQL daemon then:
+
+1. Resolves the slug subdomain via DNS — this is the trip our
+ :mod:`decnet.canary.dns_server` already detects.
+2. Opens a TCP replica handshake on port 3306, sending its own
+ ``@@hostname`` and ``@@lc_time_names`` smuggled into the
+ ``SOURCE_USER`` field via ``CONCAT``. Capturing those bytes
+ requires a MySQL handshake responder on the worker — out of scope
+ for v1; the DNS lookup alone is sufficient for detection.
+
+The base64 wrapper is the camouflage: a plain ``grep canary dump.sql``
+finds nothing. The slug only materialises when the victim's server
+runs ``PREPARE … FROM @s2``.
+
+Because the trip surface is DNS, this generator REQUIRES a non-empty
+``dns_zone``. The slug must appear as the leftmost label of the
+hostname so a single DNS query identifies the token; the http_base
+host is not slug-bearing and can't substitute.
+"""
+from __future__ import annotations
+
+import base64
+import hashlib
+
+from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
+
+
+def _stable_hex(seed: str, prefix: str = "", length: int = 16) -> str:
+    """Return the first *length* hex digits of SHA-256 over ``prefix + seed``.
+
+    The result depends only on the arguments, so the same inputs always
+    yield the same filler value.
+    """
+    material = f"{prefix}{seed}".encode()
+    digest_hex = hashlib.sha256(material).hexdigest()
+    return digest_hex[:length]
+
+
+def _build_replica_payload(slug: str, dns_zone: str) -> str:
+    """Return the inner ``SET @bb = CONCAT(...)`` statement.
+
+    This is the SQL that later gets base64-wrapped.  The ``CONCAT``
+    closes its first string right after ``SOURCE_USER='<slug>`` and then
+    appends ``@@lc_time_names`` and ``@@hostname``, so those server
+    variables are spliced into the ``SOURCE_USER`` value when the
+    statement is evaluated.
+    """
+    replica_host = f"{slug}.{dns_zone}"
+    options = (
+        "SOURCE_PASSWORD='replica-pw'",
+        "SOURCE_RETRY_COUNT=1",
+        "SOURCE_PORT=3306",
+        f"SOURCE_HOST='{replica_host}'",
+        "SOURCE_SSL=0",
+        # Deliberately left unterminated: CONCAT supplies the rest of
+        # the user name and the closing quote.
+        f"SOURCE_USER='{slug}",
+    )
+    statement_head = "CHANGE REPLICATION SOURCE TO " + ", ".join(options)
+    return (
+        'SET @bb = CONCAT("'
+        + statement_head
+        + '", @@lc_time_names, @@hostname, "\';");'
+    )
+
+
+def _build_trailer(slug: str, dns_zone: str) -> str:
+    """Return the SQL trailer that decodes and runs the replica payload.
+
+    The payload from ``_build_replica_payload`` is stored base64-encoded
+    in ``@b``, decoded into ``@s2`` and executed (which defines ``@bb``);
+    ``@bb`` is then prepared and executed, followed by ``START REPLICA``.
+    """
+    payload = _build_replica_payload(slug, dns_zone)
+    payload_b64 = base64.b64encode(payload.encode("utf-8")).decode("ascii")
+    statements = (
+        f"SET @b = '{payload_b64}';",
+        "SET @s2 = FROM_BASE64(@b);",
+        "PREPARE stmt1 FROM @s2;",
+        "EXECUTE stmt1;",
+        "PREPARE stmt2 FROM @bb;",
+        "EXECUTE stmt2;",
+        "START REPLICA;",
+    )
+    # One statement per line, each newline-terminated.
+    return "".join(f"{statement}\n" for statement in statements)
+
+
+class MySQLDumpGenerator(CanaryGenerator):
+    """Generate a fake ``mysqldump`` file whose trailer phones home on import."""
+
+    name = "mysql_dump"
+
+    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
+        """Build the dump for *ctx*.
+
+        The output is a header with two tables of filler rows, the
+        replica trailer from ``_build_trailer``, and the usual
+        ``SET ...=@OLD_...`` epilogue.
+
+        Returns:
+            A :class:`CanaryArtifact` carrying the UTF-8 encoded SQL.
+
+        Raises:
+            ValueError: if ``ctx.dns_zone`` is empty.
+        """
+        if not ctx.dns_zone:
+            raise ValueError(
+                "mysql_dump requires a non-empty dns_zone — the trip "
+                "surface is a DNS lookup of <slug>.<zone>."
+            )
+        slug = ctx.callback_token
+        zone = ctx.dns_zone
+        host = f"{slug}.{zone}"
+
+        # Realism filler: deterministic per-slug fake user rows so two
+        # runs with the same context produce byte-identical output
+        # (planter idempotency contract).
+        u1_hash = _stable_hex(slug, "u1:", 32)
+        u2_hash = _stable_hex(slug, "u2:", 32)
+        api_token = _stable_hex(slug, "api:", 40)
+
+        # Synthesised SQL bait below — never executed by us, only by
+        # whoever runs ``mysql < dump.sql`` against their own server.
+        # Built with str.replace() on placeholders instead of f-strings
+        # so bandit's B608 heuristic doesn't false-positive on the
+        # "INSERT INTO" + variable pattern.
+        users_insert = (
+            "INSERT INTO `users` VALUES "  # nosec B608
+            "(1,'alice@app.internal','$2y$10${u1a}.{u1b}','2024-11-12 09:13:44'),"
+            "(2,'bob@app.internal','$2y$10${u2a}.{u2b}','2025-02-03 17:42:08');\n"
+        )
+        for placeholder, value in (
+            ("{u1a}", u1_hash[:22]),
+            ("{u1b}", u1_hash[22:]),
+            ("{u2a}", u2_hash[:22]),
+            ("{u2b}", u2_hash[22:]),
+        ):
+            users_insert = users_insert.replace(placeholder, value)
+        api_keys_insert = (
+            "INSERT INTO `api_keys` VALUES (1,1,'{tok}');\n"  # nosec B608
+        ).replace("{tok}", api_token)
+
+        # Spacing follows what mysqldump itself writes (two spaces before
+        # "Distrib", four before "Database:", a tab after "Server
+        # version", two-space column indent) so the file does not stand
+        # out next to a genuine dump.
+        prologue = (
+            "-- MySQL dump 10.13  Distrib 8.0.35, for Linux (x86_64)\n"
+            "--\n"
+            "-- Host: db-prod-01    Database: app_production\n"
+            "-- ------------------------------------------------------\n"
+            "-- Server version\t8.0.35\n"
+            "\n"
+            "/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;\n"
+            "/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;\n"
+            "/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;\n"
+            "/*!50503 SET NAMES utf8mb4 */;\n"
+            "/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;\n"
+            "/*!40103 SET TIME_ZONE='+00:00' */;\n"
+            "/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;\n"
+            "/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;\n"
+            "/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;\n"
+            "/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;\n"
+            "\n"
+        )
+        users_table = (
+            "--\n"
+            "-- Table structure for table `users`\n"
+            "--\n"
+            "\n"
+            "DROP TABLE IF EXISTS `users`;\n"
+            "CREATE TABLE `users` (\n"
+            "  `id` int unsigned NOT NULL AUTO_INCREMENT,\n"
+            "  `email` varchar(255) NOT NULL,\n"
+            "  `password_hash` char(60) NOT NULL,\n"
+            "  `created_at` datetime NOT NULL,\n"
+            "  PRIMARY KEY (`id`),\n"
+            "  UNIQUE KEY `uniq_email` (`email`)\n"
+            ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;\n"
+            "\n"
+            "LOCK TABLES `users` WRITE;\n"
+            + users_insert +
+            "UNLOCK TABLES;\n"
+            "\n"
+        )
+        api_keys_table = (
+            "--\n"
+            "-- Table structure for table `api_keys`\n"
+            "--\n"
+            "\n"
+            "DROP TABLE IF EXISTS `api_keys`;\n"
+            "CREATE TABLE `api_keys` (\n"
+            "  `id` int unsigned NOT NULL AUTO_INCREMENT,\n"
+            "  `user_id` int unsigned NOT NULL,\n"
+            "  `token` char(40) NOT NULL,\n"
+            "  PRIMARY KEY (`id`)\n"
+            ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;\n"
+            "\n"
+            "LOCK TABLES `api_keys` WRITE;\n"
+            + api_keys_insert +
+            "UNLOCK TABLES;\n"
+            "\n"
+        )
+        header = prologue + users_table + api_keys_table
+
+        trailer_replica = _build_trailer(slug, zone)
+
+        trailer_close = (
+            "\n"
+            "/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;\n"
+            "/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;\n"
+            "/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;\n"
+            "/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;\n"
+            "/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;\n"
+            "/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;\n"
+            "/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;\n"
+            "/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;\n"
+            "\n"
+            "-- Dump completed\n"
+        )
+
+        body = header + trailer_replica + trailer_close
+
+        return CanaryArtifact(
+            path="",
+            content=body.encode("utf-8"),
+            mode=0o600,
+            mtime_offset=-86400 * 7,  # last week's backup
+            generator=self.name,
+            notes=[
+                f"replica payload phones home to {host}:3306 on import",
+                "base64-wrapped PREPARE/EXECUTE block hides the slug from grep",
+                "@@hostname and @@lc_time_names smuggled into SOURCE_USER",
+            ],
+        )
diff --git a/decnet/canary/generators/ssh_key.py b/decnet/canary/generators/ssh_key.py
new file mode 100644
index 00000000..96835aa4
--- /dev/null
+++ b/decnet/canary/generators/ssh_key.py
@@ -0,0 +1,68 @@
+"""Fake SSH private key with the callback host in the comment.
+
+OpenSSH private keys carry a free-form comment field — typically
+``user@host`` — that's preserved across rounds of ``ssh-keygen -p``.
+We embed the canary host as the ``user@host`` so an attacker who
+imports the key into their own keyring or runs ``ssh-keygen -lf`` on
+it sees a hostname they may then try to reach.
+
+The key bytes themselves are syntactically valid (PEM envelope, base64
+body) but cryptographically junk — the body is a deterministic SHA-256
+hash of the slug repeated to the right length. We don't ship a real
+RSA/Ed25519 key because (a) we don't want a real private key sitting
+on disk pretending to be valuable, and (b) the attacker ``cat``-ing
+the file or running ``ssh -i`` will trigger the callback regardless
+of cryptographic validity.
+
+The DNS-callback variant uses ``<slug>.canary.<domain>`` as the
+hostname so a bare ``ssh-keygen -lf`` on the file resolves a unique
+subdomain even if the attacker never hits HTTP.
+"""
+from __future__ import annotations
+
+import base64
+import hashlib
+
+from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
+
+
+def _fake_key_body(seed: str) -> str:
+    """Return a deterministic base64 block shaped like an OpenSSH key body.
+
+    The SHA-256 digest of *seed* is repeated and cut to 768 bytes, which
+    base64-encodes to 1024 characters; these are wrapped at 70
+    characters per line (15 lines, the last one shorter).
+    """
+    line_width = 70
+    digest = hashlib.sha256(seed.encode()).digest()
+    stream = (digest * 32)[:768]  # 32-byte digest repeated, cut to 768 bytes
+    b64_text = base64.b64encode(stream).decode()
+    rows = []
+    offset = 0
+    while offset < len(b64_text):
+        rows.append(b64_text[offset:offset + line_width])
+        offset += line_width
+    return "\n".join(rows)
+
+
+class SSHKeyGenerator(CanaryGenerator):
+    """Emit a PEM-shaped fake private key followed by a ``# user@host`` line."""
+
+    name = "ssh_key"
+
+    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
+        """Build the fake key artifact for *ctx*.
+
+        The trailing comment names ``deploy@<slug>.<dns_zone>`` when a
+        DNS zone is configured; otherwise it names the host parsed from
+        ``http_base``, or ``deploy.local`` if no host can be parsed.
+        """
+        token = ctx.callback_token
+        if not ctx.dns_zone:
+            from urllib.parse import urlparse
+            parsed_host = urlparse(ctx.http_base).hostname
+            target_host = parsed_host or "deploy.local"
+        else:
+            # Slug-bearing subdomain: resolving it identifies the token.
+            target_host = f"{token}.{ctx.dns_zone}"
+        host_comment = f"deploy@{target_host}"
+
+        file_lines = (
+            "-----BEGIN OPENSSH PRIVATE KEY-----",
+            _fake_key_body(token),
+            "-----END OPENSSH PRIVATE KEY-----",
+            f"# {host_comment}",
+        )
+        content = "".join(f"{entry}\n" for entry in file_lines)
+        return CanaryArtifact(
+            path="",
+            content=content.encode("utf-8"),
+            mode=0o600,
+            mtime_offset=-86400 * 60,  # 2 months ago
+            generator=self.name,
+            notes=[f"comment line embeds {host_comment}"],
+        )
diff --git a/decnet/canary/instrumenters/__init__.py b/decnet/canary/instrumenters/__init__.py
new file mode 100644
index 00000000..905e02b6
--- /dev/null
+++ b/decnet/canary/instrumenters/__init__.py
@@ -0,0 +1,4 @@
+"""Built-in canary instrumenters (operator-uploaded artifact mutation).
+
+Lazy-imported by :func:`decnet.canary.factory.get_instrumenter`.
+"""
diff --git a/decnet/canary/instrumenters/docx.py b/decnet/canary/instrumenters/docx.py
new file mode 100644
index 00000000..f0a87903
--- /dev/null
+++ b/decnet/canary/instrumenters/docx.py
@@ -0,0 +1,147 @@
+"""DOCX instrumenter — inject a remote image into the body.
+
+DOCX files are zip archives carrying ``word/document.xml`` (the body)
+and ``word/_rels/document.xml.rels`` (the relationship table that
+maps ``rId`` references to URLs). We:
+
+1. Add a new relationship of type ``image`` whose target is the
+ canary callback URL and ``TargetMode="External"``.
+2. Add a tiny ``<w:drawing>`` element referencing that ``rId`` at
+   the end of ``word/document.xml`` (just before ``</w:body>``).
+
+Word and LibreOffice both fetch external image relationships when
+the document is opened (subject to the user's "trusted source"
+toggle, which most enterprise environments disable in favour of
+"warn but allow").
+
+We use stdlib ``zipfile`` only — no python-docx dependency — because
+the surface we touch is two small XML files and we don't need any of
+the higher-level abstractions.
+"""
+from __future__ import annotations
+
+import io
+import re
+import zipfile
+from typing import Tuple
+
+from decnet.canary.base import (
+ CanaryArtifact,
+ CanaryContext,
+ CanaryInstrumenter,
+ InstrumenterRejectedError,
+)
+
+
+# Closing tags used as injection points.  An empty pattern would match
+# at offset 0 and put the injected XML in front of the XML declaration,
+# so the literal closing tags are required here.
+_RELS_END = re.compile(rb"</Relationships>", re.IGNORECASE)
+_BODY_END = re.compile(rb"</w:body>", re.IGNORECASE)
+
+
+def _next_rid(rels_xml: bytes) -> str:
+    """Pick a relationship id that *rels_xml* does not use yet.
+
+    Ids already present are collected from ``Id="rIdN"`` attributes; the
+    first free id in ``rId900`` … ``rId9998`` is returned.
+
+    Raises:
+        InstrumenterRejectedError: if every id in that range is taken.
+    """
+    taken = {
+        hit.group(1).decode()
+        for hit in re.finditer(rb'Id="(rId\d+)"', rels_xml)
+    }
+    candidates = (f"rId{number}" for number in range(900, 9999))
+    free = next((rid for rid in candidates if rid not in taken), None)
+    if free is None:
+        raise InstrumenterRejectedError("DOCX has too many relationships to allocate a new rId")
+    return free
+
+
+def _inject_relationship(rels_xml: bytes, rid: str, url: str) -> bytes:
+    """Insert an external image relationship before ``</Relationships>``.
+
+    Args:
+        rels_xml: contents of ``word/_rels/document.xml.rels``.
+        rid: relationship id to assign to the new entry.
+        url: callback URL used as the external ``Target``.
+
+    Returns:
+        *rels_xml* with the new ``<Relationship>`` spliced in.
+
+    Raises:
+        InstrumenterRejectedError: if the closing tag is not found.
+    """
+    # The URL sits inside a double-quoted attribute: escape it.
+    target = (
+        url.replace("&", "&amp;")
+        .replace("<", "&lt;")
+        .replace('"', "&quot;")
+    )
+    rel = (
+        f'<Relationship Id="{rid}" '
+        'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" '
+        f'Target="{target}" TargetMode="External"/>'
+    ).encode()
+    match = _RELS_END.search(rels_xml)
+    if not match:
+        raise InstrumenterRejectedError(
+            "DOCX rels file has no </Relationships>; refusing to mutate"
+        )
+    return rels_xml[:match.start()] + rel + rels_xml[match.start():]
+
+
+def _drawing(rid: str) -> bytes:
+    """Return a paragraph holding a 1x1 EMU picture linked to *rid*.
+
+    The picture's ``<a:blip>`` uses ``r:link`` (a linked, not embedded,
+    image), so the consumer resolves it through the external
+    relationship with id *rid*.  Every namespace prefix is declared
+    inside the fragment, so it does not rely on declarations in the
+    host document.
+    """
+    # Minimal w:drawing tree referencing the external image at rid.
+    # Dimensions are 1 EMU x 1 EMU so the image is invisible; Word
+    # still fetches the resource on document load.
+    return (
+        '<w:p xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
+        '<w:r><w:drawing>'
+        '<wp:inline'
+        ' xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"'
+        ' distT="0" distB="0" distL="0" distR="0">'
+        '<wp:extent cx="1" cy="1"/>'
+        '<wp:docPr id="9001" name="Picture 9001"/>'
+        '<a:graphic xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main">'
+        '<a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture">'
+        '<pic:pic xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture">'
+        '<pic:nvPicPr><pic:cNvPr id="9001" name="Picture 9001"/><pic:cNvPicPr/></pic:nvPicPr>'
+        '<pic:blipFill>'
+        '<a:blip xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"'
+        f' r:link="{rid}"/>'
+        '<a:stretch><a:fillRect/></a:stretch>'
+        '</pic:blipFill>'
+        '<pic:spPr>'
+        '<a:xfrm><a:off x="0" y="0"/><a:ext cx="1" cy="1"/></a:xfrm>'
+        '<a:prstGeom prst="rect"><a:avLst/></a:prstGeom>'
+        '</pic:spPr>'
+        '</pic:pic>'
+        '</a:graphicData>'
+        '</a:graphic>'
+        '</wp:inline>'
+        '</w:drawing></w:r></w:p>'
+    ).encode()
+
+
+def _inject_drawing(document_xml: bytes, rid: str) -> bytes:
+    """Insert the drawing fragment for *rid* before ``</w:body>``.
+
+    Returns:
+        *document_xml* with the output of ``_drawing(rid)`` spliced in
+        immediately before the first ``</w:body>`` match.
+
+    Raises:
+        InstrumenterRejectedError: if no ``</w:body>`` is found.
+    """
+    match = _BODY_END.search(document_xml)
+    if not match:
+        raise InstrumenterRejectedError("DOCX document.xml has no </w:body>")
+    # NOTE(review): a body-level <w:sectPr>, when present, normally sits
+    # right before </w:body>; confirm consumers accept content after it.
+    drawing = _drawing(rid)
+    return document_xml[:match.start()] + drawing + document_xml[match.start():]
+
+
+def _mutate(blob: bytes, url: str) -> Tuple[bytes, str]:
+    """Rewrite a DOCX so its body links an external image at *url*.
+
+    Every member of the uploaded archive is copied to a new archive;
+    ``word/_rels/document.xml.rels`` and ``word/document.xml`` are
+    replaced with their instrumented versions.  All members, including
+    the two rewritten ones, keep their original ``ZipInfo`` (name,
+    timestamp, compression), so the output does not pick up
+    "modified just now" timestamps and is stable for a given input.
+
+    Returns:
+        ``(new_docx_bytes, rid)`` where *rid* is the relationship id
+        allocated for the external image.
+
+    Raises:
+        InstrumenterRejectedError: if *blob* is not a valid zip, lacks
+            one of the two required members, or a required closing tag
+            is missing.
+    """
+    rels_name = "word/_rels/document.xml.rels"
+    doc_name = "word/document.xml"
+    try:
+        with zipfile.ZipFile(io.BytesIO(blob), "r") as zf:
+            # Read each member exactly once.
+            members = [(zi, zf.read(zi)) for zi in zf.infolist()]
+    except zipfile.BadZipFile as e:
+        raise InstrumenterRejectedError("uploaded blob is not a valid DOCX zip") from e
+
+    by_name = {zi.filename: data for zi, data in members}
+    for required in (rels_name, doc_name):
+        if required not in by_name:
+            raise InstrumenterRejectedError(
+                f"DOCX missing expected member: {required!r}"
+            )
+
+    rels = by_name[rels_name]
+    rid = _next_rid(rels)
+    replacements = {
+        rels_name: _inject_relationship(rels, rid, url),
+        doc_name: _inject_drawing(by_name[doc_name], rid),
+    }
+
+    out = io.BytesIO()
+    with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as zf_out:
+        for zi, data in members:
+            # writestr() recomputes size and CRC for the given payload.
+            zf_out.writestr(zi, replacements.get(zi.filename, data))
+    return out.getvalue(), rid
+
+
+class DocxInstrumenter(CanaryInstrumenter):
+    """Instrument operator-uploaded DOCX files with an external image link."""
+
+    name = "docx"
+    mime_prefixes = (
+        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+    )
+
+    def instrument(
+        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
+    ) -> CanaryArtifact:
+        """Return *blob* rewritten to reference the callback URL.
+
+        The callback URL is ``<http_base>/c/<callback_token>``; the
+        rewrite itself is delegated to ``_mutate``.  The artifact keeps
+        *target_path* as its path.
+        """
+        callback_base = ctx.http_base.rstrip("/")
+        url = f"{callback_base}/c/{ctx.callback_token}"
+        payload, allocated_rid = _mutate(blob, url)
+        note = f"injected external-image relationship {allocated_rid} -> {url}"
+        return CanaryArtifact(
+            path=target_path,
+            content=payload,
+            mode=0o644,
+            mtime_offset=-86400 * 14,
+            instrumenter=self.name,
+            notes=[note],
+        )
diff --git a/decnet/canary/instrumenters/html.py b/decnet/canary/instrumenters/html.py
new file mode 100644
index 00000000..02b4d4e2
--- /dev/null
+++ b/decnet/canary/instrumenters/html.py
@@ -0,0 +1,45 @@
+"""HTML instrumenter — append a 1×1 tracking pixel.
+
+Stdlib-only. We don't parse the HTML; we just inject the ``<img>``
+tag immediately before the closing ``</body>`` (or, failing that, at
+the end of the document). Most renderers that support remote images
+(email previewers, IDE doc previews, browsers) will fetch it as
+soon as the document is opened.
+"""
+from __future__ import annotations
+
+import re
+
+from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryInstrumenter
+
+
+_BODY_CLOSE = re.compile(rb"