diff --git a/decnet/canary/factory.py b/decnet/canary/factory.py
index bc641eb0..876906e0 100644
--- a/decnet/canary/factory.py
+++ b/decnet/canary/factory.py
@@ -18,6 +18,8 @@ KNOWN_GENERATORS: Tuple[str, ...] = (
"ssh_key",
"aws_creds",
"honeydoc",
+ "honeydoc_docx",
+ "honeydoc_pdf",
)
KNOWN_INSTRUMENTERS: Tuple[str, ...] = (
@@ -52,6 +54,12 @@ def get_generator(name: str) -> CanaryGenerator:
if name == "honeydoc":
from decnet.canary.generators.honeydoc import HoneydocGenerator
return HoneydocGenerator()
+ if name == "honeydoc_docx":
+ from decnet.canary.generators.honeydoc_docx import HoneydocDocxGenerator
+ return HoneydocDocxGenerator()
+ if name == "honeydoc_pdf":
+ from decnet.canary.generators.honeydoc_pdf import HoneydocPdfGenerator
+ return HoneydocPdfGenerator()
raise ValueError(
f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}"
)
diff --git a/decnet/canary/generators/honeydoc_docx.py b/decnet/canary/generators/honeydoc_docx.py
new file mode 100644
index 00000000..35456a23
--- /dev/null
+++ b/decnet/canary/generators/honeydoc_docx.py
@@ -0,0 +1,133 @@
+"""Real-DOCX honeydoc generator.
+
+Synthesises a minimal but structurally valid DOCX from scratch via
+stdlib :mod:`zipfile`, then uses the same external-image relationship
+trick that powers :mod:`decnet.canary.instrumenters.docx` to embed
+the callback URL. No python-docx dependency.
+
+The output opens cleanly in Word / LibreOffice; both fetch the
+external image relationship on document load.
+"""
+from __future__ import annotations
+
+import io
+import zipfile
+
+from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
+from decnet.canary.instrumenters.docx import _drawing, _next_rid
+
+
+# Content-type manifest written to ``[Content_Types].xml``.  Every part of
+# an OPC package needs a content type, declared either per file extension
+# (``Default``) or per part name (``Override``); an empty manifest leaves
+# the archive unreadable as a DOCX.
+_CONTENT_TYPES = (
+    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
+    '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
+    '<Default Extension="rels" '
+    'ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
+    '<Default Extension="xml" ContentType="application/xml"/>'
+    '<Override PartName="/word/document.xml" '
+    'ContentType="application/vnd.openxmlformats-officedocument.'
+    'wordprocessingml.document.main+xml"/>'
+    '</Types>'
+).encode()
+
+# Package-level relationships written to ``_rels/.rels``.  The single
+# ``officeDocument`` relationship tells a consumer which part is the main
+# document; without it the package has no entry point.
+_PACKAGE_RELS = (
+    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
+    '<Relationships '
+    'xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
+    '<Relationship Id="rId1" '
+    'Type="http://schemas.openxmlformats.org/officeDocument/2006/'
+    'relationships/officeDocument" '
+    'Target="word/document.xml"/>'
+    '</Relationships>'
+).encode()
+
+# Text of the decoy report, one tuple entry per body line.
+# ``_document_xml`` walks this tuple in order and handles empty entries
+# separately from entries that carry text.
+_BODY_PARAGRAPHS = (
+ "Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)",
+ "",
+ "Forecast and remediation timeline below. Numbers are preliminary "
+ "and subject to revision before the all-hands.",
+ "",
+ "Region Incidents MTTR (h)",
+ "us-east 14 3.2",
+ "us-west 9 4.7",
+ "eu-central 22 2.1",
+ "",
+ "Internal contact: secops@internal",
+)
+
+
+def _document_xml(rid_with_drawing: str | None = None) -> bytes:
+    """Build the ``word/document.xml`` part for the synthesised report.
+
+    Each non-empty entry of ``_BODY_PARAGRAPHS`` becomes one paragraph
+    holding a single text run; each empty entry becomes an empty
+    paragraph, which renders as a blank line.
+
+    Args:
+        rid_with_drawing: rId of the external image relationship. When
+            set, the markup produced by the DOCX instrumenter's
+            ``_drawing`` helper is appended after the text so the body
+            references the external resource.
+
+    Returns:
+        The encoded XML document.
+    """
+    paragraphs = [
+        # xml:space="preserve" keeps leading/trailing spaces in the run.
+        f'<w:p><w:r><w:t xml:space="preserve">{_xml_escape(line)}</w:t></w:r></w:p>'
+        if line
+        else "<w:p/>"
+        for line in _BODY_PARAGRAPHS
+    ]
+    body = "".join(paragraphs)
+    # NOTE(review): assumes ``_drawing`` returns a fragment that is valid
+    # directly inside <w:body> (i.e. a complete paragraph) — confirm
+    # against decnet.canary.instrumenters.docx.
+    drawing = _drawing(rid_with_drawing).decode() if rid_with_drawing else ""
+    # The r/wp/a/pic prefixes are declared on the root in case the drawing
+    # fragment uses them without declaring them itself; unused namespace
+    # declarations are harmless.
+    return (
+        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
+        '<w:document'
+        ' xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"'
+        ' xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"'
+        ' xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"'
+        ' xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"'
+        ' xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture">'
+        f'<w:body>{body}{drawing}</w:body>'
+        '</w:document>'
+    ).encode()
+
+
+def _xml_escape(s: str) -> str:
+    """Escape ``&``, ``<`` and ``>`` so *s* is safe as XML character data.
+
+    Quotes are left alone, so the result is meant for element text, not
+    for attribute values.
+    """
+    # ``&`` must be handled first; otherwise the ampersands introduced by
+    # the ``<`` and ``>`` entities would be escaped a second time.
+    escaped = s.replace("&", "&amp;")
+    return escaped.replace("<", "&lt;").replace(">", "&gt;")
+
+
+def _document_rels(rid: str, url: str) -> bytes:
+    """Build ``word/_rels/document.xml.rels`` with one external image link.
+
+    Args:
+        rid: Relationship id that the document body refers to.
+        url: Callback URL stored as the relationship target.
+
+    Returns:
+        The encoded relationships part. ``TargetMode="External"`` marks
+        the target as a resource outside the package.
+    """
+    from xml.sax.saxutils import quoteattr
+
+    # quoteattr() both escapes and quotes the value, so a URL containing
+    # ``&`` (e.g. a query string) cannot make the part ill-formed.
+    return (
+        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
+        '<Relationships '
+        'xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
+        f'<Relationship Id={quoteattr(rid)} '
+        'Type="http://schemas.openxmlformats.org/officeDocument/2006/'
+        'relationships/image" '
+        f'Target={quoteattr(url)} TargetMode="External"/>'
+        '</Relationships>'
+    ).encode()
+
+
+class HoneydocDocxGenerator(CanaryGenerator):
+ """Canary generator that synthesises a DOCX archive in memory.
+
+ The archive holds four parts: the content-type manifest, the package
+ relationships, the document body, and the document relationships that
+ carry the callback URL.
+ """
+ name = "honeydoc_docx"
+
+ def generate(self, ctx: CanaryContext) -> CanaryArtifact:
+ """Build the DOCX bytes for ``ctx`` and wrap them in a ``CanaryArtifact``.
+
+ The callback URL is ``ctx.http_base`` (trailing slash removed)
+ followed by ``/c/`` and ``ctx.callback_token``.
+ """
+ url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
+ # Pick a stable rId — there's only one relationship in the
+ # synthesised file, so any unused id works. Reuse the
+ # instrumenter's allocator against the bare relationships
+ # skeleton for parity with operator-uploaded DOCX flow.
+ # NOTE(review): the three literals below are empty as written, so
+ # ``_next_rid`` receives ``b''``; presumably they once held an empty
+ # Relationships document — confirm.
+ skeleton = (
+ b''
+ b''
+ b''
+ )
+ rid = _next_rid(skeleton)
+
+ # Assemble the package in memory; the same rid is written into both
+ # the body and the relationships part so the two stay linked.
+ out = io.BytesIO()
+ with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as zf:
+ zf.writestr("[Content_Types].xml", _CONTENT_TYPES)
+ zf.writestr("_rels/.rels", _PACKAGE_RELS)
+ zf.writestr("word/document.xml", _document_xml(rid))
+ zf.writestr("word/_rels/document.xml.rels", _document_rels(rid, url))
+
+ # path is left empty here — presumably filled in by the caller; confirm.
+ # -86400 * 21 is 21 days in seconds, negated — presumably back-dates
+ # the planted file; confirm against CanaryArtifact.
+ return CanaryArtifact(
+ path="",
+ content=out.getvalue(),
+ mode=0o644,
+ mtime_offset=-86400 * 21,
+ generator=self.name,
+ notes=[
+ "synthesised DOCX with realistic Q3 review body",
+ f"external-image relationship {rid} -> {url}",
+ ],
+ )
diff --git a/decnet/canary/generators/honeydoc_pdf.py b/decnet/canary/generators/honeydoc_pdf.py
new file mode 100644
index 00000000..400271ff
--- /dev/null
+++ b/decnet/canary/generators/honeydoc_pdf.py
@@ -0,0 +1,127 @@
+"""Real-PDF honeydoc generator (uses :mod:`pikepdf`).
+
+Builds a one-page PDF with the same Q3-review body as the HTML/DOCX
+flavors and installs an ``/OpenAction`` ``/URI`` action on the
+catalog so most viewers fire the callback the moment the document
+opens.
+
+This generator requires pikepdf (the operator installed it explicitly
+so we can use it); the import happens lazily inside ``generate``. If
+that import fails we raise a clear :class:`InstrumenterRejectedError`,
+so a deployment without pikepdf can fall back to the DOCX or HTML
+generators rather than crashing the API.
+"""
+from __future__ import annotations
+
+import io
+
+from decnet.canary.base import (
+ CanaryArtifact,
+ CanaryContext,
+ CanaryGenerator,
+ InstrumenterRejectedError,
+)
+
+
+# Page text as (line, font size) pairs, consumed in order by
+# ``HoneydocPdfGenerator.generate``. Empty lines still emit a text move
+# and an empty string, so they leave a vertical gap on the page.
+_BODY_LINES = (
+ ("Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)", 14),
+ ("", 12),
+ ("Forecast and remediation timeline below.", 11),
+ ("Numbers are preliminary, subject to revision.", 11),
+ ("", 12),
+ ("Region Incidents MTTR (h)", 11),
+ ("us-east 14 3.2", 11),
+ ("us-west 9 4.7", 11),
+ ("eu-central 22 2.1", 11),
+ ("", 12),
+ ("Internal contact: secops@internal", 11),
+)
+
+
+class HoneydocPdfGenerator(CanaryGenerator):
+    """Canary generator that renders a one-page PDF with pikepdf.
+
+    The page shows the text from ``_BODY_LINES``; the document catalog
+    receives an ``/OpenAction`` of type ``/URI`` pointing at the callback
+    URL.
+    """
+
+    name = "honeydoc_pdf"
+
+    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
+        """Render the PDF for ``ctx`` and wrap it in a ``CanaryArtifact``.
+
+        Raises:
+            InstrumenterRejectedError: if pikepdf cannot be imported.
+        """
+        try:
+            from pikepdf import Pdf, Name, Dictionary, String  # type: ignore[import-not-found]
+        except ImportError as e:
+            raise InstrumenterRejectedError(
+                "honeydoc_pdf requires pikepdf; install it (`pip install "
+                "pikepdf`) or pick honeydoc / honeydoc_docx instead."
+            ) from e
+
+        base = ctx.http_base.rstrip('/')
+        callback_url = f"{base}/c/{ctx.callback_token}"
+
+        document = Pdf.new()
+        # Helvetica belongs to the 14 standard PDF fonts, so the file
+        # references it by name instead of embedding font data.
+        helvetica = document.make_indirect(
+            Dictionary(
+                Type=Name("/Font"),
+                Subtype=Name("/Type1"),
+                BaseFont=Name("/Helvetica"),
+            )
+        )
+
+        # One text object for the whole page. The origin is the bottom-left
+        # corner of a 612 x 792 (US Letter) page; text starts at (72, 750)
+        # and every line after the first moves 18 units further down.
+        operators: list[str] = ["BT /F1 12 Tf 72 750 Td"]
+        for index, (text, point_size) in enumerate(_BODY_LINES):
+            if index:
+                operators.append("0 -18 Td")
+            operators.append(f"/F1 {point_size} Tf")
+            operators.append(f"({_pdf_escape(text)}) Tj")
+        operators.append("ET")
+        stream = document.make_stream("\n".join(operators).encode("latin-1"))
+
+        page = document.add_blank_page(page_size=(612, 792))
+        page[Name("/Resources")] = Dictionary(Font=Dictionary(F1=helvetica))
+        page[Name("/Contents")] = stream
+
+        # Catalog-level open action: a viewer that honours it requests the
+        # callback URL when the document is opened. Viewers that prompt
+        # first still reveal the attempt; auto-allow viewers fetch silently.
+        document.Root[Name("/OpenAction")] = Dictionary(
+            Type=Name("/Action"),
+            S=Name("/URI"),
+            URI=String(callback_url),
+        )
+
+        buffer = io.BytesIO()
+        document.save(buffer)
+        notes = [
+            "synthesised one-page PDF with realistic Q3 review body",
+            f"/OpenAction /URI -> {callback_url}",
+        ]
+        return CanaryArtifact(
+            path="",
+            content=buffer.getvalue(),
+            mode=0o644,
+            mtime_offset=-86400 * 21,
+            generator=self.name,
+            notes=notes,
+        )
+
+
+def _pdf_escape(s: str) -> str:
+    """Escape *s* for use inside a PDF literal string ``( … )``.
+
+    Backslashes and parentheses receive a backslash escape, as the
+    literal-string syntax requires. Em-dashes are downgraded to ``--``.
+    Any remaining character that Latin-1 cannot represent is replaced
+    with ``?``: the content stream is encoded as Latin-1 by the caller,
+    and an unencodable character would otherwise raise
+    ``UnicodeEncodeError`` there.
+
+    Args:
+        s: Text of a single line.
+
+    Returns:
+        The escaped text, always encodable as Latin-1.
+    """
+    escaped = (
+        # Backslash first, so the escapes added for parentheses below
+        # are not doubled.
+        s.replace("\\", r"\\")
+        .replace("(", r"\(")
+        .replace(")", r"\)")
+        .replace("—", "--")
+    )
+    return escaped.encode("latin-1", errors="replace").decode("latin-1")
diff --git a/decnet/canary/paths.py b/decnet/canary/paths.py
index 35c84c50..5700ad0f 100644
--- a/decnet/canary/paths.py
+++ b/decnet/canary/paths.py
@@ -25,7 +25,9 @@ _LINUX_DEFAULTS: dict[str, str] = {
"env_file": "/home/{user}/.env",
"ssh_key": "/home/{user}/.ssh/id_rsa",
"aws_creds": "/home/{user}/.aws/credentials",
- "honeydoc": "/home/{user}/Documents/quarterly_report.docx",
+ "honeydoc": "/home/{user}/Documents/quarterly_report.html",
+ "honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
+ "honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
}
_WINDOWS_DEFAULTS: dict[str, str] = {
@@ -33,7 +35,9 @@ _WINDOWS_DEFAULTS: dict[str, str] = {
"env_file": "/home/{user}/Desktop/prod.env",
"ssh_key": "/home/{user}/.ssh/id_rsa", # OpenSSH on Windows uses the same path
"aws_creds": "/home/{user}/.aws/credentials",
- "honeydoc": "/home/{user}/Documents/quarterly_report.docx",
+ "honeydoc": "/home/{user}/Documents/quarterly_report.html",
+ "honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
+ "honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
}
diff --git a/decnet_web/src/components/CanaryTokens.tsx b/decnet_web/src/components/CanaryTokens.tsx
index fe06b5d2..83f3e5bc 100644
--- a/decnet_web/src/components/CanaryTokens.tsx
+++ b/decnet_web/src/components/CanaryTokens.tsx
@@ -20,7 +20,8 @@ interface BlobRow {
}
const KNOWN_GENERATORS = [
- 'git_config', 'env_file', 'ssh_key', 'aws_creds', 'honeydoc',
+ 'git_config', 'env_file', 'ssh_key', 'aws_creds',
+ 'honeydoc', 'honeydoc_docx', 'honeydoc_pdf',
] as const;
type GeneratorName = typeof KNOWN_GENERATORS[number];
diff --git a/tests/canary/test_factory.py b/tests/canary/test_factory.py
index ecb85985..e7db4390 100644
--- a/tests/canary/test_factory.py
+++ b/tests/canary/test_factory.py
@@ -59,7 +59,8 @@ def test_known_lists_are_stable() -> None:
# If anyone adds/removes from the dispatch tables, the test
# surfaces it. Keeps the schema-of-record in one place.
assert KNOWN_GENERATORS == (
- "git_config", "env_file", "ssh_key", "aws_creds", "honeydoc",
+ "git_config", "env_file", "ssh_key", "aws_creds",
+ "honeydoc", "honeydoc_docx", "honeydoc_pdf",
)
assert KNOWN_INSTRUMENTERS == (
"docx", "xlsx", "pdf", "html", "image", "plain", "passthrough",
diff --git a/tests/canary/test_generators.py b/tests/canary/test_generators.py
index e80566a5..0127b3a4 100644
--- a/tests/canary/test_generators.py
+++ b/tests/canary/test_generators.py
@@ -90,6 +90,37 @@ def test_honeydoc_html_is_valid_ish_html() -> None:
assert "width=\"1\" height=\"1\"" in body
+def test_honeydoc_docx_produces_valid_zip_with_callback() -> None:
+ # The generated artifact must be a zip holding the four DOCX parts, with
+ # the callback URL stored as an external relationship target.
+ import io
+ import zipfile
+ g = get_generator("honeydoc_docx")
+ art = g.generate(_ctx(callback_token="slugDX"))
+ assert art.content[:4] == b"PK\x03\x04" # zip magic
+ with zipfile.ZipFile(io.BytesIO(art.content), "r") as zf:
+ names = set(zf.namelist())
+ assert {"[Content_Types].xml", "_rels/.rels", "word/document.xml",
+ "word/_rels/document.xml.rels"} <= names
+ rels = zf.read("word/_rels/document.xml.rels").decode()
+ assert "https://canary.example.test/c/slugDX" in rels
+ assert "TargetMode=\"External\"" in rels
+ doc = zf.read("word/document.xml").decode()
+ assert "Q3 Operations Review" in doc
+ # NOTE(review): ``"" in doc`` is true for every string, so the assertion
+ # below checks nothing. Presumably it once looked for the drawing
+ # markup added by ``_document_xml`` — confirm and restore the needle.
+ assert "" in doc
+
+
+def test_honeydoc_pdf_produces_valid_pdf_with_openaction() -> None:
+    """The PDF artifact carries an ``/OpenAction`` URI for the callback."""
+    import io
+
+    pikepdf = pytest.importorskip("pikepdf")
+    artifact = get_generator("honeydoc_pdf").generate(
+        _ctx(callback_token="slugPDF")
+    )
+    assert artifact.content.startswith(b"%PDF-")
+    # Parse the bytes back to prove the action survives a save/load cycle.
+    with pikepdf.open(io.BytesIO(artifact.content)) as reopened:
+        open_action = reopened.Root["/OpenAction"]
+        assert str(open_action["/S"]) == "/URI"
+        assert str(open_action["/URI"]) == "https://canary.example.test/c/slugPDF"
+
+
def test_git_config_remote_url_shape() -> None:
g = get_generator("git_config")
art = g.generate(_ctx(callback_token="slug42"))
diff --git a/tests/canary/test_paths.py b/tests/canary/test_paths.py
index c633d4f5..65232fe2 100644
--- a/tests/canary/test_paths.py
+++ b/tests/canary/test_paths.py
@@ -28,7 +28,9 @@ def test_default_user_dispatch() -> None:
("env_file", "windows", "/home/Administrator/Desktop/prod.env"),
("git_config", "linux", "/home/admin/.git/config"),
("ssh_key", "linux", "/home/admin/.ssh/id_rsa"),
- ("honeydoc", "linux", "/home/admin/Documents/quarterly_report.docx"),
+ ("honeydoc", "linux", "/home/admin/Documents/quarterly_report.html"),
+ ("honeydoc_docx", "linux", "/home/admin/Documents/quarterly_report.docx"),
+ ("honeydoc_pdf", "linux", "/home/admin/Documents/quarterly_report.pdf"),
],
)
def test_default_path_for_known_generators(