feat(canary): honeydoc_docx + honeydoc_pdf generators

honeydoc previously emitted HTML only — operators picking 'Document'
out of the dropdown got a .html file dropped at
/Documents/quarterly_report.docx, which any attacker would clock the
moment they ran 'file' on it.

This commit adds two new generators that emit the real artifact format:

- honeydoc_docx: stdlib zipfile only. Builds a minimal but valid
  Office Open XML zip with the same Q3 review body as the HTML
  flavor and an external-image relationship pointing at the
  callback URL — same trick the operator-upload DOCX instrumenter
  uses, fetched on document open by Word and LibreOffice. Reuses
  _drawing() and _next_rid() from instrumenters/docx.py to keep
  the body/relationships shape identical between synthesised and
  instrumented files.

- honeydoc_pdf: pikepdf-backed. One-page PDF set in Helvetica, one of
  the 14 base fonts (so no font embedding), with a realistic body and
  an /OpenAction URI action on the catalog so most viewers fire the
  callback on document open. Fails with a clear error if pikepdf is
  missing so the operator can switch to honeydoc / honeydoc_docx.

Default placement paths now reflect each generator's true extension
(.html / .docx / .pdf) so the UI suggests something sensible. Both
generators are now surfaced in the New Token modal's generator dropdown.
This commit is contained in:
2026-04-27 13:44:20 -04:00
parent c17b9e01c8
commit 5ac8e0f91a
8 changed files with 312 additions and 5 deletions

View File

@@ -59,7 +59,8 @@ def test_known_lists_are_stable() -> None:
# If anyone adds/removes from the dispatch tables, the test
# surfaces it. Keeps the schema-of-record in one place.
assert KNOWN_GENERATORS == (
"git_config", "env_file", "ssh_key", "aws_creds", "honeydoc",
"git_config", "env_file", "ssh_key", "aws_creds",
"honeydoc", "honeydoc_docx", "honeydoc_pdf",
)
assert KNOWN_INSTRUMENTERS == (
"docx", "xlsx", "pdf", "html", "image", "plain", "passthrough",

View File

@@ -90,6 +90,37 @@ def test_honeydoc_html_is_valid_ish_html() -> None:
assert "width=\"1\" height=\"1\"" in body
def test_honeydoc_docx_produces_valid_zip_with_callback() -> None:
    """Check that honeydoc_docx output is a zip with the expected parts.

    The generated bytes must start with the zip local-file magic, contain
    the four package parts listed below, reference the callback URL as an
    external relationship target, and carry the review body plus a
    drawing element in the main document part.
    """
    import io
    import zipfile

    generator = get_generator("honeydoc_docx")
    artifact = generator.generate(_ctx(callback_token="slugDX"))

    magic = artifact.content[:4]
    assert magic == b"PK\x03\x04"  # zip magic

    required_parts = {
        "[Content_Types].xml",
        "_rels/.rels",
        "word/document.xml",
        "word/_rels/document.xml.rels",
    }
    with zipfile.ZipFile(io.BytesIO(artifact.content), "r") as archive:
        # Check membership before reading so a missing part surfaces as
        # an assertion failure rather than a KeyError from read().
        members = set(archive.namelist())
        assert required_parts.issubset(members)

        relationships = archive.read("word/_rels/document.xml.rels").decode()
        assert "https://canary.example.test/c/slugDX" in relationships
        assert "TargetMode=\"External\"" in relationships

        document_xml = archive.read("word/document.xml").decode()
        assert "Q3 Operations Review" in document_xml
        assert "<w:drawing>" in document_xml
def test_honeydoc_pdf_produces_valid_pdf_with_openaction() -> None:
    """Check that honeydoc_pdf output is a PDF whose OpenAction is the callback.

    Skipped when pikepdf is not importable. The generated bytes must start
    with the PDF header, and after re-opening them the catalog's
    /OpenAction must be a /URI action pointing at the callback URL.
    """
    import io

    pikepdf = pytest.importorskip("pikepdf")

    generator = get_generator("honeydoc_pdf")
    artifact = generator.generate(_ctx(callback_token="slugPDF"))

    header = artifact.content[:5]
    assert header == b"%PDF-"

    # Re-parse the emitted bytes so the check covers what was actually
    # serialised, not an in-memory object.
    expected_uri = "https://canary.example.test/c/slugPDF"
    with pikepdf.open(io.BytesIO(artifact.content)) as reopened:
        open_action = reopened.Root["/OpenAction"]
        assert str(open_action["/S"]) == "/URI"
        assert str(open_action["/URI"]) == expected_uri
def test_git_config_remote_url_shape() -> None:
g = get_generator("git_config")
art = g.generate(_ctx(callback_token="slug42"))

View File

@@ -28,7 +28,9 @@ def test_default_user_dispatch() -> None:
("env_file", "windows", "/home/Administrator/Desktop/prod.env"),
("git_config", "linux", "/home/admin/.git/config"),
("ssh_key", "linux", "/home/admin/.ssh/id_rsa"),
("honeydoc", "linux", "/home/admin/Documents/quarterly_report.docx"),
("honeydoc", "linux", "/home/admin/Documents/quarterly_report.html"),
("honeydoc_docx", "linux", "/home/admin/Documents/quarterly_report.docx"),
("honeydoc_pdf", "linux", "/home/admin/Documents/quarterly_report.pdf"),
],
)
def test_default_path_for_known_generators(