feat(canary): honeydoc_docx + honeydoc_pdf generators
honeydoc previously emitted HTML only — operators picking 'Document' out of the dropdown got a .html file dropped at /Documents/quarterly_report.docx, which any attacker would clock the moment they ran 'file' on it. This commit adds two new generators that emit the real artifact format: - honeydoc_docx: stdlib zipfile only. Builds a minimal but valid Office Open XML zip with the same Q3 review body as the HTML flavor and an external-image relationship pointing at the callback URL — same trick the operator-upload DOCX instrumenter uses, fetched on document open by Word and LibreOffice. Reuses _drawing() and _next_rid() from instrumenters/docx.py to keep the body/relationships shape identical between synthesised and instrumented files. - honeydoc_pdf: pikepdf-backed. One-page PDF in the 14 base fonts (Helvetica, no font embedding), realistic body, /OpenAction /URI on the catalog so most viewers fire the callback on document open. Falls back to a clear error if pikepdf is missing so the operator can switch to honeydoc / honeydoc_docx. Default placement paths now reflect each generator's true extension (.html / .docx / .pdf) so the UI suggests something sensible. Both generators surfaced in the New Token modal's generator dropdown.
This commit is contained in:
@@ -18,6 +18,8 @@ KNOWN_GENERATORS: Tuple[str, ...] = (
|
||||
"ssh_key",
|
||||
"aws_creds",
|
||||
"honeydoc",
|
||||
"honeydoc_docx",
|
||||
"honeydoc_pdf",
|
||||
)
|
||||
|
||||
KNOWN_INSTRUMENTERS: Tuple[str, ...] = (
|
||||
@@ -52,6 +54,12 @@ def get_generator(name: str) -> CanaryGenerator:
|
||||
if name == "honeydoc":
|
||||
from decnet.canary.generators.honeydoc import HoneydocGenerator
|
||||
return HoneydocGenerator()
|
||||
if name == "honeydoc_docx":
|
||||
from decnet.canary.generators.honeydoc_docx import HoneydocDocxGenerator
|
||||
return HoneydocDocxGenerator()
|
||||
if name == "honeydoc_pdf":
|
||||
from decnet.canary.generators.honeydoc_pdf import HoneydocPdfGenerator
|
||||
return HoneydocPdfGenerator()
|
||||
raise ValueError(
|
||||
f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}"
|
||||
)
|
||||
|
||||
133
decnet/canary/generators/honeydoc_docx.py
Normal file
133
decnet/canary/generators/honeydoc_docx.py
Normal file
@@ -0,0 +1,133 @@
|
||||
"""Real-DOCX honeydoc generator.
|
||||
|
||||
Synthesises a minimal but structurally valid DOCX from scratch via
|
||||
stdlib :mod:`zipfile`, then uses the same external-image relationship
|
||||
trick that powers :mod:`decnet.canary.instrumenters.docx` to embed
|
||||
the callback URL. No python-docx dependency.
|
||||
|
||||
The output opens cleanly in Word / LibreOffice; both fetch the
|
||||
external image relationship on document load.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import zipfile
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
from decnet.canary.instrumenters.docx import _drawing, _next_rid
|
||||
|
||||
|
||||
_CONTENT_TYPES = (
|
||||
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
|
||||
'<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
|
||||
'<Default Extension="xml" ContentType="application/xml"/>'
|
||||
'<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
|
||||
'<Override PartName="/word/document.xml" '
|
||||
'ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>'
|
||||
'</Types>'
|
||||
).encode()
|
||||
|
||||
_PACKAGE_RELS = (
|
||||
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
|
||||
'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
|
||||
'<Relationship Id="rId1" '
|
||||
'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" '
|
||||
'Target="word/document.xml"/>'
|
||||
'</Relationships>'
|
||||
).encode()
|
||||
|
||||
_BODY_PARAGRAPHS = (
|
||||
"Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)",
|
||||
"",
|
||||
"Forecast and remediation timeline below. Numbers are preliminary "
|
||||
"and subject to revision before the all-hands.",
|
||||
"",
|
||||
"Region Incidents MTTR (h)",
|
||||
"us-east 14 3.2",
|
||||
"us-west 9 4.7",
|
||||
"eu-central 22 2.1",
|
||||
"",
|
||||
"Internal contact: secops@internal",
|
||||
)
|
||||
|
||||
|
||||
def _document_xml(rid_with_drawing: str | None = None) -> bytes:
    """Render ``word/document.xml`` for the synthesised DOCX.

    Each non-empty entry of ``_BODY_PARAGRAPHS`` becomes a paragraph with
    a single run of escaped text; each empty entry becomes an empty
    ``<w:p/>``.

    ``rid_with_drawing`` is the rId of the external image relationship.
    When it is truthy, the markup returned by ``_drawing(rid)`` is
    appended after the last paragraph so the body references that
    relationship; otherwise nothing is appended.

    Returns the document XML encoded as bytes.
    """
    fragments = [
        f'<w:p><w:r><w:t xml:space="preserve">{_xml_escape(text)}</w:t></w:r></w:p>'
        if text
        else "<w:p/>"
        for text in _BODY_PARAGRAPHS
    ]
    if rid_with_drawing:
        # _drawing() hands back bytes; decode so it can be joined as text.
        fragments.append(_drawing(rid_with_drawing).decode())

    xml = "".join(
        [
            '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
            '<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">',
            "<w:body>",
            *fragments,
            "</w:body>",
            "</w:document>",
        ]
    )
    return xml.encode()
|
||||
|
||||
|
||||
def _xml_escape(s: str) -> str:
|
||||
return (
|
||||
s.replace("&", "&")
|
||||
.replace("<", "<")
|
||||
.replace(">", ">")
|
||||
)
|
||||
|
||||
|
||||
def _document_rels(rid: str, url: str) -> bytes:
|
||||
return (
|
||||
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
|
||||
'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
|
||||
f'<Relationship Id="{rid}" '
|
||||
f'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" '
|
||||
f'Target="{url}" TargetMode="External"/>'
|
||||
'</Relationships>'
|
||||
).encode()
|
||||
|
||||
|
||||
class HoneydocDocxGenerator(CanaryGenerator):
    """Generator that synthesises a DOCX whose image relationship calls back."""

    name = "honeydoc_docx"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the DOCX zip in memory and wrap it in a ``CanaryArtifact``.

        The callback URL is ``<ctx.http_base without trailing slash>/c/
        <ctx.callback_token>``. It is written as the target of the
        external image relationship, and the document body references
        that relationship through the drawing element.
        """
        callback_url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"

        # Only one relationship exists in the synthesised file, so any
        # unused id works. The id is obtained from the instrumenter's
        # allocator, run against an empty relationships skeleton, for
        # parity with the operator-uploaded DOCX flow.
        empty_rels = (
            b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
            b'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
            b'</Relationships>'
        )
        rel_id = _next_rid(empty_rels)

        # Archive members, in the order they are written to the zip.
        parts = {
            "[Content_Types].xml": _CONTENT_TYPES,
            "_rels/.rels": _PACKAGE_RELS,
            "word/document.xml": _document_xml(rel_id),
            "word/_rels/document.xml.rels": _document_rels(rel_id, callback_url),
        }

        buffer = io.BytesIO()
        with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as archive:
            for member, payload in parts.items():
                archive.writestr(member, payload)

        artifact_notes = [
            "synthesised DOCX with realistic Q3 review body",
            f"external-image relationship {rel_id} -> {callback_url}",
        ]
        return CanaryArtifact(
            path="",
            content=buffer.getvalue(),
            mode=0o644,
            # 21 days' worth of seconds, negated. Presumably backdates
            # the planted file; confirm against CanaryArtifact.
            mtime_offset=-86400 * 21,
            generator=self.name,
            notes=artifact_notes,
        )
|
||||
127
decnet/canary/generators/honeydoc_pdf.py
Normal file
127
decnet/canary/generators/honeydoc_pdf.py
Normal file
@@ -0,0 +1,127 @@
|
||||
"""Real-PDF honeydoc generator (uses :mod:`pikepdf`).
|
||||
|
||||
Builds a one-page PDF with the same Q3-review body as the HTML/DOCX
|
||||
flavors and installs an ``/OpenAction`` ``/URI`` action on the
|
||||
catalog so most viewers fire the callback the moment the document
|
||||
opens.
|
||||
|
||||
Pikepdf is now a hard dependency for this generator (the operator
|
||||
installed it explicitly so we can use it). We still surface a
|
||||
clear :class:`InstrumenterRejectedError` when imports fail, so a
|
||||
deployment without pikepdf can fall back to the DOCX or HTML
|
||||
generators rather than crashing the API.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
|
||||
from decnet.canary.base import (
|
||||
CanaryArtifact,
|
||||
CanaryContext,
|
||||
CanaryGenerator,
|
||||
InstrumenterRejectedError,
|
||||
)
|
||||
|
||||
|
||||
_BODY_LINES = (
|
||||
("Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)", 14),
|
||||
("", 12),
|
||||
("Forecast and remediation timeline below.", 11),
|
||||
("Numbers are preliminary, subject to revision.", 11),
|
||||
("", 12),
|
||||
("Region Incidents MTTR (h)", 11),
|
||||
("us-east 14 3.2", 11),
|
||||
("us-west 9 4.7", 11),
|
||||
("eu-central 22 2.1", 11),
|
||||
("", 12),
|
||||
("Internal contact: secops@internal", 11),
|
||||
)
|
||||
|
||||
|
||||
class HoneydocPdfGenerator(CanaryGenerator):
    """Generator that synthesises a one-page PDF with an /OpenAction URI."""

    name = "honeydoc_pdf"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the PDF in memory and wrap it in a ``CanaryArtifact``.

        The callback URL is ``<ctx.http_base without trailing slash>/c/
        <ctx.callback_token>`` and is installed as a ``/URI`` action
        under ``/OpenAction`` on the document catalog.

        Raises:
            InstrumenterRejectedError: if :mod:`pikepdf` cannot be imported.
        """
        # Imported lazily so that a deployment without pikepdf gets a
        # clear, catchable error instead of failing at module import.
        try:
            from pikepdf import Pdf, Name, Dictionary, String  # type: ignore[import-not-found]
        except ImportError as exc:
            raise InstrumenterRejectedError(
                "honeydoc_pdf requires pikepdf; install it (`pip install "
                "pikepdf`) or pick honeydoc / honeydoc_docx instead."
            ) from exc

        callback_url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"

        document = Pdf.new()

        # Helvetica is one of the 14 PDF base fonts, so the font
        # dictionary only names it and no font program is embedded.
        helvetica = document.make_indirect(
            Dictionary(
                Type=Name("/Font"),
                Subtype=Name("/Type1"),
                BaseFont=Name("/Helvetica"),
            )
        )

        # One text object holds the whole body. The cursor starts at
        # (72, 750) on a 612 x 792 pt page (origin bottom-left) and
        # drops 18 pt before every line except the first.
        operators = ["BT /F1 12 Tf 72 750 Td"]
        for index, (text, point_size) in enumerate(_BODY_LINES):
            if index:
                operators.append("0 -18 Td")
            operators.append(f"/F1 {point_size} Tf")
            operators.append(f"({_pdf_escape(text)}) Tj")
        operators.append("ET")
        stream = document.make_stream("\n".join(operators).encode("latin-1"))

        page = document.add_blank_page(page_size=(612, 792))
        page[Name("/Resources")] = Dictionary(Font=Dictionary(F1=helvetica))
        page[Name("/Contents")] = stream

        # OpenAction fires the URI when the file is opened in Acrobat,
        # Preview, the browser PDF viewer, etc. Most viewers prompt
        # before fetching; that prompt itself is a tell, and an
        # auto-allow viewer fetches silently.
        document.Root[Name("/OpenAction")] = Dictionary(
            Type=Name("/Action"),
            S=Name("/URI"),
            URI=String(callback_url),
        )

        buffer = io.BytesIO()
        document.save(buffer)

        artifact_notes = [
            "synthesised one-page PDF with realistic Q3 review body",
            f"/OpenAction /URI -> {callback_url}",
        ]
        return CanaryArtifact(
            path="",
            content=buffer.getvalue(),
            mode=0o644,
            # 21 days' worth of seconds, negated. Presumably backdates
            # the planted file; confirm against CanaryArtifact.
            mtime_offset=-86400 * 21,
            generator=self.name,
            notes=artifact_notes,
        )
|
||||
|
||||
|
||||
def _pdf_escape(s: str) -> str:
|
||||
"""Escape parens and backslashes for PDF literal-string syntax.
|
||||
|
||||
PDF string literals are wrapped in ``( … )``; inner ``(``, ``)``,
|
||||
and ``\\`` need backslash escapes. Everything else (including
|
||||
UTF-8 multibyte sequences) round-trips fine because Helvetica's
|
||||
encoding is WinAnsi-ish — we'll lose exotic glyphs but the
|
||||
realistic body sticks to ASCII anyway. Em-dashes are downgraded
|
||||
to ``--`` to avoid the WinAnsi gap.
|
||||
"""
|
||||
return (
|
||||
s.replace("\\", r"\\")
|
||||
.replace("(", r"\(")
|
||||
.replace(")", r"\)")
|
||||
.replace("—", "--")
|
||||
)
|
||||
@@ -25,7 +25,9 @@ _LINUX_DEFAULTS: dict[str, str] = {
|
||||
"env_file": "/home/{user}/.env",
|
||||
"ssh_key": "/home/{user}/.ssh/id_rsa",
|
||||
"aws_creds": "/home/{user}/.aws/credentials",
|
||||
"honeydoc": "/home/{user}/Documents/quarterly_report.docx",
|
||||
"honeydoc": "/home/{user}/Documents/quarterly_report.html",
|
||||
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
|
||||
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
|
||||
}
|
||||
|
||||
_WINDOWS_DEFAULTS: dict[str, str] = {
|
||||
@@ -33,7 +35,9 @@ _WINDOWS_DEFAULTS: dict[str, str] = {
|
||||
"env_file": "/home/{user}/Desktop/prod.env",
|
||||
"ssh_key": "/home/{user}/.ssh/id_rsa", # OpenSSH on Windows uses the same path
|
||||
"aws_creds": "/home/{user}/.aws/credentials",
|
||||
"honeydoc": "/home/{user}/Documents/quarterly_report.docx",
|
||||
"honeydoc": "/home/{user}/Documents/quarterly_report.html",
|
||||
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
|
||||
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -20,7 +20,8 @@ interface BlobRow {
|
||||
}
|
||||
|
||||
const KNOWN_GENERATORS = [
|
||||
'git_config', 'env_file', 'ssh_key', 'aws_creds', 'honeydoc',
|
||||
'git_config', 'env_file', 'ssh_key', 'aws_creds',
|
||||
'honeydoc', 'honeydoc_docx', 'honeydoc_pdf',
|
||||
] as const;
|
||||
type GeneratorName = typeof KNOWN_GENERATORS[number];
|
||||
|
||||
|
||||
@@ -59,7 +59,8 @@ def test_known_lists_are_stable() -> None:
|
||||
# If anyone adds/removes from the dispatch tables, the test
|
||||
# surfaces it. Keeps the schema-of-record in one place.
|
||||
assert KNOWN_GENERATORS == (
|
||||
"git_config", "env_file", "ssh_key", "aws_creds", "honeydoc",
|
||||
"git_config", "env_file", "ssh_key", "aws_creds",
|
||||
"honeydoc", "honeydoc_docx", "honeydoc_pdf",
|
||||
)
|
||||
assert KNOWN_INSTRUMENTERS == (
|
||||
"docx", "xlsx", "pdf", "html", "image", "plain", "passthrough",
|
||||
|
||||
@@ -90,6 +90,37 @@ def test_honeydoc_html_is_valid_ish_html() -> None:
|
||||
assert "width=\"1\" height=\"1\"" in body
|
||||
|
||||
|
||||
def test_honeydoc_docx_produces_valid_zip_with_callback() -> None:
    """The DOCX generator emits a zip with the callback in its rels part."""
    import io
    import zipfile

    artifact = get_generator("honeydoc_docx").generate(
        _ctx(callback_token="slugDX")
    )
    assert artifact.content[:4] == b"PK\x03\x04"  # zip magic

    required_members = {
        "[Content_Types].xml",
        "_rels/.rels",
        "word/document.xml",
        "word/_rels/document.xml.rels",
    }
    with zipfile.ZipFile(io.BytesIO(artifact.content), "r") as archive:
        members = set(archive.namelist())
        assert required_members <= members

        # The relationships part must carry the callback as an external target.
        rels_xml = archive.read("word/_rels/document.xml.rels").decode()
        assert "https://canary.example.test/c/slugDX" in rels_xml
        assert "TargetMode=\"External\"" in rels_xml

        # The body must contain the decoy text and the drawing element.
        document_xml = archive.read("word/document.xml").decode()
        assert "Q3 Operations Review" in document_xml
        assert "<w:drawing>" in document_xml
|
||||
|
||||
|
||||
def test_honeydoc_pdf_produces_valid_pdf_with_openaction() -> None:
    """The PDF generator emits a PDF whose /OpenAction is the callback URI."""
    # Skipped rather than failed when pikepdf is not installed.
    pikepdf = pytest.importorskip("pikepdf")
    import io

    artifact = get_generator("honeydoc_pdf").generate(
        _ctx(callback_token="slugPDF")
    )
    assert artifact.content[:5] == b"%PDF-"

    # Re-open and confirm OpenAction URI round-trips.
    with pikepdf.open(io.BytesIO(artifact.content)) as reopened:
        open_action = reopened.Root["/OpenAction"]
        assert str(open_action["/S"]) == "/URI"
        assert str(open_action["/URI"]) == "https://canary.example.test/c/slugPDF"
|
||||
|
||||
|
||||
def test_git_config_remote_url_shape() -> None:
|
||||
g = get_generator("git_config")
|
||||
art = g.generate(_ctx(callback_token="slug42"))
|
||||
|
||||
@@ -28,7 +28,9 @@ def test_default_user_dispatch() -> None:
|
||||
("env_file", "windows", "/home/Administrator/Desktop/prod.env"),
|
||||
("git_config", "linux", "/home/admin/.git/config"),
|
||||
("ssh_key", "linux", "/home/admin/.ssh/id_rsa"),
|
||||
("honeydoc", "linux", "/home/admin/Documents/quarterly_report.docx"),
|
||||
("honeydoc", "linux", "/home/admin/Documents/quarterly_report.html"),
|
||||
("honeydoc_docx", "linux", "/home/admin/Documents/quarterly_report.docx"),
|
||||
("honeydoc_pdf", "linux", "/home/admin/Documents/quarterly_report.pdf"),
|
||||
],
|
||||
)
|
||||
def test_default_path_for_known_generators(
|
||||
|
||||
Reference in New Issue
Block a user