diff --git a/decnet/canary/factory.py b/decnet/canary/factory.py index bc641eb0..876906e0 100644 --- a/decnet/canary/factory.py +++ b/decnet/canary/factory.py @@ -18,6 +18,8 @@ KNOWN_GENERATORS: Tuple[str, ...] = ( "ssh_key", "aws_creds", "honeydoc", + "honeydoc_docx", + "honeydoc_pdf", ) KNOWN_INSTRUMENTERS: Tuple[str, ...] = ( @@ -52,6 +54,12 @@ def get_generator(name: str) -> CanaryGenerator: if name == "honeydoc": from decnet.canary.generators.honeydoc import HoneydocGenerator return HoneydocGenerator() + if name == "honeydoc_docx": + from decnet.canary.generators.honeydoc_docx import HoneydocDocxGenerator + return HoneydocDocxGenerator() + if name == "honeydoc_pdf": + from decnet.canary.generators.honeydoc_pdf import HoneydocPdfGenerator + return HoneydocPdfGenerator() raise ValueError( f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}" ) diff --git a/decnet/canary/generators/honeydoc_docx.py b/decnet/canary/generators/honeydoc_docx.py new file mode 100644 index 00000000..35456a23 --- /dev/null +++ b/decnet/canary/generators/honeydoc_docx.py @@ -0,0 +1,133 @@ +"""Real-DOCX honeydoc generator. + +Synthesises a minimal but structurally valid DOCX from scratch via +stdlib :mod:`zipfile`, then uses the same external-image relationship +trick that powers :mod:`decnet.canary.instrumenters.docx` to embed +the callback URL. No python-docx dependency. + +The output opens cleanly in Word / LibreOffice; both fetch the +external image relationship on document load. +""" +from __future__ import annotations + +import io +import zipfile + +from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator +from decnet.canary.instrumenters.docx import _drawing, _next_rid + + +_CONTENT_TYPES = ( + '' + '' + '' + '' + '' + '' +).encode() + +_PACKAGE_RELS = ( + '' + '' + '' + '' +).encode() + +_BODY_PARAGRAPHS = ( + "Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)", + "", + "Forecast and remediation timeline below. 
Numbers are preliminary " + "and subject to revision before the all-hands.", + "", + "Region Incidents MTTR (h)", + "us-east 14 3.2", + "us-west 9 4.7", + "eu-central 22 2.1", + "", + "Internal contact: secops@internal", +) + + +def _document_xml(rid_with_drawing: str | None = None) -> bytes: + """Build the body XML. + + ``rid_with_drawing`` is the rId of the external image relationship; + when set, we append the same ``<w:drawing>`` element that the DOCX + instrumenter inserts so the body references the external resource. + """ + paragraphs = [] + for line in _BODY_PARAGRAPHS: + if line: + paragraphs.append( + "" + + _xml_escape(line) + + "" + ) + else: + paragraphs.append("") + body = "".join(paragraphs) + drawing = _drawing(rid_with_drawing).decode() if rid_with_drawing else "" + return ( + '' + '' + f'{body}{drawing}' + '' + ).encode() + + +def _xml_escape(s: str) -> str: + return ( + s.replace("&", "&amp;") + .replace("<", "&lt;") + .replace(">", "&gt;") + ) + + +def _document_rels(rid: str, url: str) -> bytes: + return ( + '' + '' + f'' + '' + ).encode() + + +class HoneydocDocxGenerator(CanaryGenerator): + name = "honeydoc_docx" + + def generate(self, ctx: CanaryContext) -> CanaryArtifact: + url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}" + # Pick a stable rId — there's only one relationship in the + # synthesised file, so any unused id works. Reuse the + # instrumenter's allocator against the bare relationships + # skeleton for parity with the operator-uploaded DOCX flow. 
+ skeleton = ( + b'' + b'' + b'' + ) + rid = _next_rid(skeleton) + + out = io.BytesIO() + with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as zf: + zf.writestr("[Content_Types].xml", _CONTENT_TYPES) + zf.writestr("_rels/.rels", _PACKAGE_RELS) + zf.writestr("word/document.xml", _document_xml(rid)) + zf.writestr("word/_rels/document.xml.rels", _document_rels(rid, url)) + + return CanaryArtifact( + path="", + content=out.getvalue(), + mode=0o644, + mtime_offset=-86400 * 21, + generator=self.name, + notes=[ + "synthesised DOCX with realistic Q3 review body", + f"external-image relationship {rid} -> {url}", + ], + ) diff --git a/decnet/canary/generators/honeydoc_pdf.py b/decnet/canary/generators/honeydoc_pdf.py new file mode 100644 index 00000000..400271ff --- /dev/null +++ b/decnet/canary/generators/honeydoc_pdf.py @@ -0,0 +1,127 @@ +"""Real-PDF honeydoc generator (uses :mod:`pikepdf`). + +Builds a one-page PDF with the same Q3-review body as the HTML/DOCX +flavors and installs an ``/OpenAction`` ``/URI`` action on the +catalog so that viewers honouring it trigger the callback when the +document opens (many prompt before fetching). + +pikepdf is a hard dependency of this generator (the operator +installed it explicitly so we can use it), but it is imported lazily +inside ``generate``: when the import fails we raise a clear +:class:`InstrumenterRejectedError`, so a deployment without pikepdf +can fall back to the DOCX or HTML generators rather than crashing +the API. 
+""" +from __future__ import annotations + +import io + +from decnet.canary.base import ( + CanaryArtifact, + CanaryContext, + CanaryGenerator, + InstrumenterRejectedError, +) + + +_BODY_LINES = ( + ("Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)", 14), + ("", 12), + ("Forecast and remediation timeline below.", 11), + ("Numbers are preliminary, subject to revision.", 11), + ("", 12), + ("Region Incidents MTTR (h)", 11), + ("us-east 14 3.2", 11), + ("us-west 9 4.7", 11), + ("eu-central 22 2.1", 11), + ("", 12), + ("Internal contact: secops@internal", 11), +) + + +class HoneydocPdfGenerator(CanaryGenerator): + name = "honeydoc_pdf" + + def generate(self, ctx: CanaryContext) -> CanaryArtifact: + try: + from pikepdf import Pdf, Name, Dictionary, String # type: ignore[import-not-found] + except ImportError as e: + raise InstrumenterRejectedError( + "honeydoc_pdf requires pikepdf; install it (`pip install " + "pikepdf`) or pick honeydoc / honeydoc_docx instead." + ) from e + + url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}" + + pdf = Pdf.new() + # Helvetica is one of the 14 PDF base fonts — every viewer ships + # it, so no font embedding is required. + font = pdf.make_indirect(Dictionary( + Type=Name("/Font"), + Subtype=Name("/Type1"), + BaseFont=Name("/Helvetica"), + )) + + # Build a single content stream that writes each body line at a + # decreasing y-coordinate. PDF coordinates start at the bottom- + # left (US Letter = 612 x 792 points); we lay out lines roughly + # 18 points apart starting near the top. 
+ ops: list[str] = ["BT /F1 12 Tf 72 750 Td"] + first = True + for line, size in _BODY_LINES: + if not first: + ops.append("0 -18 Td") + first = False + ops.append(f"/F1 {size} Tf") + ops.append(f"({_pdf_escape(line)}) Tj") + ops.append("ET") + content_bytes = "\n".join(ops).encode("latin-1") + + content_stream = pdf.make_stream(content_bytes) + + page = pdf.add_blank_page(page_size=(612, 792)) + page[Name("/Resources")] = Dictionary( + Font=Dictionary(F1=font), + ) + page[Name("/Contents")] = content_stream + + # OpenAction fires the URI when the file is opened in Acrobat, + # Preview, the browser PDF viewer, etc. Most viewers prompt + # before fetching; that prompt itself is a tell, and an + # auto-allow viewer fetches silently. + pdf.Root[Name("/OpenAction")] = Dictionary( + Type=Name("/Action"), + S=Name("/URI"), + URI=String(url), + ) + + out = io.BytesIO() + pdf.save(out) + return CanaryArtifact( + path="", + content=out.getvalue(), + mode=0o644, + mtime_offset=-86400 * 21, + generator=self.name, + notes=[ + "synthesised one-page PDF with realistic Q3 review body", + f"/OpenAction /URI -> {url}", + ], + ) + + +def _pdf_escape(s: str) -> str: + """Escape parens and backslashes for PDF literal-string syntax. + + PDF string literals are wrapped in ``( … )``; inner ``(``, ``)``, + and ``\\`` need backslash escapes. Everything else is passed + through unchanged. The caller encodes the content stream as + Latin-1, so a character outside that range would raise + ``UnicodeEncodeError`` there; the realistic body sticks to ASCII + anyway. Em-dashes (not representable in Latin-1) are downgraded + to ``--`` for that reason. 
+ """ + return ( + s.replace("\\", r"\\") + .replace("(", r"\(") + .replace(")", r"\)") + .replace("—", "--") + ) diff --git a/decnet/canary/paths.py b/decnet/canary/paths.py index 35c84c50..5700ad0f 100644 --- a/decnet/canary/paths.py +++ b/decnet/canary/paths.py @@ -25,7 +25,9 @@ _LINUX_DEFAULTS: dict[str, str] = { "env_file": "/home/{user}/.env", "ssh_key": "/home/{user}/.ssh/id_rsa", "aws_creds": "/home/{user}/.aws/credentials", - "honeydoc": "/home/{user}/Documents/quarterly_report.docx", + "honeydoc": "/home/{user}/Documents/quarterly_report.html", + "honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx", + "honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf", } _WINDOWS_DEFAULTS: dict[str, str] = { @@ -33,7 +35,9 @@ _WINDOWS_DEFAULTS: dict[str, str] = { "env_file": "/home/{user}/Desktop/prod.env", "ssh_key": "/home/{user}/.ssh/id_rsa", # OpenSSH on Windows uses the same path "aws_creds": "/home/{user}/.aws/credentials", - "honeydoc": "/home/{user}/Documents/quarterly_report.docx", + "honeydoc": "/home/{user}/Documents/quarterly_report.html", + "honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx", + "honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf", } diff --git a/decnet_web/src/components/CanaryTokens.tsx b/decnet_web/src/components/CanaryTokens.tsx index fe06b5d2..83f3e5bc 100644 --- a/decnet_web/src/components/CanaryTokens.tsx +++ b/decnet_web/src/components/CanaryTokens.tsx @@ -20,7 +20,8 @@ interface BlobRow { } const KNOWN_GENERATORS = [ - 'git_config', 'env_file', 'ssh_key', 'aws_creds', 'honeydoc', + 'git_config', 'env_file', 'ssh_key', 'aws_creds', + 'honeydoc', 'honeydoc_docx', 'honeydoc_pdf', ] as const; type GeneratorName = typeof KNOWN_GENERATORS[number]; diff --git a/tests/canary/test_factory.py b/tests/canary/test_factory.py index ecb85985..e7db4390 100644 --- a/tests/canary/test_factory.py +++ b/tests/canary/test_factory.py @@ -59,7 +59,8 @@ def test_known_lists_are_stable() -> None: # If anyone 
adds/removes from the dispatch tables, the test # surfaces it. Keeps the schema-of-record in one place. assert KNOWN_GENERATORS == ( - "git_config", "env_file", "ssh_key", "aws_creds", "honeydoc", + "git_config", "env_file", "ssh_key", "aws_creds", + "honeydoc", "honeydoc_docx", "honeydoc_pdf", ) assert KNOWN_INSTRUMENTERS == ( "docx", "xlsx", "pdf", "html", "image", "plain", "passthrough", diff --git a/tests/canary/test_generators.py b/tests/canary/test_generators.py index e80566a5..0127b3a4 100644 --- a/tests/canary/test_generators.py +++ b/tests/canary/test_generators.py @@ -90,6 +90,37 @@ def test_honeydoc_html_is_valid_ish_html() -> None: assert "width=\"1\" height=\"1\"" in body +def test_honeydoc_docx_produces_valid_zip_with_callback() -> None: + import io + import zipfile + g = get_generator("honeydoc_docx") + art = g.generate(_ctx(callback_token="slugDX")) + assert art.content[:4] == b"PK\x03\x04" # zip magic + with zipfile.ZipFile(io.BytesIO(art.content), "r") as zf: + names = set(zf.namelist()) + assert {"[Content_Types].xml", "_rels/.rels", "word/document.xml", + "word/_rels/document.xml.rels"} <= names + rels = zf.read("word/_rels/document.xml.rels").decode() + assert "https://canary.example.test/c/slugDX" in rels + assert "TargetMode=\"External\"" in rels + doc = zf.read("word/document.xml").decode() + assert "Q3 Operations Review" in doc + assert "" in doc + + +def test_honeydoc_pdf_produces_valid_pdf_with_openaction() -> None: + pikepdf = pytest.importorskip("pikepdf") + g = get_generator("honeydoc_pdf") + art = g.generate(_ctx(callback_token="slugPDF")) + assert art.content[:5] == b"%PDF-" + # Re-open and confirm OpenAction URI round-trips. 
+ import io + with pikepdf.open(io.BytesIO(art.content)) as pdf: + action = pdf.Root["/OpenAction"] + assert str(action["/S"]) == "/URI" + assert str(action["/URI"]) == "https://canary.example.test/c/slugPDF" + + def test_git_config_remote_url_shape() -> None: g = get_generator("git_config") art = g.generate(_ctx(callback_token="slug42")) diff --git a/tests/canary/test_paths.py b/tests/canary/test_paths.py index c633d4f5..65232fe2 100644 --- a/tests/canary/test_paths.py +++ b/tests/canary/test_paths.py @@ -28,7 +28,9 @@ def test_default_user_dispatch() -> None: ("env_file", "windows", "/home/Administrator/Desktop/prod.env"), ("git_config", "linux", "/home/admin/.git/config"), ("ssh_key", "linux", "/home/admin/.ssh/id_rsa"), - ("honeydoc", "linux", "/home/admin/Documents/quarterly_report.docx"), + ("honeydoc", "linux", "/home/admin/Documents/quarterly_report.html"), + ("honeydoc_docx", "linux", "/home/admin/Documents/quarterly_report.docx"), + ("honeydoc_pdf", "linux", "/home/admin/Documents/quarterly_report.pdf"), ], ) def test_default_path_for_known_generators(