"""Real-DOCX honeydoc generator. Synthesises a minimal but structurally valid DOCX from scratch via stdlib :mod:`zipfile`, then uses the same external-image relationship trick that powers :mod:`decnet.canary.instrumenters.docx` to embed the callback URL. No python-docx dependency. The output opens cleanly in Word / LibreOffice; both fetch the external image relationship on document load. """ from __future__ import annotations import io import zipfile from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator from decnet.canary.instrumenters.docx import _drawing, _next_rid _CONTENT_TYPES = ( '' '' '' '' '' '' ).encode() _PACKAGE_RELS = ( '' '' '' '' ).encode() _BODY_PARAGRAPHS = ( "Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)", "", "Forecast and remediation timeline below. Numbers are preliminary " "and subject to revision before the all-hands.", "", "Region Incidents MTTR (h)", "us-east 14 3.2", "us-west 9 4.7", "eu-central 22 2.1", "", "Internal contact: secops@internal", ) def _document_xml(rid_with_drawing: str | None = None) -> bytes: """Build the body XML. ``rid_with_drawing`` is the rId of the external image relationship; when set, we append the same ```` element that the DOCX instrumenter inserts so the body references the external resource. """ paragraphs = [] for line in _BODY_PARAGRAPHS: if line: paragraphs.append( "" + _xml_escape(line) + "" ) else: paragraphs.append("") body = "".join(paragraphs) drawing = _drawing(rid_with_drawing).decode() if rid_with_drawing else "" return ( '' '' f'{body}{drawing}' '' ).encode() def _xml_escape(s: str) -> str: return ( s.replace("&", "&") .replace("<", "<") .replace(">", ">") ) def _document_rels(rid: str, url: str) -> bytes: return ( '' '' f'' '' ).encode() class HoneydocDocxGenerator(CanaryGenerator): name = "honeydoc_docx" def generate(self, ctx: CanaryContext) -> CanaryArtifact: url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}" # Pick a stable rId — there's only one relationship in the # synthesised file, so any unused id works. Reuse the # instrumenter's allocator against the bare relationships # skeleton for parity with operator-uploaded DOCX flow. skeleton = ( b'' b'' b'' ) rid = _next_rid(skeleton) out = io.BytesIO() with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as zf: zf.writestr("[Content_Types].xml", _CONTENT_TYPES) zf.writestr("_rels/.rels", _PACKAGE_RELS) zf.writestr("word/document.xml", _document_xml(rid)) zf.writestr("word/_rels/document.xml.rels", _document_rels(rid, url)) return CanaryArtifact( path="", content=out.getvalue(), mode=0o644, mtime_offset=-86400 * 21, generator=self.name, notes=[ "synthesised DOCX with realistic Q3 review body", f"external-image relationship {rid} -> {url}", ], )