honeydoc previously emitted HTML only — operators picking 'Document' out of the dropdown got a .html file dropped at /Documents/quarterly_report.docx, which any attacker would clock the moment they ran 'file' on it. Two new generators that emit the real artifact format: - honeydoc_docx: stdlib zipfile only. Builds a minimal but valid Office Open XML zip with the same Q3 review body as the HTML flavor and an external-image relationship pointing at the callback URL — same trick the operator-upload DOCX instrumenter uses, fetched on document open by Word and LibreOffice. Reuses _drawing() and _next_rid() from instrumenters/docx.py to keep the body/relationships shape identical between synthesised and instrumented files. - honeydoc_pdf: pikepdf-backed. One-page PDF in the 14 base fonts (Helvetica, no font embedding), realistic body, /OpenAction /URI on the catalog so most viewers fire the callback on document open. Falls back to a clear error if pikepdf is missing so the operator can switch to honeydoc / honeydoc_docx. Default placement paths now reflect each generator's true extension (.html / .docx / .pdf) so the UI suggests something sensible. Both generators surfaced in the New Token modal's generator dropdown.
134 lines
4.8 KiB
Python
134 lines
4.8 KiB
Python
"""Real-DOCX honeydoc generator.
|
|
|
|
Synthesises a minimal but structurally valid DOCX from scratch via
|
|
stdlib :mod:`zipfile`, then uses the same external-image relationship
|
|
trick that powers :mod:`decnet.canary.instrumenters.docx` to embed
|
|
the callback URL. No python-docx dependency.
|
|
|
|
The output opens cleanly in Word / LibreOffice; both fetch the
|
|
external image relationship on document load.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import io
|
|
import zipfile
|
|
|
|
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
|
from decnet.canary.instrumenters.docx import _drawing, _next_rid
|
|
|
|
|
|
_CONTENT_TYPES = (
|
|
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
|
|
'<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
|
|
'<Default Extension="xml" ContentType="application/xml"/>'
|
|
'<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
|
|
'<Override PartName="/word/document.xml" '
|
|
'ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>'
|
|
'</Types>'
|
|
).encode()
|
|
|
|
_PACKAGE_RELS = (
|
|
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
|
|
'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
|
|
'<Relationship Id="rId1" '
|
|
'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" '
|
|
'Target="word/document.xml"/>'
|
|
'</Relationships>'
|
|
).encode()
|
|
|
|
_BODY_PARAGRAPHS = (
|
|
"Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)",
|
|
"",
|
|
"Forecast and remediation timeline below. Numbers are preliminary "
|
|
"and subject to revision before the all-hands.",
|
|
"",
|
|
"Region Incidents MTTR (h)",
|
|
"us-east 14 3.2",
|
|
"us-west 9 4.7",
|
|
"eu-central 22 2.1",
|
|
"",
|
|
"Internal contact: secops@internal",
|
|
)
|
|
|
|
|
|
def _document_xml(rid_with_drawing: str | None = None) -> bytes:
    """Render ``word/document.xml`` for the synthesised document.

    Every entry of ``_BODY_PARAGRAPHS`` becomes one ``<w:p>``: non-empty
    entries are escaped with ``_xml_escape`` and wrapped in a single run
    with ``xml:space="preserve"``; empty entries become a bare ``<w:p/>``.

    Args:
        rid_with_drawing: rId of the external image relationship. When
            truthy, the markup returned by ``_drawing(rid)`` is appended
            after the last paragraph inside ``<w:body>``; when ``None``
            (or empty) nothing is appended.

    Returns:
        The complete document part, encoded as bytes.
    """
    rendered_paragraphs = "".join(
        f'<w:p><w:r><w:t xml:space="preserve">{_xml_escape(text)}</w:t></w:r></w:p>'
        if text
        else "<w:p/>"
        for text in _BODY_PARAGRAPHS
    )

    # NOTE(review): only the ``w:`` prefix is declared on the root element
    # below; presumably ``_drawing`` declares any other prefixes it emits
    # and returns markup that is valid as a direct child of <w:body> —
    # confirm against instrumenters/docx.py.
    if rid_with_drawing:
        drawing_markup = _drawing(rid_with_drawing).decode()
    else:
        drawing_markup = ""

    xml_declaration = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    root_open = (
        '<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
    )
    document = (
        f"{xml_declaration}{root_open}"
        f"<w:body>{rendered_paragraphs}{drawing_markup}</w:body>"
        "</w:document>"
    )
    return document.encode()
|
|
|
|
|
|
def _xml_escape(s: str) -> str:
|
|
return (
|
|
s.replace("&", "&")
|
|
.replace("<", "<")
|
|
.replace(">", ">")
|
|
)
|
|
|
|
|
|
def _document_rels(rid: str, url: str) -> bytes:
|
|
return (
|
|
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
|
|
'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
|
|
f'<Relationship Id="{rid}" '
|
|
f'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" '
|
|
f'Target="{url}" TargetMode="External"/>'
|
|
'</Relationships>'
|
|
).encode()
|
|
|
|
|
|
class HoneydocDocxGenerator(CanaryGenerator):
    """Canary generator that synthesises a DOCX carrying a callback URL."""

    name = "honeydoc_docx"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the DOCX archive in memory and wrap it in an artifact.

        The callback URL is ``<http_base>/c/<callback_token>`` (trailing
        slashes stripped from ``http_base``). It is stored as the target
        of an external image relationship, and the document body
        references that relationship through its rId.

        Args:
            ctx: Supplies ``http_base`` and ``callback_token``.

        Returns:
            A ``CanaryArtifact`` whose ``content`` is the zipped DOCX.
        """
        callback_url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"

        # The synthesised file has exactly one document-level relationship,
        # so any unused id would do. The id is still obtained from the
        # instrumenter's allocator, fed an empty relationships skeleton, to
        # stay in step with the operator-uploaded DOCX flow.
        empty_rels = (
            b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
            b'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
            b'</Relationships>'
        )
        rel_id = _next_rid(empty_rels)

        # Archive members in the order they are written.
        members = (
            ("[Content_Types].xml", _CONTENT_TYPES),
            ("_rels/.rels", _PACKAGE_RELS),
            ("word/document.xml", _document_xml(rel_id)),
            ("word/_rels/document.xml.rels", _document_rels(rel_id, callback_url)),
        )

        buffer = io.BytesIO()
        with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as archive:
            for arcname, payload in members:
                archive.writestr(arcname, payload)

        return CanaryArtifact(
            # NOTE(review): path is left empty here; presumably the caller
            # fills in the placement path — confirm.
            path="",
            content=buffer.getvalue(),
            mode=0o644,
            # Minus 21 days, expressed in seconds.
            mtime_offset=-86400 * 21,
            generator=self.name,
            notes=[
                "synthesised DOCX with realistic Q3 review body",
                f"external-image relationship {rel_id} -> {callback_url}",
            ],
        )
|