Seven instrumenters that mutate operator-supplied artifacts to
embed the callback URL:
- passthrough — bytes unchanged; only DNS-callback tokens trip
detection, with the slug embedded in the placement path
- plain — substitutes {{CANARY_URL}}/{{CANARY_HOST}} placeholders;
falls back to appending a comment line whose prefix adapts to the
apparent file syntax (#, //, ;)
- html — injects a 1x1 tracking pixel before </body>, or appends
it when the close tag is missing
- docx — direct zipfile manipulation (no python-docx dep):
inserts an external-image Relationship into word/_rels/document.xml.rels
and a matching <w:drawing> element before </w:body>
- xlsx — sibling of docx; injects an external-image relationship
into xl/_rels/workbook.xml.rels (orphan rels are still fetched on
open by most viewers)
- pdf — uses pikepdf to install /OpenAction /URI on the catalog;
rejects with a clear message when pikepdf isn't installed
- image — uses Pillow to embed slug + URL in PNG tEXt / JPEG
comment; rejects with a clear message when Pillow isn't installed
DOCX and XLSX share the rId allocator + relationship injector via
the docx module; both work on stdlib zipfile only.
Tests synthesise minimal real DOCX/XLSX fixtures inline, round-trip
each instrumenter, and assert the callback URL ends up in the
mutated bytes while the file still parses.
96 lines
3.1 KiB
Python
96 lines
3.1 KiB
Python
"""XLSX instrumenter — embed an external-image link.

XLSX is structurally identical to DOCX (Office Open XML zip). The
injection target is the workbook's relationships file
(``xl/_rels/workbook.xml.rels``). We add an external image
relationship there; Excel/LibreOffice fetch external images on
workbook open in the same way Word does.

We don't inject a ``<drawing>`` element into a sheet because that
requires touching ``xl/worksheets/sheetN.xml`` *and* allocating a new
``xl/drawings/drawingN.xml`` part — much higher chance of mangling
the file. An orphan external image relationship is enough: many
Office viewers fetch all relationships at open time regardless of
whether they're referenced from a sheet.

If the operator wants a stronger trigger (image visible in the
sheet, fetched even by viewers that lazy-load external resources)
they should embed the slug as hyperlink cell content via the
``plain``/``passthrough`` instrumenters.
"""
|
|
from __future__ import annotations
|
|
|
|
import io
|
|
import zipfile
|
|
from typing import Tuple
|
|
|
|
from decnet.canary.base import (
|
|
CanaryArtifact,
|
|
CanaryContext,
|
|
CanaryInstrumenter,
|
|
InstrumenterRejectedError,
|
|
)
|
|
from decnet.canary.instrumenters.docx import _inject_relationship, _next_rid
|
|
|
|
|
|
_RELS_PATHS = (
|
|
"xl/_rels/workbook.xml.rels",
|
|
"xl/_rels/sharedStrings.xml.rels",
|
|
)
|
|
|
|
|
|
def _mutate(blob: bytes, url: str) -> Tuple[bytes, str, str]:
    """Rebuild an XLSX archive with a relationship to *url* injected.

    Every member of the uploaded zip is read into memory, one
    relationships part is rewritten, and all members are written to a
    fresh in-memory archive.

    The part to rewrite is the first entry of ``_RELS_PATHS`` that is
    present in the archive, so the order of that tuple — not the order
    in which members happen to be stored in the zip — decides which
    part is mutated.

    Args:
        blob: Raw bytes of the uploaded workbook.
        url: Callback URL handed to ``_inject_relationship``.

    Returns:
        A ``(mutated_bytes, rid, target_rels)`` tuple: the rebuilt
        archive, the relationship id obtained from ``_next_rid``, and
        the archive path of the relationships part that was rewritten.

    Raises:
        InstrumenterRejectedError: If *blob* is not a readable zip
            archive, or if none of ``_RELS_PATHS`` exists in it.
    """
    try:
        with zipfile.ZipFile(io.BytesIO(blob), "r") as zf:
            members = [(zi, zf.read(zi.filename)) for zi in zf.infolist()]
    except zipfile.BadZipFile as e:
        raise InstrumenterRejectedError("uploaded blob is not a valid XLSX zip") from e

    # Pick by preference (tuple order) rather than by archive order, so
    # the workbook-level rels part wins whenever it is present.
    present = {zi.filename for zi, _ in members}
    target_rels = next((path for path in _RELS_PATHS if path in present), None)
    if target_rels is None:
        raise InstrumenterRejectedError(
            "XLSX has no workbook relationships file to mutate"
        )

    rid = ""
    out = io.BytesIO()
    with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as zf_out:
        for zi, data in members:
            if zi.filename == target_rels:
                rid = _next_rid(data)
                data = _inject_relationship(data, rid, url)
            # Reusing the original ZipInfo keeps each entry's own name and
            # metadata; ZIP_DEFLATED above is only the archive default.
            zf_out.writestr(zi, data)
    return out.getvalue(), rid, target_rels
|
|
|
|
|
|
class XlsxInstrumenter(CanaryInstrumenter):
    """Instrumenter that plants the callback URL inside an XLSX workbook.

    The archive rewrite itself is delegated to ``_mutate``; this class
    builds the callback URL and wraps the result in a ``CanaryArtifact``.
    """

    name = "xlsx"
    mime_prefixes = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )

    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return an artifact holding *blob* with the callback URL injected.

        Args:
            blob: Raw bytes of the operator-supplied workbook.
            ctx: Supplies ``http_base`` and ``callback_token``, from which
                the callback URL is assembled.
            target_path: Stored unchanged as the artifact's ``path``.

        Returns:
            A ``CanaryArtifact`` carrying the mutated bytes and a note
            naming the relationship id, the rewritten part and the URL.
        """
        # Strip trailing slashes so the join never yields "//c/...".
        base = ctx.http_base.rstrip("/")
        url = f"{base}/c/{ctx.callback_token}"

        mutated, rid, target_rels = _mutate(blob, url)

        note = (
            f"injected external-image relationship {rid} into "
            f"{target_rels} -> {url}"
        )
        return CanaryArtifact(
            path=target_path,
            content=mutated,
            mode=0o644,
            # 14 * 86400 s = two weeks, negated; presumably backdates the
            # file's mtime — confirm against CanaryArtifact.
            mtime_offset=-86400 * 14,
            instrumenter=self.name,
            notes=[note],
        )
|