feat(canary): operator-upload instrumenters + tests
Seven instrumenters that mutate operator-supplied artifacts to
embed the callback URL:
- passthrough — bytes unchanged; only DNS-callback tokens trip
detection, with the slug embedded in the placement path
- plain — substitutes {{CANARY_URL}}/{{CANARY_HOST}} placeholders;
falls back to appending a comment line whose prefix adapts to the
apparent file syntax (#, //, ;)
- html — injects a 1x1 tracking pixel before </body>, appends
if the close tag is missing
- docx — direct zipfile manipulation (no python-docx dep):
inserts an external-image Relationship into word/_rels/document.xml.rels
and a matching <w:drawing> element before </w:body>
- xlsx — sibling of docx; injects an external-image relationship
into xl/_rels/workbook.xml.rels (orphan rels are still fetched on
open by most viewers)
- pdf — uses pikepdf to install /OpenAction /URI on the catalog;
rejects with a clear message when pikepdf isn't installed
- image — uses Pillow to embed slug + URL in PNG tEXt / JPEG
comment; rejects with a clear message when Pillow isn't installed
DOCX and XLSX share the rId allocator + relationship injector via
the docx module; both work on stdlib zipfile only.
Tests synthesise minimal real DOCX/XLSX fixtures inline, round-trip
each instrumenter, and assert the callback URL ends up in the
mutated bytes while the file still parses.
This commit is contained in:
95
decnet/canary/instrumenters/xlsx.py
Normal file
95
decnet/canary/instrumenters/xlsx.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""XLSX instrumenter — embed an external-image link.
|
||||
|
||||
XLSX is structurally identical to DOCX (Office Open XML zip). The
|
||||
injection target is the workbook's relationships file
|
||||
(``xl/_rels/workbook.xml.rels``). We add an external image
|
||||
relationship there; Excel/LibreOffice fetch external images on
|
||||
workbook open in the same way Word does.
|
||||
|
||||
We don't inject a ``<drawing>`` element into a sheet because that
|
||||
requires touching ``xl/worksheets/sheetN.xml`` *and* allocating a new
|
||||
``xl/drawings/drawingN.xml`` part — much higher chance of mangling
|
||||
the file. An orphan external image relationship is enough: many
|
||||
Office viewers fetch all relationships at open time regardless of
|
||||
whether they're referenced from a sheet.
|
||||
|
||||
If the operator wants a stronger trigger (image visible in the
|
||||
sheet, fetched even by viewers that lazy-load external resources)
|
||||
they should embed the slug as a hyperlink cell content via the
|
||||
``plain``/``passthrough`` instrumenters.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import zipfile
|
||||
from typing import Tuple
|
||||
|
||||
from decnet.canary.base import (
|
||||
CanaryArtifact,
|
||||
CanaryContext,
|
||||
CanaryInstrumenter,
|
||||
InstrumenterRejectedError,
|
||||
)
|
||||
from decnet.canary.instrumenters.docx import _inject_relationship, _next_rid
|
||||
|
||||
|
||||
_RELS_PATHS = (
|
||||
"xl/_rels/workbook.xml.rels",
|
||||
"xl/_rels/sharedStrings.xml.rels",
|
||||
)
|
||||
|
||||
|
||||
def _mutate(blob: bytes, url: str) -> Tuple[bytes, str, str]:
|
||||
try:
|
||||
with zipfile.ZipFile(io.BytesIO(blob), "r") as zf:
|
||||
members = [(zi, zf.read(zi.filename)) for zi in zf.infolist()]
|
||||
except zipfile.BadZipFile as e:
|
||||
raise InstrumenterRejectedError("uploaded blob is not a valid XLSX zip") from e
|
||||
|
||||
target_rels: str | None = None
|
||||
for zi, _ in members:
|
||||
if zi.filename in _RELS_PATHS:
|
||||
target_rels = zi.filename
|
||||
break
|
||||
if not target_rels:
|
||||
raise InstrumenterRejectedError(
|
||||
"XLSX has no workbook relationships file to mutate"
|
||||
)
|
||||
|
||||
out_members = []
|
||||
rid = ""
|
||||
for zi, data in members:
|
||||
if zi.filename == target_rels:
|
||||
rid = _next_rid(data)
|
||||
data = _inject_relationship(data, rid, url)
|
||||
out_members.append((zi, data))
|
||||
|
||||
out = io.BytesIO()
|
||||
with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as zf_out:
|
||||
for zi, data in out_members:
|
||||
zf_out.writestr(zi, data)
|
||||
return out.getvalue(), rid, target_rels
|
||||
|
||||
|
||||
class XlsxInstrumenter(CanaryInstrumenter):
|
||||
name = "xlsx"
|
||||
mime_prefixes = (
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
)
|
||||
|
||||
def instrument(
|
||||
self, blob: bytes, ctx: CanaryContext, *, target_path: str,
|
||||
) -> CanaryArtifact:
|
||||
url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
|
||||
mutated, rid, target_rels = _mutate(blob, url)
|
||||
return CanaryArtifact(
|
||||
path=target_path,
|
||||
content=mutated,
|
||||
mode=0o644,
|
||||
mtime_offset=-86400 * 14,
|
||||
instrumenter=self.name,
|
||||
notes=[
|
||||
f"injected external-image relationship {rid} into "
|
||||
f"{target_rels} -> {url}",
|
||||
],
|
||||
)
|
||||
Reference in New Issue
Block a user