feat(canary): operator-upload instrumenters + tests
Seven instrumenters that mutate operator-supplied artifacts to
embed the callback URL:
- passthrough — bytes unchanged; only DNS-callback tokens trip
detection, with the slug embedded in the placement path
- plain — substitutes {{CANARY_URL}}/{{CANARY_HOST}} placeholders;
falls back to appending a comment line whose prefix adapts to the
apparent file syntax (#, //, ;)
- html — injects a 1x1 tracking pixel before </body>; appends the pixel
at the end of the document if the closing tag is missing
- docx — direct zipfile manipulation (no python-docx dep):
inserts an external-image Relationship into word/_rels/document.xml.rels
and a matching <w:drawing> element before </w:body>
- xlsx — sibling of docx; injects an external-image relationship
into xl/_rels/workbook.xml.rels (orphan rels are still fetched on
open by most viewers)
- pdf — uses pikepdf to install /OpenAction /URI on the catalog;
rejects with a clear message when pikepdf isn't installed
- image — uses Pillow to embed slug + URL in PNG tEXt / JPEG
comment; rejects with a clear message when Pillow isn't installed
DOCX and XLSX share the rId allocator + relationship injector via
the docx module; both work on stdlib zipfile only.
Tests synthesise minimal real DOCX/XLSX fixtures inline, round-trip
each instrumenter, and assert the callback URL ends up in the
mutated bytes while the file still parses.
This commit is contained in:
88
tests/canary/conftest.py
Normal file
88
tests/canary/conftest.py
Normal file
@@ -0,0 +1,88 @@
|
||||
"""Shared fixtures for canary tests — minimal DOCX/XLSX/HTML/PDF fixtures.
|
||||
|
||||
We synthesise the OOXML zips inline rather than checking real binary
|
||||
fixtures into the repo. Keeps the test surface portable and the diff
|
||||
reviewable; the smallest valid DOCX is ~12 files but Word/LibreOffice
|
||||
both accept a stripped-down skeleton with just ``[Content_Types].xml``,
|
||||
``_rels/.rels``, ``word/document.xml``, and ``word/_rels/document.xml.rels``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import zipfile
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ``[Content_Types].xml`` for the skeleton DOCX: default content types for
# ``.xml`` and ``.rels`` parts, plus an override that marks
# ``/word/document.xml`` as the main WordprocessingML document part.
# Adjacent literals are concatenated into one string with no newlines.
_DOCX_CONTENT_TYPES = (
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
    '<Default Extension="xml" ContentType="application/xml"/>'
    '<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
    '<Override PartName="/word/document.xml" '
    'ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>'
    '</Types>'
)
|
||||
|
||||
# Package-level ``_rels/.rels``: a single ``rId1`` relationship pointing
# at ``word/document.xml`` as the package's officeDocument part.
_DOCX_PACKAGE_RELS = (
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
    '<Relationship Id="rId1" '
    'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" '
    'Target="word/document.xml"/>'
    '</Relationships>'
)
|
||||
|
||||
# ``word/document.xml``: one paragraph with one run of text inside
# ``<w:body>``, so the part has a ``</w:body>`` closing tag and some
# pre-existing content.
_DOCX_DOCUMENT = (
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    '<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
    '<w:body><w:p><w:r><w:t>Existing content.</w:t></w:r></w:p></w:body>'
    '</w:document>'
)
|
||||
|
||||
# ``word/_rels/document.xml.rels``: an empty ``<Relationships>`` container,
# i.e. the document part starts with no relationships of its own.
_DOCX_DOCUMENT_RELS = (
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
    '</Relationships>'
)
|
||||
|
||||
|
||||
@pytest.fixture
def minimal_docx() -> bytes:
    """Return a tiny DOCX package skeleton as bytes.

    The archive is built in memory and holds exactly four DEFLATE-compressed
    members, written in this order: ``[Content_Types].xml``, ``_rels/.rels``,
    ``word/document.xml`` and ``word/_rels/document.xml.rels``. Their contents
    come from the module-level ``_DOCX_*`` string constants.
    """
    out = io.BytesIO()
    # Leaving the ``with`` block closes the archive, which writes the zip
    # central directory; only then is the buffer a complete zip file.
    with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as zf:
        zf.writestr("[Content_Types].xml", _DOCX_CONTENT_TYPES)
        zf.writestr("_rels/.rels", _DOCX_PACKAGE_RELS)
        zf.writestr("word/document.xml", _DOCX_DOCUMENT)
        zf.writestr("word/_rels/document.xml.rels", _DOCX_DOCUMENT_RELS)
    return out.getvalue()
|
||||
|
||||
|
||||
# ``[Content_Types].xml`` for the skeleton XLSX: the same defaults as the
# DOCX variant, with the override marking ``/xl/workbook.xml`` as the main
# SpreadsheetML workbook part.
_XLSX_CONTENT_TYPES = (
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
    '<Default Extension="xml" ContentType="application/xml"/>'
    '<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
    '<Override PartName="/xl/workbook.xml" '
    'ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>'
    '</Types>'
)
|
||||
|
||||
# ``xl/_rels/workbook.xml.rels``: an empty ``<Relationships>`` container,
# i.e. the workbook part starts with no relationships of its own.
_XLSX_WORKBOOK_RELS = (
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
    '</Relationships>'
)
|
||||
|
||||
|
||||
@pytest.fixture
def minimal_xlsx() -> bytes:
    """Return a tiny XLSX package skeleton as bytes.

    The archive is built in memory and holds exactly four DEFLATE-compressed
    members, written in this order: ``[Content_Types].xml``, ``_rels/.rels``,
    ``xl/workbook.xml`` and ``xl/_rels/workbook.xml.rels``.

    The workbook part is only a bare ``<workbook/>`` placeholder element
    (no namespace, no sheets); the relationship parts carry the structure.
    """
    # Reuse the DOCX package rels, retargeting the officeDocument
    # relationship from the Word document part to the workbook part.
    package_rels = _DOCX_PACKAGE_RELS.replace("word/document.xml", "xl/workbook.xml")

    out = io.BytesIO()
    # Leaving the ``with`` block closes the archive, which writes the zip
    # central directory; only then is the buffer a complete zip file.
    with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as zf:
        zf.writestr("[Content_Types].xml", _XLSX_CONTENT_TYPES)
        zf.writestr("_rels/.rels", package_rels)
        zf.writestr("xl/workbook.xml", '<workbook/>')
        zf.writestr("xl/_rels/workbook.xml.rels", _XLSX_WORKBOOK_RELS)
    return out.getvalue()
|
||||
Reference in New Issue
Block a user