Files
DECNET/decnet/canary/instrumenters/image.py
anti 19ceff4417 feat(canary): operator-upload instrumenters + tests
Seven instrumenters that mutate operator-supplied artifacts to
embed the callback URL:

- passthrough — bytes unchanged; only DNS-callback tokens trip
  detection, with the slug embedded in the placement path
- plain      — substitutes {{CANARY_URL}}/{{CANARY_HOST}} placeholders;
  falls back to appending a comment line whose prefix adapts to the
  apparent file syntax (#, //, ;)
- html       — injects a 1x1 tracking pixel before </body>, appends
  if the close tag is missing
- docx       — direct zipfile manipulation (no python-docx dep):
  inserts an external-image Relationship into word/_rels/document.xml.rels
  and a matching <w:drawing> element before </w:body>
- xlsx       — sibling of docx; injects an external-image relationship
  into xl/_rels/workbook.xml.rels (orphan rels are still fetched on
  open by most viewers)
- pdf        — uses pikepdf to install /OpenAction /URI on the catalog;
  rejects with a clear message when pikepdf isn't installed
- image      — uses Pillow to embed slug + URL in PNG tEXt / JPEG
  comment; rejects with a clear message when Pillow isn't installed

DOCX and XLSX share the rId allocator + relationship injector via
the docx module; both work on stdlib zipfile only.

Tests synthesise minimal real DOCX/XLSX fixtures inline, round-trip
each instrumenter, and assert the callback URL ends up in the
mutated bytes while the file still parses.
2026-04-27 13:03:42 -04:00

73 lines
2.8 KiB
Python

"""Image instrumenter — requires :mod:`PIL` (optional dependency).
For PNG and JPEG we embed the callback URL in the image metadata (PNG
tEXt chunks / the JPEG comment segment) so ``exiftool`` /
``identify -verbose`` surface it, then route the
detection via a sibling **plain-text companion file**. The image
itself can't really embed an HTTP fetcher — image decoders don't
run network requests on decode — so the realistic detection surface
is "attacker exfils the image, runs metadata tools on it, hits our
URL when curious about the embedded marker."
When Pillow isn't installed we reject and direct the operator to
``passthrough`` (which preserves the bytes; the slug then lives in
the filename only).
"""
from __future__ import annotations
import io
from decnet.canary.base import (
CanaryArtifact,
CanaryContext,
CanaryInstrumenter,
InstrumenterRejectedError,
)
class ImageInstrumenter(CanaryInstrumenter):
    """Embed the canary callback URL in an image's metadata using Pillow.

    PNG images receive ``Comment`` and ``X-Canary`` tEXt chunks; JPEG and
    GIF images receive a comment block holding the callback URL. Any other
    format Pillow manages to decode is re-encoded in its own format without
    a marker, and the artifact notes say so.
    """

    name = "image"
    mime_prefixes = ("image/png", "image/jpeg", "image/gif")

    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Re-encode *blob* with the callback URL stored in its metadata.

        Args:
            blob: Raw bytes of the operator-supplied image.
            ctx: Supplies ``http_base`` and ``callback_token``, which are
                joined into ``<http_base>/c/<callback_token>``.
            target_path: Path recorded on the returned artifact.

        Returns:
            A ``CanaryArtifact`` holding the re-encoded image bytes and a
            note describing whether the marker was embedded.

        Raises:
            InstrumenterRejectedError: If Pillow is not installed, or if
                decoding / re-encoding the image fails for any reason.
        """
        # Pillow is an optional dependency, so import lazily and turn a
        # missing install into an operator-facing rejection.
        try:
            from PIL import Image, PngImagePlugin  # type: ignore[import-not-found]
        except ImportError as e:
            raise InstrumenterRejectedError(
                "image instrumenter requires Pillow; install it (`pip "
                "install Pillow`) or re-upload the artifact with "
                "kind=passthrough so it ships unmodified."
            ) from e

        slug_url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        embedded = True
        try:
            buf_out = io.BytesIO()
            # The context manager releases Pillow's handle on the decoded
            # image even when saving raises.
            with Image.open(io.BytesIO(blob)) as img:
                fmt = (img.format or "").upper()
                saved_fmt = fmt or "PNG"
                if fmt == "PNG":
                    meta = PngImagePlugin.PngInfo()
                    meta.add_text("Comment", f"reference: {slug_url}")
                    meta.add_text("X-Canary", ctx.callback_token)
                    img.save(buf_out, format="PNG", pnginfo=meta)
                elif fmt in ("JPEG", "JPG"):
                    # Pillow encodes JPEG comments via the ``comment`` kwarg.
                    img.save(buf_out, format="JPEG", comment=slug_url.encode())
                elif fmt == "GIF":
                    # The GIF writer also accepts ``comment`` (written as a
                    # comment extension). ``save_all`` keeps every frame of
                    # an animated GIF instead of collapsing it to frame 0.
                    img.save(
                        buf_out,
                        format="GIF",
                        comment=slug_url.encode(),
                        save_all=getattr(img, "is_animated", False),
                    )
                else:
                    # No uniform comment field for the remaining formats:
                    # re-encode as-is and record that nothing was embedded.
                    embedded = False
                    img.save(buf_out, format=saved_fmt)
            mutated = buf_out.getvalue()
        except Exception as e:
            # Boundary handler: any Pillow failure becomes a rejection the
            # caller can surface, with the original exception chained.
            raise InstrumenterRejectedError(f"failed to instrument image: {e!s}") from e

        if embedded:
            note = f"image metadata carries {slug_url} (slug={ctx.callback_token})"
        else:
            # Do not claim a marker exists when the encoder path skipped it.
            note = (
                f"image re-encoded as {saved_fmt} without an embedded marker "
                f"(slug={ctx.callback_token})"
            )
        return CanaryArtifact(
            path=target_path,
            content=mutated,
            mode=0o644,
            # 30 days' worth of seconds, negative; presumably backdates the
            # artifact's mtime — confirm against CanaryArtifact.
            mtime_offset=-86400 * 30,
            instrumenter=self.name,
            notes=[note],
        )