honeydoc previously emitted HTML only — operators picking 'Document' out of the dropdown got a .html file dropped at /Documents/quarterly_report.docx, which any attacker would clock the moment they ran 'file' on it. Two new generators emit the real artifact format:
- honeydoc_docx: stdlib zipfile only. Builds a minimal but valid Office Open XML zip with the same Q3 review body as the HTML flavor and an external-image relationship pointing at the callback URL — the same trick the operator-upload DOCX instrumenter uses, fetched on document open by Word and LibreOffice. Reuses _drawing() and _next_rid() from instrumenters/docx.py to keep the body/relationships shape identical between synthesised and instrumented files.
- honeydoc_pdf: pikepdf-backed. One-page PDF in the 14 base fonts (Helvetica, no font embedding), realistic body, /OpenAction /URI on the catalog so most viewers fire the callback on document open. Fails with a clear error if pikepdf is missing so the operator can switch to honeydoc / honeydoc_docx.
Default placement paths now reflect each generator's true extension (.html / .docx / .pdf) so the UI suggests something sensible. Both generators are surfaced in the New Token modal's generator dropdown.
147 lines
5.2 KiB
Python
147 lines
5.2 KiB
Python
"""Coverage for the synthesised-artifact generators.
|
|
|
|
Each generator MUST be deterministic for a given ``CanaryContext`` —
|
|
the planter relies on that idempotency to re-seed without storing
|
|
the rendered bytes. We assert byte-for-byte stability across two
|
|
calls with the same inputs as well as the obvious "slug appears in
|
|
the artifact" property.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
|
|
import pytest
|
|
|
|
from decnet.canary import CanaryContext, get_generator
|
|
from decnet.canary.factory import KNOWN_GENERATORS
|
|
|
|
|
|
def _ctx(**kw) -> CanaryContext:
    """Build a ``CanaryContext`` for tests.

    Starts from a fixed set of field values and lets any keyword argument
    in ``kw`` override (or extend) them before construction.
    """
    fields = {
        "callback_token": "abcDEF123-test",
        "http_base": "https://canary.example.test",
        "dns_zone": "canary.example.test",
        "persona": "linux",
        # Caller-supplied values win over the baseline above.
        **kw,
    }
    return CanaryContext(**fields)
|
|
|
|
|
|
@pytest.mark.parametrize("name", KNOWN_GENERATORS)
def test_generator_is_deterministic(name: str) -> None:
    """Two renders from equal contexts must be byte-identical.

    Also checks the artifact metadata: the generator name round-trips, no
    instrumenter is recorded, and the file mode is either 0o600 or 0o644.
    """
    if name == "honeydoc_pdf":
        # The PDF generator depends on the optional pikepdf backend. Skip
        # (rather than fail) on installs without it, the same way the
        # dedicated PDF test in this module does.
        pytest.importorskip("pikepdf")
    g = get_generator(name)
    # Build a fresh context for each call so equality cannot come from
    # shared state on a single context object.
    a = g.generate(_ctx())
    b = g.generate(_ctx())
    assert a.content == b.content, f"{name} not deterministic"
    assert a.generator == name
    assert a.instrumenter is None
    assert a.mode in (0o600, 0o644)
|
|
|
|
|
|
@pytest.mark.parametrize("name", ["git_config", "env_file", "honeydoc"])
def test_callback_url_embedded(name: str) -> None:
    """The listed text generators must embed both the slug and the HTTP base."""
    artifact = get_generator(name).generate(_ctx(callback_token="slug-XYZ"))
    text = artifact.content.decode("utf-8")
    assert "slug-XYZ" in text, f"{name} did not embed slug"
    assert "https://canary.example.test" in text
|
|
|
|
|
|
def test_aws_creds_passive_does_not_embed_url() -> None:
    """AWS credentials must stay passive: no URL and no slug in the output."""
    # There is no realistic field in an AWS credentials file to hide a URL
    # in, so we pin the absence. This guards against a future change that
    # slips the slug into a comment and breaks realism.
    artifact = get_generator("aws_creds").generate(
        _ctx(callback_token="slug-XYZ")
    )
    text = artifact.content.decode("utf-8")
    assert "https://" not in text
    assert "slug-XYZ" not in text
    # The access key must match the AKIA[A-Z0-9]{16} shape.
    access_key_match = re.search(r"AKIA[A-Z0-9]{16}", text)
    assert access_key_match
|
|
|
|
|
|
def test_aws_creds_changes_with_slug() -> None:
    """Different callback tokens must yield different AWS credential bytes."""
    generator = get_generator("aws_creds")
    from_slug_a = generator.generate(_ctx(callback_token="slug-A")).content
    from_slug_b = generator.generate(_ctx(callback_token="slug-B")).content
    assert from_slug_a != from_slug_b
|
|
|
|
|
|
def test_ssh_key_uses_dns_zone_when_available() -> None:
    """With a DNS zone set, the key content carries ``<slug>.<zone>``."""
    context = _ctx(callback_token="slugZ", dns_zone="canary.test")
    artifact = get_generator("ssh_key").generate(context)
    assert b"slugZ.canary.test" in artifact.content
|
|
|
|
|
|
def test_ssh_key_falls_back_to_http_host_without_dns() -> None:
    """With an empty DNS zone, the HTTP base's host appears in the key."""
    context = _ctx(http_base="https://example.test", dns_zone="")
    artifact = get_generator("ssh_key").generate(context)
    assert b"example.test" in artifact.content
|
|
|
|
|
|
def test_honeydoc_html_is_valid_ish_html() -> None:
    """The HTML honeydoc has a doctype and a 1x1 ``<img>`` element."""
    html = get_generator("honeydoc").generate(_ctx()).content.decode("utf-8")
    assert "<!DOCTYPE html>" in html
    assert "<img" in html
    # The image is expected to be sized 1x1.
    assert "width=\"1\" height=\"1\"" in html
|
|
|
|
|
|
def test_honeydoc_docx_produces_valid_zip_with_callback() -> None:
    """The DOCX honeydoc is a zip with the core OOXML parts and the callback.

    Checks the zip magic, the presence of the four package parts, the
    external relationship targeting the callback URL, and the document body.
    """
    import io
    import zipfile

    artifact = get_generator("honeydoc_docx").generate(
        _ctx(callback_token="slugDX")
    )
    payload = artifact.content
    assert payload[:4] == b"PK\x03\x04"  # zip magic

    required_parts = {
        "[Content_Types].xml",
        "_rels/.rels",
        "word/document.xml",
        "word/_rels/document.xml.rels",
    }
    with zipfile.ZipFile(io.BytesIO(payload), "r") as archive:
        members = set(archive.namelist())
        assert required_parts <= members

        # The relationships part must point at the callback as an
        # external target.
        rels_xml = archive.read("word/_rels/document.xml.rels").decode()
        assert "https://canary.example.test/c/slugDX" in rels_xml
        assert "TargetMode=\"External\"" in rels_xml

        # The body carries the review heading and a drawing element.
        document_xml = archive.read("word/document.xml").decode()
        assert "Q3 Operations Review" in document_xml
        assert "<w:drawing>" in document_xml
|
|
|
|
|
|
def test_honeydoc_pdf_produces_valid_pdf_with_openaction() -> None:
    """The PDF honeydoc starts with ``%PDF-`` and has a URI OpenAction.

    Skipped when pikepdf is not importable.
    """
    pikepdf = pytest.importorskip("pikepdf")
    import io

    artifact = get_generator("honeydoc_pdf").generate(
        _ctx(callback_token="slugPDF")
    )
    payload = artifact.content
    assert payload[:5] == b"%PDF-"

    # Parse the bytes back and confirm the OpenAction URI round-trips.
    with pikepdf.open(io.BytesIO(payload)) as document:
        open_action = document.Root["/OpenAction"]
        assert str(open_action["/S"]) == "/URI"
        assert str(open_action["/URI"]) == "https://canary.example.test/c/slugPDF"
|
|
|
|
|
|
def test_git_config_remote_url_shape() -> None:
    """The git config has an ``origin`` remote whose URL embeds the slug."""
    artifact = get_generator("git_config").generate(
        _ctx(callback_token="slug42")
    )
    config_text = artifact.content.decode("utf-8")
    assert "[remote \"origin\"]" in config_text
    assert "https://canary.example.test/c/slug42/repo.git" in config_text
|
|
|
|
|
|
def test_env_file_carries_two_callback_fields() -> None:
    """The env file exposes the callback via both the API and webhook keys."""
    artifact = get_generator("env_file").generate(
        _ctx(callback_token="slugEnv")
    )
    env_text = artifact.content.decode("utf-8")
    assert "API_BASE_URL=https://canary.example.test/c/slugEnv" in env_text
    assert (
        "WEBHOOK_NOTIFY_URL=https://canary.example.test/c/slugEnv/webhook"
        in env_text
    )
|
|
|
|
|
|
def test_artifacts_carry_notes() -> None:
    """Every known generator must attach non-empty notes to its artifact.

    Notes drive the API ``preview`` endpoint so operators can sanity-check
    what we did before the file lands. Empty notes would mean the operator
    is staring at opaque bytes.
    """
    import importlib.util

    # The PDF generator depends on the optional pikepdf backend (the
    # dedicated PDF test in this module skips without it). Leave that one
    # generator out here when pikepdf is absent instead of failing the
    # whole sweep on an environment issue.
    pdf_backend_missing = importlib.util.find_spec("pikepdf") is None
    for name in KNOWN_GENERATORS:
        if name == "honeydoc_pdf" and pdf_backend_missing:
            continue
        art = get_generator(name).generate(_ctx())
        assert art.notes, f"{name} produced no notes"
|