feat(canary): fingerprint_html + fingerprint_svg generators

Two new synthesised-artifact generators that bake the obfuscated
fingerprint payload into plausible-looking decoy files:

* fingerprint_html — a mundane "Internal Asset Directory" page with a
  small table of fake hosts; the obfuscated payload is inlined at the
  bottom of <body>. Visible content (row pool slice, sync timestamp)
  also varies per mint via SHA-256-derived stable ints, so two
  extracted canaries don't diff to zero even on the rendered surface.
* fingerprint_svg — standalone SVG with an embedded <script> CDATA
  block. SVG <script> only fires for top-level loads / <object> /
  <iframe>; <img>-referenced renders are safely inert.

Both derive the mint UUID via uuid.uuid5 from the callback token, so
re-mints are byte-identical (preserving the generator determinism
contract) AND the same token produces the same mint UUID across HTML
and SVG variants — the worker can correlate beacons across artifact
shapes.

Wired into the factory + KNOWN_GENERATORS, with default placement paths
~/Documents/asset_directory.html and ~/Documents/network_topology.svg
for both linux and windows personas. Tests cover determinism, per-token
divergence, structural validity (DOCTYPE/SVG headers), and that the
beacon URL stays inside the obfuscated string array (not in plaintext).
The two new entries are skipped in test_generators.py when the Node
toolchain is absent, so bare CI checkouts still pass.
This commit is contained in:
2026-04-29 16:22:18 -04:00
parent 12cd7ad9cb
commit f64e78f78c
6 changed files with 363 additions and 0 deletions

View File

@@ -21,6 +21,8 @@ KNOWN_GENERATORS: Tuple[str, ...] = (
"honeydoc_docx", "honeydoc_docx",
"honeydoc_pdf", "honeydoc_pdf",
"mysql_dump", "mysql_dump",
"fingerprint_html",
"fingerprint_svg",
) )
KNOWN_INSTRUMENTERS: Tuple[str, ...] = ( KNOWN_INSTRUMENTERS: Tuple[str, ...] = (
@@ -64,6 +66,16 @@ def get_generator(name: str) -> CanaryGenerator:
if name == "mysql_dump": if name == "mysql_dump":
from decnet.canary.generators.mysql_dump import MySQLDumpGenerator from decnet.canary.generators.mysql_dump import MySQLDumpGenerator
return MySQLDumpGenerator() return MySQLDumpGenerator()
if name == "fingerprint_html":
from decnet.canary.generators.fingerprint_html import (
FingerprintHtmlGenerator,
)
return FingerprintHtmlGenerator()
if name == "fingerprint_svg":
from decnet.canary.generators.fingerprint_svg import (
FingerprintSvgGenerator,
)
return FingerprintSvgGenerator()
raise ValueError( raise ValueError(
f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}" f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}"
) )

View File

@@ -0,0 +1,137 @@
"""HTML fingerprint canary — plausible-looking page with an obfuscated
browser-fingerprinting payload inlined at the bottom of ``<body>``.
The visible content is a deliberately mundane "internal directory"
table — the kind of file a curious attacker pulls off a decky's
filesystem and opens locally to triage. When the file is opened in
*any* network-connected browser the obfuscated payload runs and beacons
to ``/c/<callback_token>``: first a bare-open pixel, then a chunked
fingerprint dump (canvas, audio, fonts, WebGL, WebRTC local IPs,
timing jitter, permissions, composite identity hash).
Determinism: the mint UUID is derived from the callback token via
:func:`uuid.uuid5` so the same ``ctx`` always produces byte-identical
output, satisfying the generator contract in :mod:`decnet.canary.base`.
The obfuscator's seed and polymorphic config bits are likewise
callback-token-derived (see :mod:`decnet.canary.obfuscator`).
"""
from __future__ import annotations
import hashlib
import uuid
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
from decnet.canary.obfuscator import render_fingerprint_js
# Fixed namespace under which every mint UUID is derived.
_MINT_NAMESPACE = uuid.UUID("a3f7c821-9d1e-4b6a-8c2d-1e4f9a7b3c5d")


def _mint_uuid_for(callback_token: str) -> str:
    """Return the mint UUID for ``callback_token`` in canonical string form.

    The value is a name-based (version 5) UUID computed under
    ``_MINT_NAMESPACE``, so equal tokens always map to equal UUIDs.
    """
    derived = uuid.uuid5(_MINT_NAMESPACE, callback_token)
    return str(derived)
def _stable_int(callback_token: str, salt: str = "") -> int:
    """Map ``callback_token`` (plus an optional ``salt``) to a stable int.

    The built-in ``hash`` is salted per process, so it cannot back a
    generator whose output must be byte-identical across runs. Instead
    the string ``"<token>|<salt>"`` is hashed with SHA-256 and the first
    four digest bytes are read big-endian, giving a value in
    ``[0, 2**32)``.
    """
    material = f"{callback_token}|{salt}".encode("utf-8")
    prefix = hashlib.sha256(material).digest()[:4]
    return int.from_bytes(prefix, "big")
# ``str.format`` template for the decoy page. Literal CSS braces are
# doubled so ``format`` leaves them alone; the only substitution fields
# are ``{sync_label}``, ``{row_count}``, ``{rows}`` and ``{payload}``.
# ``{rows}`` and ``{payload}`` are inserted verbatim as markup / script
# text, so callers must supply content that is safe in those positions.
_PAGE_TEMPLATE = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Internal Asset Directory</title>
<style>
body{{font-family:Segoe UI,Arial,sans-serif;background:#fafafa;color:#222;
margin:24px;font-size:13px}}
h1{{font-size:18px;margin:0 0 4px 0}}
.sub{{color:#777;font-size:11px;margin-bottom:18px}}
table{{border-collapse:collapse;width:100%;background:#fff;
box-shadow:0 1px 2px rgba(0,0,0,.05)}}
th,td{{padding:6px 10px;border-bottom:1px solid #eee;text-align:left}}
th{{background:#f4f4f4;font-weight:600;font-size:11px;
text-transform:uppercase;letter-spacing:.5px;color:#555}}
tr:hover td{{background:#fafbff}}
.foot{{margin-top:16px;color:#999;font-size:11px}}
</style>
</head>
<body>
<h1>Internal Asset Directory</h1>
<div class="sub">last sync: {sync_label} · {row_count} entries · CONFIDENTIAL</div>
<table>
<tr><th>Hostname</th><th>Owner</th><th>Role</th><th>VLAN</th><th>Notes</th></tr>
{rows}
</table>
<div class="foot">page generated by directory-sync v2.4.1 — do not redistribute</div>
<script>{payload}</script>
</body>
</html>
"""
# Pool of fake inventory rows. Each entry is a 5-tuple whose fields line
# up with the table header in the page template:
# (hostname, owner, role, VLAN, notes). The notes field may be empty.
_ROW_POOL = (
    ("ny-app-01.corp.local", "k.tanaka", "app server", "vlan20", "primary"),
    ("ny-db-01.corp.local", "ops", "postgres primary", "vlan30", "backup nightly"),
    ("ny-build-02.corp.local", "ci-bot", "jenkins agent", "vlan40", ""),
    ("sf-vpn-01.corp.local", "netsec", "wireguard endpoint", "vlan10", "external"),
    ("ldn-mail-03.corp.local", "j.weber", "exchange edge", "vlan50", ""),
    ("hk-cache-01.corp.local", "ops", "redis replica", "vlan30", "lag <1s"),
    ("br-dev-04.corp.local", "m.silva", "dev sandbox", "vlan60", "ephemeral"),
    ("eu-bastion-02.corp.local", "secops", "ssh jump host", "vlan10", "mfa required"),
    ("us-archive-01.corp.local", "compliance", "log archive", "vlan70", "retain 7y"),
)
def _build_rows(callback_token: str) -> tuple[str, int]:
    """Render the ``<tr>`` rows shown in the decoy table.

    A token-derived start offset and a token-derived length (5 to 8
    rows) select a contiguous slice of ``_ROW_POOL`` that wraps around
    the end of the pool, so the visible table varies per token while
    staying deterministic.

    Cell text is HTML-escaped before being wrapped in ``<td>``: the pool
    contains values such as ``lag <1s`` that would otherwise put a raw
    ``<`` into the markup.

    Returns:
        A ``(markup, count)`` pair: the newline-joined ``<tr>`` elements
        and the number of rows rendered.
    """
    # Function-scope import keeps this fix self-contained (stdlib only).
    from html import escape

    pool_size = len(_ROW_POOL)
    start = _stable_int(callback_token, "pick") % pool_size
    count = 5 + (_stable_int(callback_token, "take") % 4)
    selected = [_ROW_POOL[(start + i) % pool_size] for i in range(count)]
    markup = "\n".join(
        # quote=False: quotes are harmless in element content, so only
        # &, < and > are rewritten.
        "<tr>"
        + "".join(f"<td>{escape(cell, quote=False)}</td>" for cell in row)
        + "</tr>"
        for row in selected
    )
    return markup, len(selected)
def _sync_label(callback_token: str) -> str:
    """Return the token-derived "last sync" timestamp shown on the page.

    Only the day (01-28) and the hour (00-23) vary with the token; the
    year, month, minute and ``UTC`` suffix are fixed text.
    """
    day_of_month = 1 + _stable_int(callback_token, "day") % 28
    hour_of_day = _stable_int(callback_token, "hour") % 24
    return f"2026-04-{day_of_month:02d} {hour_of_day:02d}:14 UTC"
class FingerprintHtmlGenerator(CanaryGenerator):
    """Synthesise an HTML page that fingerprints the browser opening it."""

    name = "fingerprint_html"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the decoy HTML artifact for ``ctx``.

        The mint UUID, the obfuscated payload, the visible table rows and
        the sync label are all derived from ``ctx.callback_token`` (the
        payload additionally from ``ctx.http_base``), so the same context
        yields the same bytes.

        Returns:
            A ``CanaryArtifact`` with UTF-8 encoded page content, an
            empty ``path``, and notes recording the beacon URL and the
            mint UUID.
        """
        # Function-scope import: only the payload guard below needs it.
        import re

        token = ctx.callback_token
        mint_uuid = _mint_uuid_for(token)
        payload = render_fingerprint_js(
            callback_token=token,
            http_base=ctx.http_base,
            mint_uuid=mint_uuid,
        )
        # An HTML <script> element ends at the first "</script" sequence,
        # even when it sits inside a JS string literal. Rewriting it to
        # "<\/script" keeps the string value identical in JS while
        # preventing the markup from being cut short. Payloads without
        # that sequence pass through byte-for-byte.
        payload = re.sub(
            r"</(script)",
            lambda m: "<\\/" + m.group(1),
            payload,
            flags=re.IGNORECASE,
        )
        rows, row_count = _build_rows(token)
        body = _PAGE_TEMPLATE.format(
            sync_label=_sync_label(token),
            row_count=row_count,
            rows=rows,
            payload=payload,
        )
        # Recorded in the notes only; the page itself carries the URL
        # solely inside the obfuscated payload.
        beacon = f"{ctx.http_base.rstrip('/')}/c/{token}"
        return CanaryArtifact(
            path="",
            content=body.encode("utf-8"),
            mode=0o644,
            # NOTE(review): presumably backdates the file by 14 days
            # (value is in seconds) — confirm against CanaryArtifact.
            mtime_offset=-86400 * 14,
            generator=self.name,
            notes=[
                f"obfuscated fingerprinter beacons={beacon}",
                f"mint_uuid={mint_uuid}",
            ],
        )

View File

@@ -0,0 +1,85 @@
"""SVG fingerprint canary — standalone SVG with an embedded ``<script>``
that runs the obfuscated fingerprinter when the file is opened directly
in a browser.
SVG ``<script>`` only fires when the SVG is loaded as a top-level
document (or via ``<object>``/``<iframe>``); it's *blocked* when the
SVG is referenced from another page's ``<img>``. That's the right
posture for canary use: an attacker browsing the decky filesystem and
double-clicking a stray ``network_topology.svg`` triggers it; rendering
inside a sandboxed CMS preview does not.
Same determinism guarantees as :mod:`fingerprint_html`.
"""
from __future__ import annotations
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
from decnet.canary.generators.fingerprint_html import _mint_uuid_for, _stable_int
from decnet.canary.obfuscator import render_fingerprint_js
# ``str.format`` template for the decoy diagram. Literal CSS braces are
# doubled so ``format`` leaves them alone; the substitution fields are
# ``{region}``, ``{ver}``, ``{review}`` and ``{payload}``. The payload is
# placed inside a CDATA section, so it must not contain an unsplit
# ``]]>`` sequence.
_DIAGRAM_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 600 360" width="600" height="360">
<style>
.box{{fill:#f7f9fb;stroke:#7a93ad;stroke-width:1.2}}
.lbl{{font:12px Segoe UI,Arial,sans-serif;fill:#2a3a4a}}
.edge{{stroke:#7a93ad;stroke-width:1.2;fill:none}}
.title{{font:bold 14px Segoe UI,Arial,sans-serif;fill:#1a2a3a}}
.cap{{font:10px Segoe UI,Arial,sans-serif;fill:#6a7a8a}}
</style>
<text class="title" x="20" y="28">Network Topology — {region} segment</text>
<text class="cap" x="20" y="44">draft v{ver} · last reviewed {review}</text>
<rect class="box" x="40" y="80" width="120" height="50" rx="4"/>
<text class="lbl" x="100" y="110" text-anchor="middle">edge gw</text>
<rect class="box" x="240" y="80" width="120" height="50" rx="4"/>
<text class="lbl" x="300" y="110" text-anchor="middle">core sw</text>
<rect class="box" x="440" y="80" width="120" height="50" rx="4"/>
<text class="lbl" x="500" y="110" text-anchor="middle">app cluster</text>
<rect class="box" x="240" y="220" width="120" height="50" rx="4"/>
<text class="lbl" x="300" y="250" text-anchor="middle">db tier</text>
<path class="edge" d="M160 105 L240 105"/>
<path class="edge" d="M360 105 L440 105"/>
<path class="edge" d="M300 130 L300 220"/>
<script type="application/ecmascript"><![CDATA[
{payload}
]]></script>
</svg>
"""
# Region labels; a token-derived index picks one for the diagram title.
_REGIONS = ("us-east", "eu-central", "ap-south", "us-west", "sa-east")


class FingerprintSvgGenerator(CanaryGenerator):
    """Synthesise an SVG that fingerprints the browser opening it."""

    name = "fingerprint_svg"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the decoy SVG artifact for ``ctx``.

        The mint UUID, the obfuscated payload and the visible title /
        caption fields (region, draft version, review date) are all
        derived from ``ctx.callback_token`` (the payload additionally
        from ``ctx.http_base``), so the same context yields the same
        bytes.

        Returns:
            A ``CanaryArtifact`` with UTF-8 encoded SVG content, an empty
            ``path``, and notes recording the beacon URL and the mint
            UUID.
        """
        token = ctx.callback_token
        mint_uuid = _mint_uuid_for(token)
        payload = render_fingerprint_js(
            callback_token=token,
            http_base=ctx.http_base,
            mint_uuid=mint_uuid,
        )
        # "]]>" closes a CDATA section wherever it appears, and compact
        # JS can contain it legitimately (e.g. ``a[b[0]]>c``). Ending the
        # section after "]]" and reopening it before ">" keeps the XML
        # well-formed while the script text seen by the browser is
        # unchanged. Payloads without "]]>" pass through byte-for-byte.
        payload = payload.replace("]]>", "]]]]><![CDATA[>")
        region = _REGIONS[_stable_int(token, "reg") % len(_REGIONS)]
        ver = 1 + (_stable_int(token, "ver") % 6)
        day = _stable_int(token, "day") % 28 + 1
        body = _DIAGRAM_TEMPLATE.format(
            region=region,
            ver=ver,
            review=f"2026-03-{day:02d}",
            payload=payload,
        )
        # Recorded in the notes only; the SVG itself carries the URL
        # solely inside the obfuscated payload.
        beacon = f"{ctx.http_base.rstrip('/')}/c/{token}"
        return CanaryArtifact(
            path="",
            content=body.encode("utf-8"),
            mode=0o644,
            # NOTE(review): presumably backdates the file by 30 days
            # (value is in seconds) — confirm against CanaryArtifact.
            mtime_offset=-86400 * 30,
            generator=self.name,
            notes=[
                f"obfuscated fingerprinter beacons={beacon}",
                f"mint_uuid={mint_uuid}",
            ],
        )

View File

@@ -28,6 +28,8 @@ _LINUX_DEFAULTS: dict[str, str] = {
"honeydoc": "/home/{user}/Documents/quarterly_report.html", "honeydoc": "/home/{user}/Documents/quarterly_report.html",
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx", "honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf", "honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
"fingerprint_html": "/home/{user}/Documents/asset_directory.html",
"fingerprint_svg": "/home/{user}/Documents/network_topology.svg",
} }
_WINDOWS_DEFAULTS: dict[str, str] = { _WINDOWS_DEFAULTS: dict[str, str] = {
@@ -38,6 +40,8 @@ _WINDOWS_DEFAULTS: dict[str, str] = {
"honeydoc": "/home/{user}/Documents/quarterly_report.html", "honeydoc": "/home/{user}/Documents/quarterly_report.html",
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx", "honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf", "honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
"fingerprint_html": "/home/{user}/Documents/asset_directory.html",
"fingerprint_svg": "/home/{user}/Documents/network_topology.svg",
} }

View File

@@ -0,0 +1,103 @@
"""Tests for the HTML/SVG fingerprint canary generators.
Skipped when the Node toolchain (or vendored javascript-obfuscator) is
not installed, mirroring :mod:`tests.canary.test_obfuscator`.
"""
from __future__ import annotations
import shutil
from pathlib import Path
import pytest
from decnet.canary import CanaryContext, get_generator
def _toolchain_ready() -> bool:
    """Report whether the Node-based obfuscator can be used.

    True only when a ``node`` executable is found on ``PATH`` and a
    ``decnet/canary/node_modules/javascript-obfuscator`` directory
    exists relative to the directory three levels up from this file.
    """
    node_missing = shutil.which("node") is None
    if node_missing:
        return False
    base_dir = Path(__file__).resolve().parents[2]
    obfuscator_dir = (
        base_dir / "decnet" / "canary" / "node_modules" / "javascript-obfuscator"
    )
    return obfuscator_dir.is_dir()
# Module-wide marker: every test in this file is skipped when
# ``_toolchain_ready()`` reports that node or the javascript-obfuscator
# package is unavailable. The probe runs once, at import time.
pytestmark = pytest.mark.skipif(
    not _toolchain_ready(),
    reason="node + javascript-obfuscator not installed under decnet/canary",
)
def _ctx(callback_token: str = "fp-tok-123") -> CanaryContext:
    """Build a linux-persona test context for the given callback token."""
    fields = {
        "callback_token": callback_token,
        "http_base": "https://canary.example.test",
        "dns_zone": "canary.example.test",
        "persona": "linux",
    }
    return CanaryContext(**fields)
def test_fingerprint_html_renders_full_page() -> None:
    """The HTML artifact is a full page and never exposes the token."""
    artifact = get_generator("fingerprint_html").generate(_ctx())
    page = artifact.content.decode("utf-8")

    assert page.startswith("<!DOCTYPE html>")
    for marker in ("<script>", "</script>", "Internal Asset Directory", "<table>"):
        assert marker in page

    # The beacon URL lives only inside the obfuscated string array —
    # that is the whole point of obfuscating the payload — and the
    # visible content must not leak the bare slug either.
    for secret in ("/c/fp-tok-123", "fp-tok-123"):
        assert secret not in page

    assert artifact.mode == 0o644
    assert artifact.generator == "fingerprint_html"
def test_fingerprint_html_is_deterministic_per_token() -> None:
    """Two mints with the same token produce byte-identical HTML."""
    first, second = (
        get_generator("fingerprint_html").generate(_ctx("tokA")) for _ in range(2)
    )
    assert first.content == second.content
def test_fingerprint_html_differs_across_tokens() -> None:
    """Different tokens produce different HTML bytes."""
    rendered = {
        tok: get_generator("fingerprint_html").generate(_ctx(tok)).content
        for tok in ("tokA", "tokB")
    }
    assert rendered["tokA"] != rendered["tokB"]
def test_fingerprint_html_notes_carry_mint_uuid_and_beacon() -> None:
    """The artifact notes record both the mint UUID and the beacon URL."""
    artifact = get_generator("fingerprint_html").generate(_ctx("tok-notes"))
    combined = " | ".join(artifact.notes)
    for expected in ("mint_uuid=", "https://canary.example.test/c/tok-notes"):
        assert expected in combined
def test_fingerprint_svg_renders_valid_svg_with_script() -> None:
    """The SVG artifact has an XML prolog, an <svg> root and a CDATA script."""
    artifact = get_generator("fingerprint_svg").generate(_ctx())
    svg_text = artifact.content.decode("utf-8")

    assert svg_text.startswith('<?xml version="1.0"')
    for marker in ("<svg", "</svg>", "<script", "<![CDATA["):
        assert marker in svg_text

    assert artifact.mode == 0o644
    assert artifact.generator == "fingerprint_svg"
def test_fingerprint_svg_is_deterministic_per_token() -> None:
    """Two mints with the same token produce byte-identical SVG."""
    first, second = (
        get_generator("fingerprint_svg").generate(_ctx("svgTokA")) for _ in range(2)
    )
    assert first.content == second.content
def test_fingerprint_svg_differs_across_tokens() -> None:
    """Different tokens produce different SVG bytes."""
    rendered = {
        tok: get_generator("fingerprint_svg").generate(_ctx(tok)).content
        for tok in ("svgTokA", "svgTokB")
    }
    assert rendered["svgTokA"] != rendered["svgTokB"]
def test_mint_uuid_stable_across_html_and_svg() -> None:
    """Both generators report the same mint UUID for a shared token.

    This is what lets the worker correlate beacons regardless of which
    artifact shape fired them.
    """
    uuid_notes = {}
    for kind in ("fingerprint_html", "fingerprint_svg"):
        artifact = get_generator(kind).generate(_ctx("shared-tok"))
        uuid_notes[kind] = next(
            note for note in artifact.notes if note.startswith("mint_uuid=")
        )
    assert uuid_notes["fingerprint_html"] == uuid_notes["fingerprint_svg"]

View File

@@ -9,12 +9,31 @@ the artifact" property.
from __future__ import annotations from __future__ import annotations
import re import re
import shutil
from pathlib import Path
import pytest import pytest
from decnet.canary import CanaryContext, get_generator from decnet.canary import CanaryContext, get_generator
from decnet.canary.factory import KNOWN_GENERATORS from decnet.canary.factory import KNOWN_GENERATORS
# fingerprint_* generators shell out to javascript-obfuscator via Node.
# Skip those parametrized cases when the toolchain isn't installed so a
# bare CI checkout doesn't fail before `npm install` runs.
# Membership in this set is what the skip logic below keys on.
_NEEDS_NODE = {"fingerprint_html", "fingerprint_svg"}
def _node_toolchain_ready() -> bool:
    """Report whether the Node-based obfuscator can be used.

    True only when a ``node`` executable is found on ``PATH`` and a
    ``decnet/canary/node_modules/javascript-obfuscator`` directory
    exists relative to the directory three levels up from this file.
    """
    node_missing = shutil.which("node") is None
    if node_missing:
        return False
    base_dir = Path(__file__).resolve().parents[2]
    obfuscator_dir = (
        base_dir / "decnet" / "canary" / "node_modules" / "javascript-obfuscator"
    )
    return obfuscator_dir.is_dir()
def _maybe_skip(name: str) -> None:
    """Skip the running test if ``name`` needs Node and the toolchain is absent.

    Generators not listed in ``_NEEDS_NODE`` are never skipped.
    """
    if name not in _NEEDS_NODE:
        return
    if _node_toolchain_ready():
        return
    pytest.skip(f"{name} requires node + javascript-obfuscator")
def _ctx(**kw) -> CanaryContext: def _ctx(**kw) -> CanaryContext:
defaults = dict( defaults = dict(
@@ -29,6 +48,7 @@ def _ctx(**kw) -> CanaryContext:
@pytest.mark.parametrize("name", KNOWN_GENERATORS) @pytest.mark.parametrize("name", KNOWN_GENERATORS)
def test_generator_is_deterministic(name: str) -> None: def test_generator_is_deterministic(name: str) -> None:
_maybe_skip(name)
g = get_generator(name) g = get_generator(name)
a = g.generate(_ctx()) a = g.generate(_ctx())
b = g.generate(_ctx()) b = g.generate(_ctx())
@@ -184,5 +204,7 @@ def test_artifacts_carry_notes() -> None:
# check what we did before the file lands. Empty notes would mean # check what we did before the file lands. Empty notes would mean
# the operator is staring at opaque bytes. # the operator is staring at opaque bytes.
for name in KNOWN_GENERATORS: for name in KNOWN_GENERATORS:
if name in _NEEDS_NODE and not _node_toolchain_ready():
continue
art = get_generator(name).generate(_ctx()) art = get_generator(name).generate(_ctx())
assert art.notes, f"{name} produced no notes" assert art.notes, f"{name} produced no notes"