merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
7
decnet/canary/generators/__init__.py
Normal file
7
decnet/canary/generators/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""Built-in canary generators (synthesised fake artifacts).
|
||||
|
||||
Concrete classes live in sibling modules and are imported lazily by
|
||||
:func:`decnet.canary.factory.get_generator` to keep the import-time
|
||||
cost of :mod:`decnet.canary` cheap for callers that only need the
|
||||
ABCs.
|
||||
"""
|
||||
86
decnet/canary/generators/aws_creds.py
Normal file
86
decnet/canary/generators/aws_creds.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""Fake ``~/.aws/credentials`` block (passive bait).
|
||||
|
||||
This is the **passive** variant — no callback wiring. An attacker
|
||||
who exfils these keys can't trip a detection unless we run a real
|
||||
AWS account with a deny-all CloudTrail listener (post-v1). The
|
||||
realism is the point: the file looks like a routinely used credentials
|
||||
file, so the rest of the decky's persona feels lived-in.
|
||||
|
||||
If the operator picks ``kind="aws_passive"`` we accept that no slug
|
||||
will be embedded. If they pick ``kind="http"`` or ``kind="dns"`` for
|
||||
this generator, the API will reject the combination with a 400 — AWS
|
||||
keys have no plausible field where a URL or hostname survives a
|
||||
``grep -E '[A-Z0-9]{20}'`` smell test.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from secrets import token_urlsafe
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
# Stable AWS-style key body derived from the slug. Keeping the
|
||||
# generator deterministic (per-slug) means re-seeding produces the
|
||||
# same bytes — the planter is naturally idempotent and an operator
|
||||
# who runs ``decnet canary verify`` can re-derive the expected file
|
||||
# without touching the DB.
|
||||
|
||||
def _fake_access_key(seed: str) -> str:
|
||||
# AWS access keys are 20 chars, uppercase alphanum, AKIA prefix.
|
||||
body = hashlib.sha256(seed.encode()).hexdigest().upper()
|
||||
return "AKIA" + body[:16]
|
||||
|
||||
|
||||
def _fake_secret_key(seed: str) -> str:
|
||||
# AWS secret keys are 40 chars, mixed-case base64-ish. We use
|
||||
# base64-safe characters from token_urlsafe seeded by a SHA-256
|
||||
# of the seed so the output is stable per slug.
|
||||
h = hashlib.sha256(("secret:" + seed).encode()).digest()
|
||||
# Reuse token_urlsafe for the alphabet but pad to 40 chars from
|
||||
# the deterministic bytes so we don't depend on os.urandom.
|
||||
import base64
|
||||
return base64.b64encode(h)[:40].decode()
|
||||
|
||||
|
||||
class AWSCredsGenerator(CanaryGenerator):
|
||||
name = "aws_creds"
|
||||
|
||||
def generate(self, ctx: CanaryContext) -> CanaryArtifact:
|
||||
seed = ctx.callback_token
|
||||
access = _fake_access_key(seed)
|
||||
secret = _fake_secret_key(seed)
|
||||
body = (
|
||||
"[default]\n"
|
||||
f"aws_access_key_id = {access}\n"
|
||||
f"aws_secret_access_key = {secret}\n"
|
||||
"region = us-east-1\n"
|
||||
"\n"
|
||||
"[prod]\n"
|
||||
f"aws_access_key_id = {_fake_access_key('prod-' + seed)}\n"
|
||||
f"aws_secret_access_key = {_fake_secret_key('prod-' + seed)}\n"
|
||||
"region = us-west-2\n"
|
||||
)
|
||||
return CanaryArtifact(
|
||||
path="", # caller (planter) fills this from CanaryToken.placement_path
|
||||
content=body.encode("utf-8"),
|
||||
mode=0o600,
|
||||
mtime_offset=-86400 * 14, # 2 weeks ago — looks lived-in
|
||||
generator=self.name,
|
||||
notes=[
|
||||
"fake AWS keys; no callback embedded — passive bait only",
|
||||
f"derived deterministically from slug={seed}",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
# Re-exported so the slug helper is reusable from the
|
||||
# instrumenters/passthrough module without an internal import path.
|
||||
__all__ = ["AWSCredsGenerator", "_fake_access_key", "_fake_secret_key"]
|
||||
|
||||
|
||||
# Imports at the bottom keep the public dataclasses on top — pylint
|
||||
# doesn't run on this repo, but tests do, and putting ``token_urlsafe``
|
||||
# in a public symbol confuses readers. Suppress the unused warning by
|
||||
# referencing it once.
|
||||
_ = token_urlsafe
|
||||
56
decnet/canary/generators/env_file.py
Normal file
56
decnet/canary/generators/env_file.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""Fake ``.env`` with embedded callback URLs.
|
||||
|
||||
Modern web stacks read environment variables for everything from
|
||||
database DSNs to webhook URLs, so dropping a few realistic-looking
|
||||
``KEY=value`` pairs alongside the canary URL is unremarkable. The
|
||||
slug appears in two fields:
|
||||
|
||||
* ``API_BASE_URL`` — the obvious one; an attacker scripting against
|
||||
the credentials hits the worker on first invocation.
|
||||
* ``WEBHOOK_NOTIFY_URL`` — secondary, in case the attacker greps for
|
||||
``WEBHOOK`` and pivots there.
|
||||
|
||||
Other fields (``DB_PASSWORD``, ``REDIS_URL``, ``JWT_SECRET``) are
|
||||
plausible but inert — they're realism filler, not detection
|
||||
mechanisms.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
def _stable_token(seed: str, prefix: str = "") -> str:
|
||||
h = hashlib.sha256((prefix + seed).encode()).hexdigest()
|
||||
return h[:32]
|
||||
|
||||
|
||||
class EnvFileGenerator(CanaryGenerator):
|
||||
name = "env_file"
|
||||
|
||||
def generate(self, ctx: CanaryContext) -> CanaryArtifact:
|
||||
base = ctx.http_base.rstrip("/")
|
||||
slug = ctx.callback_token
|
||||
api_url = f"{base}/c/{slug}"
|
||||
body = (
|
||||
"# Production environment — DO NOT COMMIT\n"
|
||||
f"API_BASE_URL={api_url}\n"
|
||||
f"WEBHOOK_NOTIFY_URL={api_url}/webhook\n"
|
||||
f"DB_PASSWORD={_stable_token(slug, 'db:')}\n"
|
||||
f"REDIS_URL=redis://:{_stable_token(slug, 'redis:')[:16]}@redis.internal:6379/0\n"
|
||||
f"JWT_SECRET={_stable_token(slug, 'jwt:')}\n"
|
||||
"LOG_LEVEL=info\n"
|
||||
"ENVIRONMENT=production\n"
|
||||
)
|
||||
return CanaryArtifact(
|
||||
path="",
|
||||
content=body.encode("utf-8"),
|
||||
mode=0o600,
|
||||
mtime_offset=-86400 * 7, # last edited a week ago
|
||||
generator=self.name,
|
||||
notes=[
|
||||
f"API_BASE_URL embeds {api_url}",
|
||||
f"WEBHOOK_NOTIFY_URL embeds {api_url}/webhook",
|
||||
],
|
||||
)
|
||||
53
decnet/canary/generators/git_config.py
Normal file
53
decnet/canary/generators/git_config.py
Normal file
@@ -0,0 +1,53 @@
|
||||
"""Fake ``.git/config`` with an attacker-bait remote URL.
|
||||
|
||||
The ``[remote "origin"]`` ``url`` field is the natural place to embed
|
||||
an HTTP-callback URL: it's normal for git remotes to be HTTPS, the
|
||||
URL is read by every git command an attacker runs (``git pull``,
|
||||
``git fetch``, ``git remote -v``), and the slug fits naturally as
|
||||
part of a path.
|
||||
|
||||
The generator emits a plausible private-mirror remote (``git.<org>``
|
||||
or the canary host's hostname) so an attacker doesn't immediately
|
||||
recognise it as a honeypot. The slug ends up in the URL path:
|
||||
|
||||
[remote "origin"]
|
||||
url = https://canary.example.test/c/<slug>/repo.git
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
class GitConfigGenerator(CanaryGenerator):
|
||||
name = "git_config"
|
||||
|
||||
def generate(self, ctx: CanaryContext) -> CanaryArtifact:
|
||||
# Strip trailing slash defensively — operator may have
|
||||
# configured DECNET_CANARY_HTTP_BASE either way.
|
||||
base = ctx.http_base.rstrip("/")
|
||||
slug = ctx.callback_token
|
||||
# The /c/<slug>/repo.git suffix gives us a realistic-looking
|
||||
# path the worker can route on a single ``startswith("/c/")``
|
||||
# check, while still surviving a quick grep for the slug.
|
||||
url = f"{base}/c/{slug}/repo.git"
|
||||
body = (
|
||||
"[core]\n"
|
||||
"\trepositoryformatversion = 0\n"
|
||||
"\tfilemode = true\n"
|
||||
"\tbare = false\n"
|
||||
"\tlogallrefupdates = true\n"
|
||||
"[remote \"origin\"]\n"
|
||||
f"\turl = {url}\n"
|
||||
"\tfetch = +refs/heads/*:refs/remotes/origin/*\n"
|
||||
"[branch \"main\"]\n"
|
||||
"\tremote = origin\n"
|
||||
"\tmerge = refs/heads/main\n"
|
||||
)
|
||||
return CanaryArtifact(
|
||||
path="",
|
||||
content=body.encode("utf-8"),
|
||||
mode=0o644,
|
||||
mtime_offset=-86400 * 30, # checked out a month ago
|
||||
generator=self.name,
|
||||
notes=[f"git remote 'origin' embeds {url}"],
|
||||
)
|
||||
61
decnet/canary/generators/honeydoc.py
Normal file
61
decnet/canary/generators/honeydoc.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""Built-in honeydoc — a minimal HTML "report" with a tracking pixel.
|
||||
|
||||
This is the *fallback* honeydoc used when the operator hasn't
|
||||
uploaded a real document. The HTML instrumenter handles operator
|
||||
uploads via :mod:`decnet.canary.instrumenters.html`; this generator
|
||||
exists so the deploy-time baseline can plant *something* convincing
|
||||
without first prompting the operator to drop a file.
|
||||
|
||||
The realism here is intentionally modest: a Documents-folder HTML
|
||||
page with internal-looking content and a 1×1 remote image at the
|
||||
bottom whose ``src`` is the canary callback URL. Most desktop
|
||||
HTML renderers fetch the image as soon as the file is opened in a
|
||||
browser preview, so opening the doc trips the callback.
|
||||
|
||||
Operators who want a richer artifact should upload their own DOCX
|
||||
or PDF; the corresponding instrumenter embeds the same callback in
|
||||
the appropriate format.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
class HoneydocGenerator(CanaryGenerator):
|
||||
name = "honeydoc"
|
||||
|
||||
def generate(self, ctx: CanaryContext) -> CanaryArtifact:
|
||||
base = ctx.http_base.rstrip("/")
|
||||
slug = ctx.callback_token
|
||||
pixel_url = f"{base}/c/{slug}"
|
||||
body = (
|
||||
"<!DOCTYPE html>\n"
|
||||
"<html lang=\"en\">\n"
|
||||
"<head>\n"
|
||||
"<meta charset=\"utf-8\">\n"
|
||||
"<title>Q3 Operations Review — DRAFT</title>\n"
|
||||
"</head>\n"
|
||||
"<body>\n"
|
||||
"<h1>Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)</h1>\n"
|
||||
"<p>Forecast and remediation timeline below. Numbers are\n"
|
||||
"preliminary and subject to revision before the all-hands.</p>\n"
|
||||
"<table>\n"
|
||||
"<tr><th>Region</th><th>Incidents</th><th>MTTR (h)</th></tr>\n"
|
||||
"<tr><td>us-east</td><td>14</td><td>3.2</td></tr>\n"
|
||||
"<tr><td>us-west</td><td>9</td><td>4.7</td></tr>\n"
|
||||
"<tr><td>eu-central</td><td>22</td><td>2.1</td></tr>\n"
|
||||
"</table>\n"
|
||||
"<p>Internal contact: <a href=\"mailto:secops@internal\">"
|
||||
"secops@internal</a></p>\n"
|
||||
f"<img src=\"{pixel_url}\" width=\"1\" height=\"1\" alt=\"\">\n"
|
||||
"</body>\n"
|
||||
"</html>\n"
|
||||
)
|
||||
return CanaryArtifact(
|
||||
path="",
|
||||
content=body.encode("utf-8"),
|
||||
mode=0o644, # docs are typically world-readable
|
||||
mtime_offset=-86400 * 21, # 3 weeks ago
|
||||
generator=self.name,
|
||||
notes=[f"tracking pixel src={pixel_url}"],
|
||||
)
|
||||
133
decnet/canary/generators/honeydoc_docx.py
Normal file
133
decnet/canary/generators/honeydoc_docx.py
Normal file
@@ -0,0 +1,133 @@
|
||||
"""Real-DOCX honeydoc generator.
|
||||
|
||||
Synthesises a minimal but structurally valid DOCX from scratch via
|
||||
stdlib :mod:`zipfile`, then uses the same external-image relationship
|
||||
trick that powers :mod:`decnet.canary.instrumenters.docx` to embed
|
||||
the callback URL. No python-docx dependency.
|
||||
|
||||
The output opens cleanly in Word / LibreOffice; both fetch the
|
||||
external image relationship on document load.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import zipfile
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
from decnet.canary.instrumenters.docx import _drawing, _next_rid
|
||||
|
||||
|
||||
_CONTENT_TYPES = (
|
||||
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
|
||||
'<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
|
||||
'<Default Extension="xml" ContentType="application/xml"/>'
|
||||
'<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
|
||||
'<Override PartName="/word/document.xml" '
|
||||
'ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>'
|
||||
'</Types>'
|
||||
).encode()
|
||||
|
||||
_PACKAGE_RELS = (
|
||||
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
|
||||
'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
|
||||
'<Relationship Id="rId1" '
|
||||
'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" '
|
||||
'Target="word/document.xml"/>'
|
||||
'</Relationships>'
|
||||
).encode()
|
||||
|
||||
_BODY_PARAGRAPHS = (
|
||||
"Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)",
|
||||
"",
|
||||
"Forecast and remediation timeline below. Numbers are preliminary "
|
||||
"and subject to revision before the all-hands.",
|
||||
"",
|
||||
"Region Incidents MTTR (h)",
|
||||
"us-east 14 3.2",
|
||||
"us-west 9 4.7",
|
||||
"eu-central 22 2.1",
|
||||
"",
|
||||
"Internal contact: secops@internal",
|
||||
)
|
||||
|
||||
|
||||
def _document_xml(rid_with_drawing: str | None = None) -> bytes:
|
||||
"""Build the body XML.
|
||||
|
||||
``rid_with_drawing`` is the rId of the external image relationship;
|
||||
when set, we append the same ``<w:drawing>`` element that the DOCX
|
||||
instrumenter inserts so the body references the external resource.
|
||||
"""
|
||||
paragraphs = []
|
||||
for line in _BODY_PARAGRAPHS:
|
||||
if line:
|
||||
paragraphs.append(
|
||||
"<w:p><w:r><w:t xml:space=\"preserve\">"
|
||||
+ _xml_escape(line)
|
||||
+ "</w:t></w:r></w:p>"
|
||||
)
|
||||
else:
|
||||
paragraphs.append("<w:p/>")
|
||||
body = "".join(paragraphs)
|
||||
drawing = _drawing(rid_with_drawing).decode() if rid_with_drawing else ""
|
||||
return (
|
||||
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
|
||||
'<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
|
||||
f'<w:body>{body}{drawing}</w:body>'
|
||||
'</w:document>'
|
||||
).encode()
|
||||
|
||||
|
||||
def _xml_escape(s: str) -> str:
|
||||
return (
|
||||
s.replace("&", "&")
|
||||
.replace("<", "<")
|
||||
.replace(">", ">")
|
||||
)
|
||||
|
||||
|
||||
def _document_rels(rid: str, url: str) -> bytes:
|
||||
return (
|
||||
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
|
||||
'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
|
||||
f'<Relationship Id="{rid}" '
|
||||
f'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" '
|
||||
f'Target="{url}" TargetMode="External"/>'
|
||||
'</Relationships>'
|
||||
).encode()
|
||||
|
||||
|
||||
class HoneydocDocxGenerator(CanaryGenerator):
|
||||
name = "honeydoc_docx"
|
||||
|
||||
def generate(self, ctx: CanaryContext) -> CanaryArtifact:
|
||||
url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
|
||||
# Pick a stable rId — there's only one relationship in the
|
||||
# synthesised file, so any unused id works. Reuse the
|
||||
# instrumenter's allocator against the bare relationships
|
||||
# skeleton for parity with operator-uploaded DOCX flow.
|
||||
skeleton = (
|
||||
b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
|
||||
b'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
|
||||
b'</Relationships>'
|
||||
)
|
||||
rid = _next_rid(skeleton)
|
||||
|
||||
out = io.BytesIO()
|
||||
with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as zf:
|
||||
zf.writestr("[Content_Types].xml", _CONTENT_TYPES)
|
||||
zf.writestr("_rels/.rels", _PACKAGE_RELS)
|
||||
zf.writestr("word/document.xml", _document_xml(rid))
|
||||
zf.writestr("word/_rels/document.xml.rels", _document_rels(rid, url))
|
||||
|
||||
return CanaryArtifact(
|
||||
path="",
|
||||
content=out.getvalue(),
|
||||
mode=0o644,
|
||||
mtime_offset=-86400 * 21,
|
||||
generator=self.name,
|
||||
notes=[
|
||||
"synthesised DOCX with realistic Q3 review body",
|
||||
f"external-image relationship {rid} -> {url}",
|
||||
],
|
||||
)
|
||||
127
decnet/canary/generators/honeydoc_pdf.py
Normal file
127
decnet/canary/generators/honeydoc_pdf.py
Normal file
@@ -0,0 +1,127 @@
|
||||
"""Real-PDF honeydoc generator (uses :mod:`pikepdf`).
|
||||
|
||||
Builds a one-page PDF with the same Q3-review body as the HTML/DOCX
|
||||
flavors and installs an ``/OpenAction`` ``/URI`` action on the
|
||||
catalog so most viewers fire the callback the moment the document
|
||||
opens.
|
||||
|
||||
Pikepdf is now a hard dependency for this generator (the operator
|
||||
installed it explicitly so we can use it). We still surface a
|
||||
clear :class:`InstrumenterRejectedError` when imports fail, so a
|
||||
deployment without pikepdf can fall back to the DOCX or HTML
|
||||
generators rather than crashing the API.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
|
||||
from decnet.canary.base import (
|
||||
CanaryArtifact,
|
||||
CanaryContext,
|
||||
CanaryGenerator,
|
||||
InstrumenterRejectedError,
|
||||
)
|
||||
|
||||
|
||||
_BODY_LINES = (
|
||||
("Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)", 14),
|
||||
("", 12),
|
||||
("Forecast and remediation timeline below.", 11),
|
||||
("Numbers are preliminary, subject to revision.", 11),
|
||||
("", 12),
|
||||
("Region Incidents MTTR (h)", 11),
|
||||
("us-east 14 3.2", 11),
|
||||
("us-west 9 4.7", 11),
|
||||
("eu-central 22 2.1", 11),
|
||||
("", 12),
|
||||
("Internal contact: secops@internal", 11),
|
||||
)
|
||||
|
||||
|
||||
class HoneydocPdfGenerator(CanaryGenerator):
|
||||
name = "honeydoc_pdf"
|
||||
|
||||
def generate(self, ctx: CanaryContext) -> CanaryArtifact:
|
||||
try:
|
||||
from pikepdf import Pdf, Name, Dictionary, String # type: ignore[import-not-found]
|
||||
except ImportError as e:
|
||||
raise InstrumenterRejectedError(
|
||||
"honeydoc_pdf requires pikepdf; install it (`pip install "
|
||||
"pikepdf`) or pick honeydoc / honeydoc_docx instead."
|
||||
) from e
|
||||
|
||||
url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
|
||||
|
||||
pdf = Pdf.new()
|
||||
# Helvetica is one of the 14 PDF base fonts — every viewer ships
|
||||
# it, so no font embedding is required.
|
||||
font = pdf.make_indirect(Dictionary(
|
||||
Type=Name("/Font"),
|
||||
Subtype=Name("/Type1"),
|
||||
BaseFont=Name("/Helvetica"),
|
||||
))
|
||||
|
||||
# Build a single content stream that writes each body line at a
|
||||
# decreasing y-coordinate. PDF coordinates start at the bottom-
|
||||
# left (US Letter = 612 x 792 points); we lay out lines roughly
|
||||
# 18 points apart starting near the top.
|
||||
ops: list[str] = ["BT /F1 12 Tf 72 750 Td"]
|
||||
first = True
|
||||
for line, size in _BODY_LINES:
|
||||
if not first:
|
||||
ops.append("0 -18 Td")
|
||||
first = False
|
||||
ops.append(f"/F1 {size} Tf")
|
||||
ops.append(f"({_pdf_escape(line)}) Tj")
|
||||
ops.append("ET")
|
||||
content_bytes = "\n".join(ops).encode("latin-1")
|
||||
|
||||
content_stream = pdf.make_stream(content_bytes)
|
||||
|
||||
page = pdf.add_blank_page(page_size=(612, 792))
|
||||
page[Name("/Resources")] = Dictionary(
|
||||
Font=Dictionary(F1=font),
|
||||
)
|
||||
page[Name("/Contents")] = content_stream
|
||||
|
||||
# OpenAction fires the URI when the file is opened in Acrobat,
|
||||
# Preview, the browser PDF viewer, etc. Most viewers prompt
|
||||
# before fetching; that prompt itself is a tell, and an
|
||||
# auto-allow viewer fetches silently.
|
||||
pdf.Root[Name("/OpenAction")] = Dictionary(
|
||||
Type=Name("/Action"),
|
||||
S=Name("/URI"),
|
||||
URI=String(url),
|
||||
)
|
||||
|
||||
out = io.BytesIO()
|
||||
pdf.save(out)
|
||||
return CanaryArtifact(
|
||||
path="",
|
||||
content=out.getvalue(),
|
||||
mode=0o644,
|
||||
mtime_offset=-86400 * 21,
|
||||
generator=self.name,
|
||||
notes=[
|
||||
"synthesised one-page PDF with realistic Q3 review body",
|
||||
f"/OpenAction /URI -> {url}",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
def _pdf_escape(s: str) -> str:
|
||||
"""Escape parens and backslashes for PDF literal-string syntax.
|
||||
|
||||
PDF string literals are wrapped in ``( … )``; inner ``(``, ``)``,
|
||||
and ``\\`` need backslash escapes. Everything else (including
|
||||
UTF-8 multibyte sequences) round-trips fine because Helvetica's
|
||||
encoding is WinAnsi-ish — we'll lose exotic glyphs but the
|
||||
realistic body sticks to ASCII anyway. Em-dashes are downgraded
|
||||
to ``--`` to avoid the WinAnsi gap.
|
||||
"""
|
||||
return (
|
||||
s.replace("\\", r"\\")
|
||||
.replace("(", r"\(")
|
||||
.replace(")", r"\)")
|
||||
.replace("—", "--")
|
||||
)
|
||||
190
decnet/canary/generators/mysql_dump.py
Normal file
190
decnet/canary/generators/mysql_dump.py
Normal file
@@ -0,0 +1,190 @@
|
||||
"""Fake ``mysqldump`` output that phones home on import.
|
||||
|
||||
Mirrors the Canarytokens.org MySQL-dump trick. When a victim runs
|
||||
``mysql < dump.sql``, the trailer block executes a base64-obfuscated
|
||||
``CHANGE REPLICATION SOURCE TO`` against ``<slug>.canary.<dns_zone>``
|
||||
followed by ``START REPLICA``. The victim's MySQL daemon then:
|
||||
|
||||
1. Resolves the slug subdomain via DNS — this is the trip our
|
||||
:mod:`decnet.canary.dns_server` already detects.
|
||||
2. Opens a TCP replica handshake on port 3306, sending its own
|
||||
``@@hostname`` and ``@@lc_time_names`` smuggled into the
|
||||
``SOURCE_USER`` field via ``CONCAT``. Capturing those bytes
|
||||
requires a MySQL handshake responder on the worker — out of scope
|
||||
for v1; the DNS lookup alone is sufficient for detection.
|
||||
|
||||
The base64 wrapper is the camouflage: a plain ``grep canary dump.sql``
|
||||
finds nothing. The slug only materialises when the victim's server
|
||||
runs ``PREPARE … FROM @s2``.
|
||||
|
||||
Because the trip surface is DNS, this generator REQUIRES a non-empty
|
||||
``dns_zone``. The slug must appear as the leftmost label of the
|
||||
hostname so a single DNS query identifies the token; the http_base
|
||||
host is not slug-bearing and can't substitute.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
def _stable_hex(seed: str, prefix: str = "", length: int = 16) -> str:
|
||||
h = hashlib.sha256((prefix + seed).encode()).hexdigest()
|
||||
return h[:length]
|
||||
|
||||
|
||||
def _build_replica_payload(slug: str, dns_zone: str) -> str:
|
||||
"""Inner SQL that gets base64-wrapped.
|
||||
|
||||
The CONCAT splices ``@@lc_time_names`` and ``@@hostname`` into the
|
||||
``SOURCE_USER`` value at PREPARE time so the victim's locale and
|
||||
hostname travel as the replica username on the 3306 handshake.
|
||||
"""
|
||||
host = f"{slug}.{dns_zone}"
|
||||
return (
|
||||
"SET @bb = CONCAT("
|
||||
"\"CHANGE REPLICATION SOURCE TO "
|
||||
"SOURCE_PASSWORD='replica-pw', "
|
||||
"SOURCE_RETRY_COUNT=1, "
|
||||
"SOURCE_PORT=3306, "
|
||||
f"SOURCE_HOST='{host}', "
|
||||
"SOURCE_SSL=0, "
|
||||
f"SOURCE_USER='{slug}\", "
|
||||
"@@lc_time_names, @@hostname, \"';\");"
|
||||
)
|
||||
|
||||
|
||||
def _build_trailer(slug: str, dns_zone: str) -> str:
|
||||
inner = _build_replica_payload(slug, dns_zone)
|
||||
encoded = base64.b64encode(inner.encode("utf-8")).decode("ascii")
|
||||
return (
|
||||
f"SET @b = '{encoded}';\n"
|
||||
"SET @s2 = FROM_BASE64(@b);\n"
|
||||
"PREPARE stmt1 FROM @s2;\n"
|
||||
"EXECUTE stmt1;\n"
|
||||
"PREPARE stmt2 FROM @bb;\n"
|
||||
"EXECUTE stmt2;\n"
|
||||
"START REPLICA;\n"
|
||||
)
|
||||
|
||||
|
||||
class MySQLDumpGenerator(CanaryGenerator):
|
||||
name = "mysql_dump"
|
||||
|
||||
def generate(self, ctx: CanaryContext) -> CanaryArtifact:
|
||||
if not ctx.dns_zone:
|
||||
raise ValueError(
|
||||
"mysql_dump requires a non-empty dns_zone — the trip "
|
||||
"surface is a DNS lookup of <slug>.<dns_zone>."
|
||||
)
|
||||
slug = ctx.callback_token
|
||||
zone = ctx.dns_zone
|
||||
host = f"{slug}.{zone}"
|
||||
|
||||
# Realism filler: deterministic per-slug fake user rows so two
|
||||
# runs with the same context produce byte-identical output
|
||||
# (planter idempotency contract).
|
||||
u1_hash = _stable_hex(slug, "u1:", 32)
|
||||
u2_hash = _stable_hex(slug, "u2:", 32)
|
||||
api_token = _stable_hex(slug, "api:", 40)
|
||||
|
||||
# Synthesised SQL bait below — never executed by us, only by
|
||||
# whoever runs ``mysql < dump.sql`` against their own server.
|
||||
# Built with .format() instead of f-strings so bandit's B608
|
||||
# heuristic doesn't false-positive on the "INSERT INTO" + var
|
||||
# pattern.
|
||||
users_insert = (
|
||||
"INSERT INTO `users` VALUES " # nosec B608
|
||||
"(1,'alice@app.internal','$2y$10${u1a}.{u1b}','2024-11-12 09:13:44'),"
|
||||
"(2,'bob@app.internal','$2y$10${u2a}.{u2b}','2025-02-03 17:42:08');\n"
|
||||
).replace("{u1a}", u1_hash[:22]).replace("{u1b}", u1_hash[22:]) \
|
||||
.replace("{u2a}", u2_hash[:22]).replace("{u2b}", u2_hash[22:])
|
||||
api_keys_insert = (
|
||||
"INSERT INTO `api_keys` VALUES (1,1,'{tok}');\n" # nosec B608
|
||||
).replace("{tok}", api_token)
|
||||
header = (
|
||||
"-- MySQL dump 10.13 Distrib 8.0.35, for Linux (x86_64)\n"
|
||||
"--\n"
|
||||
"-- Host: db-prod-01 Database: app_production\n"
|
||||
"-- ------------------------------------------------------\n"
|
||||
"-- Server version\t8.0.35\n"
|
||||
"\n"
|
||||
"/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;\n"
|
||||
"/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;\n"
|
||||
"/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;\n"
|
||||
"/*!50503 SET NAMES utf8mb4 */;\n"
|
||||
"/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;\n"
|
||||
"/*!40103 SET TIME_ZONE='+00:00' */;\n"
|
||||
"/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;\n"
|
||||
"/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;\n"
|
||||
"/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;\n"
|
||||
"/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;\n"
|
||||
"\n"
|
||||
"--\n"
|
||||
"-- Table structure for table `users`\n"
|
||||
"--\n"
|
||||
"\n"
|
||||
"DROP TABLE IF EXISTS `users`;\n"
|
||||
"CREATE TABLE `users` (\n"
|
||||
" `id` int unsigned NOT NULL AUTO_INCREMENT,\n"
|
||||
" `email` varchar(255) NOT NULL,\n"
|
||||
" `password_hash` char(60) NOT NULL,\n"
|
||||
" `created_at` datetime NOT NULL,\n"
|
||||
" PRIMARY KEY (`id`),\n"
|
||||
" UNIQUE KEY `uniq_email` (`email`)\n"
|
||||
") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;\n"
|
||||
"\n"
|
||||
"LOCK TABLES `users` WRITE;\n"
|
||||
+ users_insert +
|
||||
"UNLOCK TABLES;\n"
|
||||
"\n"
|
||||
"--\n"
|
||||
"-- Table structure for table `api_keys`\n"
|
||||
"--\n"
|
||||
"\n"
|
||||
"DROP TABLE IF EXISTS `api_keys`;\n"
|
||||
"CREATE TABLE `api_keys` (\n"
|
||||
" `id` int unsigned NOT NULL AUTO_INCREMENT,\n"
|
||||
" `user_id` int unsigned NOT NULL,\n"
|
||||
" `token` char(40) NOT NULL,\n"
|
||||
" PRIMARY KEY (`id`)\n"
|
||||
") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;\n"
|
||||
"\n"
|
||||
"LOCK TABLES `api_keys` WRITE;\n"
|
||||
+ api_keys_insert +
|
||||
"UNLOCK TABLES;\n"
|
||||
"\n"
|
||||
)
|
||||
|
||||
trailer_replica = _build_trailer(slug, zone)
|
||||
|
||||
trailer_close = (
|
||||
"\n"
|
||||
"/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;\n"
|
||||
"/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;\n"
|
||||
"/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;\n"
|
||||
"/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;\n"
|
||||
"/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;\n"
|
||||
"/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;\n"
|
||||
"/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;\n"
|
||||
"/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;\n"
|
||||
"\n"
|
||||
"-- Dump completed\n"
|
||||
)
|
||||
|
||||
body = header + trailer_replica + trailer_close
|
||||
|
||||
return CanaryArtifact(
|
||||
path="",
|
||||
content=body.encode("utf-8"),
|
||||
mode=0o600,
|
||||
mtime_offset=-86400 * 7, # last week's backup
|
||||
generator=self.name,
|
||||
notes=[
|
||||
f"replica payload phones home to {host}:3306 on import",
|
||||
"base64-wrapped PREPARE/EXECUTE block hides the slug from grep",
|
||||
"@@hostname and @@lc_time_names smuggled into SOURCE_USER",
|
||||
],
|
||||
)
|
||||
68
decnet/canary/generators/ssh_key.py
Normal file
68
decnet/canary/generators/ssh_key.py
Normal file
@@ -0,0 +1,68 @@
|
||||
"""Fake SSH private key with the callback host in the comment.
|
||||
|
||||
OpenSSH private keys carry a free-form comment field — typically
|
||||
``user@host`` — that's preserved across rounds of ``ssh-keygen -p``.
|
||||
We embed the canary host as the ``user@host`` so an attacker who
|
||||
imports the key into their own keyring or runs ``ssh-keygen -lf`` on
|
||||
it sees a hostname they may then try to reach.
|
||||
|
||||
The key bytes themselves are syntactically valid (PEM envelope, base64
|
||||
body) but cryptographically junk — the body is a deterministic SHA-256
|
||||
hash of the slug repeated to the right length. We don't ship a real
|
||||
RSA/Ed25519 key because (a) we don't want a real private key sitting
|
||||
on disk pretending to be valuable, and (b) the attacker ``cat``-ing
|
||||
the file or running ``ssh -i`` will trigger the callback regardless
|
||||
of cryptographic validity.
|
||||
|
||||
The DNS-callback variant uses ``<slug>.canary.<dns_zone>`` as the
|
||||
hostname so a bare ``ssh-keygen -lf`` on the file resolves a unique
|
||||
subdomain even if the attacker never hits HTTP.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
|
||||
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||
|
||||
|
||||
def _fake_key_body(seed: str) -> str:
|
||||
# Real OpenSSH keys are several hundred base64 chars; we make a
|
||||
# plausible-looking 24-line block from a SHA-256-derived stream.
|
||||
h = hashlib.sha256(seed.encode()).digest()
|
||||
long_stream = (h * 32)[:768] # 768 bytes → ~1024 base64 chars
|
||||
encoded = base64.b64encode(long_stream).decode()
|
||||
# Wrap at 70 chars per line — same shape ``ssh-keygen`` produces.
|
||||
return "\n".join(encoded[i:i + 70] for i in range(0, len(encoded), 70))
|
||||
|
||||
|
||||
class SSHKeyGenerator(CanaryGenerator):
|
||||
name = "ssh_key"
|
||||
|
||||
def generate(self, ctx: CanaryContext) -> CanaryArtifact:
|
||||
slug = ctx.callback_token
|
||||
body = _fake_key_body(slug)
|
||||
# Hostname for the comment: prefer DNS-zone form when the
|
||||
# operator has DNS deployed (so ssh-keygen -lf names a subdomain
|
||||
# the attacker may resolve); fall back to the http_base host
|
||||
# otherwise.
|
||||
if ctx.dns_zone:
|
||||
host_comment = f"deploy@{slug}.{ctx.dns_zone}"
|
||||
else:
|
||||
from urllib.parse import urlparse
|
||||
host = urlparse(ctx.http_base).hostname or "deploy.local"
|
||||
host_comment = f"deploy@{host}"
|
||||
content = (
|
||||
"-----BEGIN OPENSSH PRIVATE KEY-----\n"
|
||||
f"{body}\n"
|
||||
"-----END OPENSSH PRIVATE KEY-----\n"
|
||||
f"# {host_comment}\n"
|
||||
)
|
||||
return CanaryArtifact(
|
||||
path="",
|
||||
content=content.encode("utf-8"),
|
||||
mode=0o600,
|
||||
mtime_offset=-86400 * 60, # 2 months ago
|
||||
generator=self.name,
|
||||
notes=[f"comment line embeds {host_comment}"],
|
||||
)
|
||||
Reference in New Issue
Block a user