feat(canary): package scaffolding (base/factory/paths/storage) + tests
Mirrors the decnet.intel layout (base + factory + lazy concrete imports). Defines:
- CanaryArtifact / CanaryContext dataclasses + the generator and instrumenter ABCs they share
- factory dispatch for generators (git_config/env_file/ssh_key/aws_creds/honeydoc) and instrumenters (docx/xlsx/pdf/html/image/plain/passthrough), plus pick_instrumenter_for_mime() for MIME-driven dispatch on operator uploads
- persona-aware default placement paths (Linux vs. Windows-shaped) and absolute-path validation that the API will use to validate operator-supplied placement_path values
- on-disk blob store: sha256-keyed two-level fan-out, idempotent writes, refcount-aware unlink (the DB row is the source of truth)

Also covers prior commits' tests (bus topics, models, repo CRUD) under tests/canary/. 79 tests, all pass.
This commit is contained in:
37
decnet/canary/__init__.py
Normal file
37
decnet/canary/__init__.py
Normal file
@@ -0,0 +1,37 @@
|
||||
"""Canary tokens — decoy artifacts planted in decky filesystems.
|
||||
|
||||
Public surface is exported here so callers can ``from decnet.canary
|
||||
import CanaryArtifact, get_generator, get_instrumenter`` without
|
||||
knowing the submodule layout. Concrete generators / instrumenters
|
||||
live under :mod:`decnet.canary.generators` and
|
||||
:mod:`decnet.canary.instrumenters` respectively; the factory keeps
|
||||
import-time cost down by deferring those imports until first use
|
||||
(same pattern as :mod:`decnet.intel.factory`).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.canary.base import (
|
||||
CanaryArtifact,
|
||||
CanaryContext,
|
||||
CanaryGenerator,
|
||||
CanaryInstrumenter,
|
||||
)
|
||||
from decnet.canary.factory import (
|
||||
KNOWN_GENERATORS,
|
||||
KNOWN_INSTRUMENTERS,
|
||||
get_generator,
|
||||
get_instrumenter,
|
||||
pick_instrumenter_for_mime,
|
||||
)
|
||||
|
||||
# Names re-exported at package level: the dataclasses and ABCs from
# ``decnet.canary.base`` followed by the factory helpers from
# ``decnet.canary.factory``.
__all__ = [
    "CanaryArtifact",
    "CanaryContext",
    "CanaryGenerator",
    "CanaryInstrumenter",
    "KNOWN_GENERATORS",
    "KNOWN_INSTRUMENTERS",
    "get_generator",
    "get_instrumenter",
    "pick_instrumenter_for_mime",
]
|
||||
145
decnet/canary/base.py
Normal file
145
decnet/canary/base.py
Normal file
@@ -0,0 +1,145 @@
|
||||
"""Canary generator / instrumenter ABCs and the artifact dataclass.
|
||||
|
||||
Two flavors of producer share the same return shape:
|
||||
|
||||
* :class:`CanaryGenerator` synthesises a fake artifact from scratch
|
||||
(e.g. a plausible ``~/.aws/credentials`` block, a ``.git/config``
|
||||
pointing at an attacker-bait remote URL). Operators don't supply
|
||||
any input.
|
||||
|
||||
* :class:`CanaryInstrumenter` mutates an operator-uploaded blob to
|
||||
embed the callback (HTTP slug + DNS host). The original blob bytes
|
||||
are passed in; the instrumenter returns the mutated version.
|
||||
|
||||
Both return a :class:`CanaryArtifact` — the planter doesn't care
|
||||
which path produced it. Same dataclass keeps the planter's
|
||||
docker-exec injector trivial.
|
||||
|
||||
ABCs intentionally do not include I/O — generators and instrumenters
|
||||
are pure functions of (slug, host, blob?). All filesystem work
|
||||
happens in :mod:`decnet.canary.planter` and :mod:`decnet.canary.storage`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
class CanaryContext:
    """Callback coordinates handed to every generator and instrumenter.

    The slug in ``callback_token`` is embedded verbatim in both callback
    surfaces: HTTP URLs of the form
    ``https://<host>/c/<callback_token>`` and DNS names of the form
    ``<callback_token>.canary.<dns_zone>``. Because the same slug is
    used on both, either trip resolves to the same :class:`CanaryToken`
    row.

    ``http_base`` and ``dns_zone`` are taken from the canary worker's
    public-facing configuration (``DECNET_CANARY_HTTP_BASE`` and
    ``DECNET_CANARY_DNS_ZONE``). An empty ``dns_zone`` means DNS is not
    deployed; instrumenters whose only realistic embed point is a
    hostname are expected to raise in that situation.
    """

    #: Unique slug identifying the token on every callback surface.
    callback_token: str

    #: e.g. "https://canary.example.test" — no trailing slash
    http_base: str

    #: e.g. "canary.example.test"; "" disables DNS embeds
    dns_zone: str = ""

    #: "linux" | "windows" — drives default username, path style
    persona: str = "linux"
|
||||
|
||||
|
||||
@dataclass
class CanaryArtifact:
    """Result of a generator or instrumenter: the bytes plus where and how to plant them."""

    #: Absolute path inside the target container.
    path: str

    #: Final bytes that hit the decky filesystem. Always raw bytes — per
    #: the original design note, the planter base64-encodes them for the
    #: wire so binary blobs (DOCX/PNG/PDF) survive ``docker exec sh -c``.
    content: bytes

    #: Unix file mode. ``0600`` is the default because most realistic
    #: canary placements (``~/.aws/credentials``, ``.env``, ``id_rsa``)
    #: are operator-only; honeydocs in user docs folders should pass
    #: ``0o644``.
    mode: int = 0o600

    #: Seconds relative to *now* for the planted file's mtime. Negative
    #: values backdate the file so it doesn't look like it appeared the
    #: moment the decky was deployed (``-86400 * 90``, i.e. 90 days ago,
    #: is a common choice for ``honeydoc`` artifacts). ``0`` means
    #: "stamp it now", which suits ``aws_creds``-like files that would
    #: plausibly have been touched recently.
    mtime_offset: int = 0

    #: Identifier of the instrumenter that produced this artifact (for
    #: upload-driven tokens). Mirrored into ``CanaryToken.instrumenter``.
    #: Mutually exclusive with :attr:`generator` (by convention; not
    #: enforced here).
    instrumenter: Optional[str] = None

    #: Identifier of the generator that produced this artifact (for
    #: synthesised tokens). Mirrored into ``CanaryToken.generator``.
    #: Mutually exclusive with :attr:`instrumenter` (by convention; not
    #: enforced here).
    generator: Optional[str] = None

    #: Human-readable notes about the embedding (e.g. "DOCX: injected
    #: 1×1 remote image at relsId rId99"). Intended for the API
    #: ``preview`` response so the operator sees what was done before
    #: planting; must never reach the attacker-facing surface.
    notes: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
class CanaryGenerator(ABC):
    """Abstract base for producers that synthesise a decoy artifact with no operator input."""

    #: short tag — matches ``CanaryToken.generator``
    name: str

    @abstractmethod
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the artifact for the callback described by ``ctx``.

        Contract for implementations:

        * no I/O of any kind;
        * deterministic in ``(callback_token, http_base, dns_zone,
          persona)``, so that re-seeding from
          :attr:`CanaryToken.secret_seed` yields byte-identical output
          and planting the result again is naturally idempotent.
        """
|
||||
|
||||
|
||||
class CanaryInstrumenter(ABC):
    """Abstract base for producers that embed a callback into an operator-uploaded blob."""

    #: short tag — matches ``CanaryToken.instrumenter``
    name: str

    #: MIME prefixes this instrumenter handles, e.g.
    #: ``("application/pdf",)`` or ``("text/",)``. Meant for dispatch by
    #: sniffed content-type.
    #: NOTE(review): the factory module in this package dispatches via
    #: its own table using ``str.startswith``; confirm whether this
    #: attribute is consulted anywhere.
    mime_prefixes: tuple[str, ...] = ()

    @abstractmethod
    def instrument(
        self,
        blob: bytes,
        ctx: CanaryContext,
        *,
        target_path: str,
    ) -> CanaryArtifact:
        """Embed the callback from ``ctx`` into ``blob`` and return the result.

        Implementations MUST raise :class:`InstrumenterRejectedError`
        when the blob cannot be mutated safely (corrupt zip, encrypted
        PDF, etc.). That lets the API answer with a 400 carrying the
        specific reason instead of silently shipping the original,
        un-instrumented bytes.
        """
|
||||
|
||||
|
||||
class InstrumenterRejectedError(ValueError):
    """Signals that an instrumenter refused a blob it could not mutate safely.

    Subclasses :class:`ValueError`, so callers that already catch
    ``ValueError`` handle it without changes.
    """
|
||||
129
decnet/canary/factory.py
Normal file
129
decnet/canary/factory.py
Normal file
@@ -0,0 +1,129 @@
|
||||
"""Generator and instrumenter factories.
|
||||
|
||||
Same lazy-import pattern as :mod:`decnet.intel.factory` — concrete
|
||||
implementations stay un-imported until first use so importing
|
||||
:mod:`decnet.canary` from a CLI subcommand doesn't drag in
|
||||
``pikepdf`` / ``python-docx`` / ``Pillow`` for callers that only
|
||||
need the model layer.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
from decnet.canary.base import CanaryGenerator, CanaryInstrumenter
|
||||
|
||||
# Names accepted by ``get_generator``; also quoted in its error message.
KNOWN_GENERATORS: Tuple[str, ...] = (
    "git_config", "env_file", "ssh_key", "aws_creds", "honeydoc",
)

# Names accepted by ``get_instrumenter``; also quoted in its error message.
KNOWN_INSTRUMENTERS: Tuple[str, ...] = (
    "docx", "xlsx", "pdf", "html", "image", "plain", "passthrough",
)
|
||||
|
||||
|
||||
def get_generator(name: str) -> CanaryGenerator:
    """Instantiate the generator registered under ``name``.

    The concrete module is imported only when its name is requested, so
    merely importing this factory does not load any generator
    implementation.

    Raises:
        ValueError: ``name`` is not a known generator. Raising (rather
            than returning nothing) lets a typo in an API request
            surface as a 400.
    """

    def _git_config() -> CanaryGenerator:
        from decnet.canary.generators.git_config import GitConfigGenerator

        return GitConfigGenerator()

    def _env_file() -> CanaryGenerator:
        from decnet.canary.generators.env_file import EnvFileGenerator

        return EnvFileGenerator()

    def _ssh_key() -> CanaryGenerator:
        from decnet.canary.generators.ssh_key import SSHKeyGenerator

        return SSHKeyGenerator()

    def _aws_creds() -> CanaryGenerator:
        from decnet.canary.generators.aws_creds import AWSCredsGenerator

        return AWSCredsGenerator()

    def _honeydoc() -> CanaryGenerator:
        from decnet.canary.generators.honeydoc import HoneydocGenerator

        return HoneydocGenerator()

    # Ordered (name, loader) pairs; compared with ``==`` exactly like the
    # chain of ``if`` statements this replaces.
    loaders = (
        ("git_config", _git_config),
        ("env_file", _env_file),
        ("ssh_key", _ssh_key),
        ("aws_creds", _aws_creds),
        ("honeydoc", _honeydoc),
    )
    for registered, load in loaders:
        if name == registered:
            return load()

    raise ValueError(
        f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}"
    )
|
||||
|
||||
|
||||
def get_instrumenter(name: str) -> CanaryInstrumenter:
    """Instantiate the instrumenter registered under ``name``.

    Each concrete module is imported only when its name is requested.

    Raises:
        ValueError: ``name`` is not a known instrumenter.
    """

    def _docx() -> CanaryInstrumenter:
        from decnet.canary.instrumenters.docx import DocxInstrumenter

        return DocxInstrumenter()

    def _xlsx() -> CanaryInstrumenter:
        from decnet.canary.instrumenters.xlsx import XlsxInstrumenter

        return XlsxInstrumenter()

    def _pdf() -> CanaryInstrumenter:
        from decnet.canary.instrumenters.pdf import PdfInstrumenter

        return PdfInstrumenter()

    def _html() -> CanaryInstrumenter:
        from decnet.canary.instrumenters.html import HtmlInstrumenter

        return HtmlInstrumenter()

    def _image() -> CanaryInstrumenter:
        from decnet.canary.instrumenters.image import ImageInstrumenter

        return ImageInstrumenter()

    def _plain() -> CanaryInstrumenter:
        from decnet.canary.instrumenters.plain import PlainInstrumenter

        return PlainInstrumenter()

    def _passthrough() -> CanaryInstrumenter:
        from decnet.canary.instrumenters.passthrough import PassthroughInstrumenter

        return PassthroughInstrumenter()

    # Ordered (name, loader) pairs; compared with ``==`` exactly like the
    # chain of ``if`` statements this replaces.
    loaders = (
        ("docx", _docx),
        ("xlsx", _xlsx),
        ("pdf", _pdf),
        ("html", _html),
        ("image", _image),
        ("plain", _plain),
        ("passthrough", _passthrough),
    )
    for registered, load in loaders:
        if name == registered:
            return load()

    raise ValueError(
        f"Unknown canary instrumenter: {name!r}. Known: {KNOWN_INSTRUMENTERS}"
    )
|
||||
|
||||
|
||||
# MIME → instrumenter dispatch. Order matters: the table is walked
# top-to-bottom and the first prefix match wins, so specific entries
# (e.g. ``text/html``) must precede the generic ones that would also
# match them (``text/``).
_MIME_DISPATCH: tuple[tuple[str, str], ...] = (
    # Office Open XML — DOCX/XLSX share a zip structure but expose
    # different inner trees, so dispatch by MIME alias rather than
    # zip-poking.
    ("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"),
    ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"),
    ("application/pdf", "pdf"),
    ("text/html", "html"),
    ("application/xhtml+xml", "html"),
    ("image/png", "image"),
    ("image/jpeg", "image"),
    ("image/gif", "image"),
    # Plaintext catch-alls — config files, .env, .ini, .yaml, .json,
    # source code. All handled by the same regex-substitution pass.
    ("text/", "plain"),
    ("application/json", "plain"),
    ("application/x-yaml", "plain"),
    ("application/yaml", "plain"),
    ("application/toml", "plain"),
)


def pick_instrumenter_for_mime(content_type: str) -> str:
    """Return the instrumenter name registered for a sniffed MIME type.

    Matching is a case-insensitive prefix match against
    ``_MIME_DISPATCH``, so parameters after the media type
    (``"text/html; charset=utf-8"``) do not prevent a match.
    Surrounding whitespace is ignored; without that, a value such as
    ``" text/html"`` would silently fall through to ``"passthrough"``.

    Args:
        content_type: The sniffed content type; may be empty or ``None``.

    Returns:
        The instrumenter name, or ``"passthrough"`` when the type is
        empty or nothing in the table matches (binary blobs that cannot
        be mutated safely — archives, executables, and the like). Per
        the original design note, ``passthrough`` only supports
        DNS-callback tokens, a constraint the API is expected to surface
        to the operator.
    """
    if not content_type:
        return "passthrough"
    # Normalise once: trim stray whitespace, then lower-case to match
    # the all-lowercase table.
    normalized = content_type.strip().lower()
    if not normalized:
        return "passthrough"
    for prefix, name in _MIME_DISPATCH:
        if normalized.startswith(prefix):
            return name
    return "passthrough"
|
||||
78
decnet/canary/paths.py
Normal file
78
decnet/canary/paths.py
Normal file
@@ -0,0 +1,78 @@
|
||||
"""Persona-aware path resolution for canary artifacts.
|
||||
|
||||
Linux-persona deckies use POSIX-shaped paths under ``/home/<user>``.
|
||||
"Windows" personas (still Linux containers under the hood — see
|
||||
:mod:`decnet.archetypes`) use Windows-shaped paths under
|
||||
``/home/<user>/AppData/...`` so an attacker browsing the filesystem
|
||||
through a planted RDP/SMB session sees the right shape.
|
||||
|
||||
The persona lookup is best-effort: callers pass the
|
||||
:attr:`decnet.archetypes.Archetype.nmap_os` value (``"linux"`` or
|
||||
``"windows"``); unknown personas fall through to ``"linux"``.
|
||||
Operators can always override by passing an explicit
|
||||
``placement_path`` when creating a token.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
# Account names substituted for ``{user}`` in the templates below.
DEFAULT_LINUX_USER = "admin"
DEFAULT_WINDOWS_USER = "Administrator"

# Canonical placements for the synthesizer-driven baseline tokens.
# Operators can override per-token via the API, but these are the
# defaults the deploy-time seed uses.
_LINUX_DEFAULTS: dict[str, str] = {
    "git_config": "/home/{user}/.git/config",
    "env_file": "/home/{user}/.env",
    "ssh_key": "/home/{user}/.ssh/id_rsa",
    "aws_creds": "/home/{user}/.aws/credentials",
    "honeydoc": "/home/{user}/Documents/quarterly_report.docx",
}

# The Windows-shaped table differs from the Linux one in only two
# entries; ssh_key (OpenSSH on Windows uses the same path), aws_creds
# and honeydoc are inherited unchanged. Key order is preserved.
_WINDOWS_DEFAULTS: dict[str, str] = {
    **_LINUX_DEFAULTS,
    "git_config": "/home/{user}/AppData/Local/Programs/Git/etc/gitconfig",
    "env_file": "/home/{user}/Desktop/prod.env",
}
|
||||
|
||||
|
||||
def default_user(persona: str) -> str:
    """Return the default account name used in placement paths for ``persona``.

    Only the exact string ``"windows"`` selects the Windows account;
    every other value gets the Linux one.
    """
    if persona == "windows":
        return DEFAULT_WINDOWS_USER
    return DEFAULT_LINUX_USER
|
||||
|
||||
|
||||
def default_path_for(generator: str, persona: str = "linux") -> str:
    """Return the default placement path for a synthesised token.

    The result is an absolute container path with ``{user}`` already
    filled in. Any persona other than ``"windows"`` uses the Linux
    table — better to plant *something* than fail the deploy hook.

    Args:
        generator: Generator name used as the lookup key.
        persona: ``"windows"`` selects the Windows-shaped table.

    Returns:
        The expanded template, or ``/tmp/<generator>.canary`` when the
        generator has no entry in the selected table.
    """
    if persona == "windows":
        templates = _WINDOWS_DEFAULTS
    else:
        templates = _LINUX_DEFAULTS

    chosen = templates.get(generator)
    if chosen:
        return chosen.format(user=default_user(persona))

    # Unknown generator — fall back to a generic /tmp drop so the
    # planter still has somewhere to write. The API rejects
    # unknown generators upstream, so this branch is defensive.
    return f"/tmp/{generator}.canary"  # nosec B108 — placement inside attacker-facing decoy container, not host /tmp
|
||||
|
||||
|
||||
def normalize_placement(path: str) -> str:
    """Validate an operator-supplied placement path.

    A path is accepted when it is absolute (starts with ``/``), contains
    no NUL byte or newline characters, and has no ``..`` path segment.
    The ``..`` test is segment-based, so a name that merely ends in two
    dots (``/srv/backup../x``) or starts with them (``/srv/..cache``)
    is allowed, while ``/a/../b`` and ``/a/..`` are rejected.

    NOTE(review): no other shell metacharacters are rejected here; code
    that interpolates the result into ``sh -c`` must quote it.

    Args:
        path: The candidate placement path.

    Returns:
        ``path`` unchanged when it passes every check.

    Raises:
        ValueError: with a message naming the violated rule, so the API
            can return a 400 with a clear explanation.
    """
    if not path or not path.startswith("/"):
        raise ValueError("placement_path must be absolute (start with '/')")
    if "\x00" in path:
        raise ValueError("placement_path may not contain NUL")
    if "\n" in path or "\r" in path:
        raise ValueError("placement_path may not contain newlines")
    # Compare whole segments: a plain substring test for "../" would also
    # reject legitimate names such as "backup../".
    if ".." in path.split("/"):
        raise ValueError("placement_path may not contain '..' segments")
    return path
|
||||
89
decnet/canary/storage.py
Normal file
89
decnet/canary/storage.py
Normal file
@@ -0,0 +1,89 @@
|
||||
"""Filesystem store for operator-uploaded canary blobs.
|
||||
|
||||
Blobs live under ``/var/lib/decnet/canary/blobs/<ab>/<cd>/<sha256>`` (two-level fan-out on the hash prefix; override
|
||||
via ``DECNET_CANARY_BLOB_DIR``) and are deduplicated by content hash.
|
||||
The DB table :class:`decnet.web.db.models.CanaryBlob` mirrors
|
||||
metadata; the bytes are read on demand at instrumentation time, so
|
||||
the API process never holds large operator uploads in memory longer
|
||||
than the request itself.
|
||||
|
||||
Refcount-aware deletion is enforced at the DB layer (see
|
||||
:meth:`decnet.web.db.repository.BaseRepository.delete_canary_blob`);
|
||||
this module only provides write/read/unlink primitives keyed by
|
||||
sha256.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Tuple
|
||||
|
||||
|
||||
def blob_dir() -> Path:
    """Return the root directory of the canary blob store.

    The ``DECNET_CANARY_BLOB_DIR`` environment variable, when set,
    takes precedence over the built-in default (which lets tests point
    the store at a temporary path). The variable is read on every call.
    This function does not create the directory.
    """
    configured = os.getenv("DECNET_CANARY_BLOB_DIR", "/var/lib/decnet/canary/blobs")
    return Path(configured)
|
||||
|
||||
|
||||
def _path_for(sha256: str) -> Path:
    """Map a hex digest to its location inside the blob store.

    Two-level fan-out (``ab/cd/abcd...``) keeps any one directory from
    accumulating thousands of entries on busy fleets. Same shape as
    Git's loose-object store.

    Args:
        sha256: Hex digest used as the storage key.

    Returns:
        ``<blob_dir>/<first 2 chars>/<next 2 chars>/<sha256>``.

    Raises:
        ValueError: the key is shorter than 4 characters or contains
            anything other than hexadecimal digits.
    """
    if len(sha256) < 4:
        raise ValueError("sha256 must be at least 4 chars")
    # The key is spliced directly into a filesystem path. Without this
    # check a value such as "../../etc/passwd" escapes the store: its
    # "/." slice is an absolute component, which makes pathlib discard
    # everything joined before it.
    if not all(ch in "0123456789abcdefABCDEF" for ch in sha256):
        raise ValueError("sha256 must contain only hexadecimal digits")
    root = blob_dir()
    return root / sha256[:2] / sha256[2:4] / sha256
|
||||
|
||||
|
||||
def write_blob(content: bytes) -> Tuple[str, Path, int]:
    """Persist ``content`` under its sha256 path.

    Idempotent: when a file already exists at the content-addressed
    target, nothing is written (the existing bytes are not re-read or
    compared).

    Args:
        content: Raw blob bytes.

    Returns:
        ``(sha256, path, size_bytes)``.

    Raises:
        OSError: the directory or file could not be created or written.
            A partially written temp file is removed before the error
            propagates.
    """
    sha = hashlib.sha256(content).hexdigest()
    target = _path_for(sha)
    target.parent.mkdir(parents=True, exist_ok=True)
    if not target.exists():
        # Write to a temp sibling and rename so a concurrent reader never
        # sees a half-written file. The temp name is unique per writer:
        # with one shared "<sha>.part" name, two simultaneous uploads of
        # the same content could truncate each other's file or rename it
        # away mid-write.
        tmp = target.with_name(
            f"{target.name}.{os.getpid()}.{os.urandom(8).hex()}.part"
        )
        try:
            # "x" refuses to reuse an existing file, guarding against a
            # (very unlikely) temp-name collision.
            with open(tmp, "xb") as handle:
                handle.write(content)
            os.replace(tmp, target)
        except BaseException:
            # Do not leave stray ".part" files behind on failure.
            try:
                tmp.unlink()
            except OSError:
                pass
            raise
    return sha, target, len(content)
|
||||
|
||||
|
||||
def read_blob(sha256: str) -> bytes:
    """Return the stored bytes for the blob keyed by ``sha256``.

    Raises:
        FileNotFoundError: the file is missing on disk, e.g. because an
            operator pruned ``/var/lib/decnet`` by hand. The caller
            (instrumenter dispatch) is expected to surface this as a
            410-ish error so the operator can re-upload.
    """
    location = _path_for(sha256)
    with open(location, "rb") as handle:
        return handle.read()
|
||||
|
||||
|
||||
def unlink_blob(sha256: str) -> bool:
    """Remove the on-disk bytes for ``sha256``.

    Per the original design note, this is the best-effort companion to
    the DB row deletion in
    :meth:`SQLModelRepository.delete_canary_blob` and is meant to be
    called *after* that delete commits. A crash between the two steps
    then leaves a recoverable orphan file, never a dangling DB
    reference.

    Returns:
        ``True`` if a file was removed, ``False`` if it was already gone.
    """
    location = _path_for(sha256)
    try:
        location.unlink()
    except FileNotFoundError:
        removed = False
    else:
        removed = True
    return removed
|
||||
Reference in New Issue
Block a user