Mirrors the decnet.intel layout (base + factory + lazy concrete imports).

Defines:
- CanaryArtifact / CanaryContext dataclasses + the generator and instrumenter ABCs they share
- factory dispatch for generators (git_config/env_file/ssh_key/aws_creds/honeydoc) and instrumenters (docx/xlsx/pdf/html/image/plain/passthrough), plus pick_instrumenter_for_mime() for MIME-driven dispatch on operator uploads
- persona-aware default placement paths (Linux vs. Windows-shaped) and absolute-path validation that the API will use to validate operator-supplied placement_path values
- on-disk blob store: sha256-keyed two-level fan-out, idempotent writes, refcount-aware unlink (the DB row is the source of truth)

Also covers prior commits' tests (bus topics, models, repo CRUD) under tests/canary/. 79 tests, all pass.
146 lines
5.4 KiB
Python
146 lines
5.4 KiB
Python
"""Canary generator / instrumenter ABCs and the artifact dataclass.

Two flavors of producer share the same return shape:

* :class:`CanaryGenerator` synthesises a fake artifact from scratch
  (e.g. a plausible ``~/.aws/credentials`` block, a ``.git/config``
  pointing at an attacker-bait remote URL). Operators don't supply
  any input.

* :class:`CanaryInstrumenter` mutates an operator-uploaded blob to
  embed the callback (HTTP slug + DNS host). The original blob bytes
  are passed in; the instrumenter returns the mutated version.

Both return a :class:`CanaryArtifact` — the planter doesn't care
which path produced it. Same dataclass keeps the planter's
docker-exec injector trivial.

ABCs intentionally do not include I/O — generators and instrumenters
are pure functions of (slug, host, blob?). All filesystem work
happens in :mod:`decnet.canary.planter` and :mod:`decnet.canary.storage`.
"""
|
||
from __future__ import annotations
|
||
|
||
from abc import ABC, abstractmethod
|
||
from dataclasses import dataclass, field
|
||
from typing import Optional
|
||
|
||
|
||
@dataclass
class CanaryContext:
    """Inputs every generator/instrumenter needs to embed a working callback.

    ``callback_token`` is the unique slug; it appears verbatim in HTTP
    URLs (``https://<host>/c/<callback_token>``) and as the leftmost
    DNS label (``<callback_token>.canary.<dns_zone>``) so a single
    slug resolves to a single :class:`CanaryToken` row regardless of
    which path the attacker tripped.

    ``http_base`` and ``dns_zone`` come from the canary worker's
    public-facing config (``DECNET_CANARY_HTTP_BASE``,
    ``DECNET_CANARY_DNS_ZONE``). When DNS isn't deployed,
    ``dns_zone`` is empty and instrumenters that only have a DNS
    surface (e.g. an artifact whose only realistic embed point is a
    hostname) raise.

    ``persona`` selects the flavour of the target host and defaults to
    ``"linux"``.
    """

    callback_token: str
    http_base: str  # e.g. "https://canary.example.test" — no trailing slash
    dns_zone: str = ""  # e.g. "canary.example.test"; "" disables DNS embeds
    persona: str = "linux"  # "linux" | "windows" — drives default username, path style
|
||
|
||
|
||
@dataclass
class CanaryArtifact:
    """Bytes-and-placement bundle produced by a generator/instrumenter."""

    # NOTE(review): ``instrumenter`` and ``generator`` are documented as
    # mutually exclusive, but nothing in this dataclass enforces it —
    # confirm whether the factory/API layer performs that check.

    path: str
    """Absolute path inside the target container."""

    content: bytes
    """Final bytes that hit the decky filesystem.

    Always raw bytes — the planter base64-encodes for the wire so
    binary blobs (DOCX/PNG/PDF) survive ``docker exec sh -c`` safely.
    """

    mode: int = 0o600
    """Unix file mode. Defaults to ``0600`` because most realistic
    canary placements (``~/.aws/credentials``, ``.env``, ``id_rsa``)
    are operator-only. Honeydocs in user docs folders should pass
    ``0o644``.
    """

    mtime_offset: int = 0
    """Seconds relative to *now* for the planted file's mtime.

    Negative values backdate the file so it doesn't look like it
    appeared the moment the decky was deployed. ``-86400 * 90`` (90
    days ago) is a common choice for ``honeydoc`` artifacts; ``0``
    means "stamp it now," which is fine for ``aws_creds``-like files
    that would plausibly be touched recently.
    """

    instrumenter: Optional[str] = None
    """Identifier of the instrumenter that produced this artifact (for
    upload-driven tokens). Mirrored into ``CanaryToken.instrumenter``.
    Mutually exclusive with :attr:`generator`.
    """

    generator: Optional[str] = None
    """Identifier of the generator that produced this artifact (for
    synthesised tokens). Mirrored into ``CanaryToken.generator``.
    Mutually exclusive with :attr:`instrumenter`.
    """

    # default_factory gives every artifact its own list; a shared
    # mutable default would leak notes between instances.
    notes: list[str] = field(default_factory=list)
    """Human-readable notes about the embedding (e.g. "DOCX: injected
    1×1 remote image at relsId rId99"). Surfaced in the API
    ``preview`` response so the operator sees what we did before
    planting. Never leaked to the attacker-facing surface.
    """
|
||
|
||
|
||
class CanaryGenerator(ABC):
    """Produces a fake artifact from scratch.

    Concrete subclasses set :attr:`name` and implement :meth:`generate`.
    """

    name: str  #: short tag — matches ``CanaryToken.generator``

    @abstractmethod
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Synthesise the artifact.

        MUST NOT do I/O. MUST be deterministic for the same
        ``(callback_token, http_base, dns_zone, persona)`` so re-seeding
        from :attr:`CanaryToken.secret_seed` produces byte-identical
        output and the planter is naturally idempotent.

        :param ctx: callback parameters to embed in the artifact.
        :returns: the synthesised :class:`CanaryArtifact`.
        """
|
||
|
||
|
||
class CanaryInstrumenter(ABC):
    """Mutates an operator-uploaded blob to embed a callback.

    Concrete subclasses set :attr:`name`, optionally override
    :attr:`mime_prefixes`, and implement :meth:`instrument`.
    """

    name: str  #: short tag — matches ``CanaryToken.instrumenter``

    #: MIME prefixes this instrumenter handles. The factory uses these
    #: to dispatch by sniffed content-type. Sub-string match against
    #: the prefix list (e.g. ``("application/pdf",)`` or
    #: ``("text/",)``).
    mime_prefixes: tuple[str, ...] = ()

    @abstractmethod
    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return the mutated bytes with the callback embedded.

        MUST raise :class:`InstrumenterRejectedError` when the blob
        can't be safely mutated (corrupt zip, encrypted PDF, etc.) so
        the API can surface a 400 with the specific reason rather than
        silently shipping the original bytes.

        :param blob: original bytes of the operator-uploaded file.
        :param ctx: callback parameters to embed.
        :param target_path: keyword-only path argument — presumably the
            placement path for the resulting artifact; confirm against
            the concrete instrumenters.
        :returns: a :class:`CanaryArtifact` carrying the mutated bytes.
        """
|
||
|
||
|
||
class InstrumenterRejectedError(ValueError):
    """Raised when an instrumenter can't safely mutate the input.

    Subclasses :class:`ValueError`, so handlers that already catch
    ``ValueError`` also catch this.
    """
|