merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
145
decnet/canary/base.py
Normal file
145
decnet/canary/base.py
Normal file
@@ -0,0 +1,145 @@
|
||||
"""Canary generator / instrumenter ABCs and the artifact dataclass.
|
||||
|
||||
Two flavors of producer share the same return shape:
|
||||
|
||||
* :class:`CanaryGenerator` synthesises a fake artifact from scratch
|
||||
(e.g. a plausible ``~/.aws/credentials`` block, a ``.git/config``
|
||||
pointing at an attacker-bait remote URL). Operators don't supply
|
||||
any input.
|
||||
|
||||
* :class:`CanaryInstrumenter` mutates an operator-uploaded blob to
|
||||
embed the callback (HTTP slug + DNS host). The original blob bytes
|
||||
are passed in; the instrumenter returns the mutated version.
|
||||
|
||||
Both return a :class:`CanaryArtifact` — the planter doesn't care
|
||||
which path produced it. Same dataclass keeps the planter's
|
||||
docker-exec injector trivial.
|
||||
|
||||
ABCs intentionally do not include I/O — generators and instrumenters
|
||||
are pure functions of (slug, host, blob?). All filesystem work
|
||||
happens in :mod:`decnet.canary.planter` and :mod:`decnet.canary.storage`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class CanaryContext:
|
||||
"""Inputs every generator/instrumenter needs to embed a working callback.
|
||||
|
||||
``callback_token`` is the unique slug; it appears verbatim in HTTP
|
||||
URLs (``https://<host>/c/<callback_token>``) and as the leftmost
|
||||
DNS label (``<callback_token>.canary.<dns_zone>``) so a single
|
||||
slug resolves to a single :class:`CanaryToken` row regardless of
|
||||
which path the attacker tripped.
|
||||
|
||||
``http_base`` and ``dns_zone`` come from the canary worker's
|
||||
public-facing config (``DECNET_CANARY_HTTP_BASE``,
|
||||
``DECNET_CANARY_DNS_ZONE``). When DNS isn't deployed,
|
||||
``dns_zone`` is empty and instrumenters that only have a DNS
|
||||
surface (e.g. an artifact whose only realistic embed point is a
|
||||
hostname) raise.
|
||||
"""
|
||||
|
||||
callback_token: str
|
||||
http_base: str # e.g. "https://canary.example.test" — no trailing slash
|
||||
dns_zone: str = "" # e.g. "canary.example.test"; "" disables DNS embeds
|
||||
persona: str = "linux" # "linux" | "windows" — drives default username, path style
|
||||
|
||||
|
||||
@dataclass
|
||||
class CanaryArtifact:
|
||||
"""Bytes-and-placement bundle produced by a generator/instrumenter."""
|
||||
|
||||
path: str
|
||||
"""Absolute path inside the target container."""
|
||||
|
||||
content: bytes
|
||||
"""Final bytes that hit the decky filesystem.
|
||||
|
||||
Always raw bytes — the planter base64-encodes for the wire so
|
||||
binary blobs (DOCX/PNG/PDF) survive ``docker exec sh -c`` safely.
|
||||
"""
|
||||
|
||||
mode: int = 0o600
|
||||
"""Unix file mode. Defaults to ``0600`` because most realistic
|
||||
canary placements (``~/.aws/credentials``, ``.env``, ``id_rsa``)
|
||||
are operator-only. Honeydocs in user docs folders should pass
|
||||
``0o644``.
|
||||
"""
|
||||
|
||||
mtime_offset: int = 0
|
||||
"""Seconds relative to *now* for the planted file's mtime.
|
||||
|
||||
Negative values backdate the file so it doesn't look like it
|
||||
appeared the moment the decky was deployed. ``-86400 * 90`` (90
|
||||
days ago) is a common choice for ``honeydoc`` artifacts; ``0``
|
||||
means "stamp it now," which is fine for ``aws_creds``-like files
|
||||
that would plausibly be touched recently.
|
||||
"""
|
||||
|
||||
instrumenter: Optional[str] = None
|
||||
"""Identifier of the instrumenter that produced this artifact (for
|
||||
upload-driven tokens). Mirrored into ``CanaryToken.instrumenter``.
|
||||
Mutually exclusive with :attr:`generator`.
|
||||
"""
|
||||
|
||||
generator: Optional[str] = None
|
||||
"""Identifier of the generator that produced this artifact (for
|
||||
synthesised tokens). Mirrored into ``CanaryToken.generator``.
|
||||
Mutually exclusive with :attr:`instrumenter`.
|
||||
"""
|
||||
|
||||
notes: list[str] = field(default_factory=list)
|
||||
"""Human-readable notes about the embedding (e.g. "DOCX: injected
|
||||
1×1 remote image at relsId rId99"). Surfaced in the API
|
||||
``preview`` response so the operator sees what we did before
|
||||
planting. Never leaked to the attacker-facing surface.
|
||||
"""
|
||||
|
||||
|
||||
class CanaryGenerator(ABC):
|
||||
"""Produces a fake artifact from scratch."""
|
||||
|
||||
name: str #: short tag — matches ``CanaryToken.generator``
|
||||
|
||||
@abstractmethod
|
||||
def generate(self, ctx: CanaryContext) -> CanaryArtifact:
|
||||
"""Synthesise the artifact.
|
||||
|
||||
MUST NOT do I/O. MUST be deterministic for the same
|
||||
``(callback_token, http_base, dns_zone, persona)`` so re-seeding
|
||||
from :attr:`CanaryToken.secret_seed` produces byte-identical
|
||||
output and the planter is naturally idempotent.
|
||||
"""
|
||||
|
||||
|
||||
class CanaryInstrumenter(ABC):
|
||||
"""Mutates an operator-uploaded blob to embed a callback."""
|
||||
|
||||
name: str #: short tag — matches ``CanaryToken.instrumenter``
|
||||
|
||||
#: MIME prefixes this instrumenter handles. The factory uses these
|
||||
#: to dispatch by sniffed content-type. Sub-string match against
|
||||
#: the prefix list (e.g. ``("application/pdf",)`` or
|
||||
#: ``("text/",)``).
|
||||
mime_prefixes: tuple[str, ...] = ()
|
||||
|
||||
@abstractmethod
|
||||
def instrument(
|
||||
self, blob: bytes, ctx: CanaryContext, *, target_path: str,
|
||||
) -> CanaryArtifact:
|
||||
"""Return the mutated bytes with the callback embedded.
|
||||
|
||||
MUST raise :class:`InstrumenterRejectedError` when the blob
|
||||
can't be safely mutated (corrupt zip, encrypted PDF, etc.) so
|
||||
the API can surface a 400 with the specific reason rather than
|
||||
silently shipping the original bytes.
|
||||
"""
|
||||
|
||||
|
||||
class InstrumenterRejectedError(ValueError):
|
||||
"""Raised when an instrumenter can't safely mutate the input."""
|
||||
Reference in New Issue
Block a user